diff --git a/CHANGELOG.md b/CHANGELOG.md index 00dd901cbf..653cc5b1e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support `ArrowJson` schema output format in QGL API and CLI commands - New `kamu system compact ` command that compacts dataslices for the given dataset ### Changed -- Next batch of optimizations of metadata chain traversal through API using Visitors +- Case insensitive comparisons of `dataset`s, `account`s and `repo`s ## [0.170.0] - 2024-03-29 ### Added diff --git a/src/adapter/graphql/src/queries/datasets/datasets.rs b/src/adapter/graphql/src/queries/datasets/datasets.rs index a05de1ea4a..5382bf4d6b 100644 --- a/src/adapter/graphql/src/queries/datasets/datasets.rs +++ b/src/adapter/graphql/src/queries/datasets/datasets.rs @@ -39,15 +39,14 @@ impl Datasets { account_name: AccountName, dataset_name: DatasetName, ) -> Result> { - let account = Account::from_account_name(account_name.clone().into()); - let dataset_alias = odf::DatasetAlias::new(Some(account_name.into()), dataset_name.into()); let dataset_repo = from_catalog::(ctx).unwrap(); let hdl = dataset_repo .try_resolve_dataset_ref(&dataset_alias.into_local_ref()) .await?; - Ok(hdl.map(|h| Dataset::new(account, h))) + + Ok(hdl.map(|h| Dataset::new(Account::from_dataset_alias(ctx, &h.alias), h))) } #[graphql(skip)] diff --git a/src/adapter/graphql/tests/tests/test_accounts.rs b/src/adapter/graphql/tests/tests/test_accounts.rs index 4e26fa49fc..61f44add89 100644 --- a/src/adapter/graphql/tests/tests/test_accounts.rs +++ b/src/adapter/graphql/tests/tests/test_accounts.rs @@ -17,20 +17,7 @@ use opendatafabric::AccountName; #[test_log::test(tokio::test)] async fn test_account_by_name() { - let mut mock_authentication_service = MockAuthenticationService::new(); - mock_authentication_service - .expect_find_account_info_by_name() - .with(eq(AccountName::new_unchecked(DEFAULT_ACCOUNT_NAME))) - .returning(|_| Ok(Some(AccountInfo::dummy()))); - mock_authentication_service - .expect_find_account_info_by_name() - .with(eq(AccountName::new_unchecked("unknown"))) - .returning(|_| Ok(None)); - - let cat = dill::CatalogBuilder::new() - .add_value(mock_authentication_service) - .bind::() - .build(); + let harness = GraphQLAccountsHarness::new(); let schema = kamu_adapter_graphql::schema_quiet(); let res = schema @@ -46,7 +33,7 @@ async fn test_account_by_name() { }} "#, )) - .data(cat.clone()), + .data(harness.catalog.clone()), ) .await; @@ -76,7 +63,7 @@ async fn test_account_by_name() { "#, "unknown", )) - .data(cat), + .data(harness.catalog.clone()), ) .await; @@ -89,6 +76,60 @@ async fn test_account_by_name() { } }) ); + + let res = schema + .execute( + async_graphql::Request::new(format!( + r#" + query {{ + accounts {{ + byName (name: "{}") {{ + accountName + }} + }} + }} + "#, + DEFAULT_ACCOUNT_NAME.to_ascii_uppercase(), + )) + .data(harness.catalog), + ) + .await; + + assert!(res.is_ok(), "{res:?}"); + assert_eq!( + res.data, + value!({ + "accounts": { + "byName": { + "accountName": DEFAULT_ACCOUNT_NAME + } + } + }) + ); } //////////////////////////////////////////////////////////////////////////////////////// + +struct GraphQLAccountsHarness { + catalog: dill::Catalog, +} + +impl GraphQLAccountsHarness { + pub fn new() -> Self { + let mut mock_authentication_service = MockAuthenticationService::new(); + mock_authentication_service + .expect_find_account_info_by_name() + .with(eq(AccountName::new_unchecked(DEFAULT_ACCOUNT_NAME))) + .returning(|_| Ok(Some(AccountInfo::dummy()))); + mock_authentication_service + .expect_find_account_info_by_name() + .with(eq(AccountName::new_unchecked("unknown"))) + .returning(|_| Ok(None)); + let catalog = dill::CatalogBuilder::new() + .add_value(mock_authentication_service) + .bind::() + .build(); + + Self { catalog } + } +} diff --git a/src/adapter/graphql/tests/tests/test_gql_data.rs b/src/adapter/graphql/tests/tests/test_gql_data.rs index 27c5865c6c..56fc746533 100644 --- a/src/adapter/graphql/tests/tests/test_gql_data.rs +++ b/src/adapter/graphql/tests/tests/test_gql_data.rs @@ -22,15 +22,18 @@ use opendatafabric::*; ///////////////////////////////////////////////////////////////////////////////////////// -fn create_catalog_with_local_workspace(tempdir: &Path) -> dill::Catalog { +fn create_catalog_with_local_workspace(tempdir: &Path, is_multitenant: bool) -> dill::Catalog { + let datasets_dir = tempdir.join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); + dill::CatalogBuilder::new() .add::() .add::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.join("datasets")) + .with_root(datasets_dir) .with_current_account_subject(Arc::new(CurrentAccountSubject::new_test())) - .with_multi_tenant(false), + .with_multi_tenant(is_multitenant), ) .bind::() .add::() @@ -42,12 +45,16 @@ fn create_catalog_with_local_workspace(tempdir: &Path) -> dill::Catalog { ///////////////////////////////////////////////////////////////////////////////////////// -async fn create_test_dataset(catalog: &dill::Catalog, tempdir: &Path) { +async fn create_test_dataset( + catalog: &dill::Catalog, + tempdir: &Path, + account_name: Option, +) { let dataset_repo = catalog.get_one::().unwrap(); let dataset = dataset_repo .create_dataset( - &DatasetAlias::new(None, DatasetName::new_unchecked("foo")), + &DatasetAlias::new(account_name, DatasetName::new_unchecked("foo")), MetadataFactory::metadata_block(MetadataFactory::seed(DatasetKind::Root).build()) .build_typed(), ) @@ -103,8 +110,8 @@ async fn create_test_dataset(catalog: &dill::Catalog, tempdir: &Path) { #[test_log::test(tokio::test)] async fn test_dataset_schema_local_fs() { let tempdir = tempfile::tempdir().unwrap(); - let catalog = create_catalog_with_local_workspace(tempdir.path()); - create_test_dataset(&catalog, tempdir.path()).await; + let catalog = create_catalog_with_local_workspace(tempdir.path(), true); + create_test_dataset(&catalog, tempdir.path(), None).await; let schema = kamu_adapter_graphql::schema_quiet(); let res = schema @@ -162,8 +169,8 @@ async fn test_dataset_schema_local_fs() { #[test_log::test(tokio::test)] async fn test_dataset_tail_local_fs() { let tempdir = tempfile::tempdir().unwrap(); - let catalog = create_catalog_with_local_workspace(tempdir.path()); - create_test_dataset(&catalog, tempdir.path()).await; + let catalog = create_catalog_with_local_workspace(tempdir.path(), true); + create_test_dataset(&catalog, tempdir.path(), None).await; let schema = kamu_adapter_graphql::schema_quiet(); let res = schema @@ -203,8 +210,8 @@ async fn test_dataset_tail_local_fs() { #[test_log::test(tokio::test)] async fn test_dataset_tail_empty_local_fs() { let tempdir = tempfile::tempdir().unwrap(); - let catalog = create_catalog_with_local_workspace(tempdir.path()); - create_test_dataset(&catalog, tempdir.path()).await; + let catalog = create_catalog_with_local_workspace(tempdir.path(), true); + create_test_dataset(&catalog, tempdir.path(), None).await; let schema = kamu_adapter_graphql::schema_quiet(); let res = schema diff --git a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs index 9127a0c540..0162239a2d 100644 --- a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs @@ -1099,12 +1099,14 @@ struct FlowConfigHarness { impl FlowConfigHarness { fn new() -> Self { let tempdir = tempfile::tempdir().unwrap(); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog_base = dill::CatalogBuilder::new() .add::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs index b52e61c424..c67a1f9f0b 100644 --- a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs @@ -1701,6 +1701,8 @@ impl FlowRunsHarness { let tempdir = tempfile::tempdir().unwrap(); let run_info_dir = tempdir.path().join("run"); let cache_dir = tempdir.path().join("cache"); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); std::fs::create_dir(&run_info_dir).unwrap(); std::fs::create_dir(&cache_dir).unwrap(); @@ -1711,7 +1713,7 @@ impl FlowRunsHarness { .add::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/adapter/graphql/tests/tests/test_gql_datasets.rs b/src/adapter/graphql/tests/tests/test_gql_datasets.rs index b704495ac7..544b182dba 100644 --- a/src/adapter/graphql/tests/tests/test_gql_datasets.rs +++ b/src/adapter/graphql/tests/tests/test_gql_datasets.rs @@ -24,7 +24,7 @@ use crate::utils::{authentication_catalogs, expect_anonymous_access_error}; #[test_log::test(tokio::test)] async fn dataset_by_id_does_not_exist() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let res = harness.execute_anonymous_query(indoc!( r#" { @@ -52,10 +52,10 @@ async fn dataset_by_id_does_not_exist() { #[test_log::test(tokio::test)] async fn dataset_by_id() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let foo_result = harness - .create_root_dataset(DatasetName::new_unchecked("foo")) + .create_root_dataset(None, DatasetName::new_unchecked("foo")) .await; let res = harness @@ -92,9 +92,56 @@ async fn dataset_by_id() { //////////////////////////////////////////////////////////////////////////////////////// +#[test_log::test(tokio::test)] +async fn dataset_by_account_and_name_case_insensetive() { + let harness = GraphQLDatasetsHarness::new(true); + + let account_name_str = "KaMu"; + harness + .create_root_dataset( + Some(AccountName::new_unchecked(account_name_str)), + DatasetName::new_unchecked("Foo"), + ) + .await; + + let res = harness + .execute_anonymous_query( + indoc!( + r#" + { + datasets { + byOwnerAndName(accountName: "kAmU", datasetName: "") { + name, + owner { accountName }, + } + } + } + "# + ) + .replace("", "FoO"), + ) + .await; + assert!(res.is_ok(), "{res:?}"); + assert_eq!( + res.data, + value!({ + "datasets": { + "byOwnerAndName": { + "name": "Foo", + "owner": { + "accountName": account_name_str, + } + } + } + }) + ); +} + +//////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn dataset_create_empty() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let request_code = indoc::indoc!( r#" @@ -136,7 +183,7 @@ async fn dataset_create_empty() { #[test_log::test(tokio::test)] async fn dataset_create_from_snapshot() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(true); let snapshot = MetadataFactory::dataset_snapshot() .name("foo") @@ -192,7 +239,7 @@ async fn dataset_create_from_snapshot() { #[test_log::test(tokio::test)] async fn dataset_create_from_snapshot_malformed() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let res = harness .execute_authorized_query(indoc!( @@ -226,10 +273,10 @@ async fn dataset_create_from_snapshot_malformed() { #[test_log::test(tokio::test)] async fn dataset_rename_success() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let foo_result = harness - .create_root_dataset(DatasetName::new_unchecked("foo")) + .create_root_dataset(None, DatasetName::new_unchecked("foo")) .await; let request_code = indoc!( @@ -278,10 +325,10 @@ async fn dataset_rename_success() { #[test_log::test(tokio::test)] async fn dataset_rename_no_changes() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let foo_result = harness - .create_root_dataset(DatasetName::new_unchecked("foo")) + .create_root_dataset(None, DatasetName::new_unchecked("foo")) .await; let res = harness @@ -328,13 +375,13 @@ async fn dataset_rename_no_changes() { #[test_log::test(tokio::test)] async fn dataset_rename_name_collision() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let foo_result = harness - .create_root_dataset(DatasetName::new_unchecked("foo")) + .create_root_dataset(None, DatasetName::new_unchecked("foo")) .await; let _bar_result = harness - .create_root_dataset(DatasetName::new_unchecked("bar")) + .create_root_dataset(None, DatasetName::new_unchecked("bar")) .await; let res = harness @@ -381,11 +428,11 @@ async fn dataset_rename_name_collision() { #[test_log::test(tokio::test)] async fn dataset_delete_success() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); harness.init_dependencies_graph().await; let foo_result = harness - .create_root_dataset(DatasetName::new_unchecked("foo")) + .create_root_dataset(None, DatasetName::new_unchecked("foo")) .await; let request_code = indoc!( @@ -431,11 +478,11 @@ async fn dataset_delete_success() { #[test_log::test(tokio::test)] async fn dataset_delete_dangling_ref() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); harness.init_dependencies_graph().await; let foo_result = harness - .create_root_dataset(DatasetName::new_unchecked("foo")) + .create_root_dataset(None, DatasetName::new_unchecked("foo")) .await; let _bar_result = harness .create_derived_dataset( @@ -489,10 +536,10 @@ async fn dataset_delete_dangling_ref() { #[test_log::test(tokio::test)] async fn dataset_view_permissions() { - let harness = GraphQLDatasetsHarness::new(); + let harness = GraphQLDatasetsHarness::new(false); let foo_result = harness - .create_root_dataset(DatasetName::new_unchecked("foo")) + .create_root_dataset(None, DatasetName::new_unchecked("foo")) .await; let request_code = indoc!( @@ -544,7 +591,7 @@ struct GraphQLDatasetsHarness { } impl GraphQLDatasetsHarness { - pub fn new() -> Self { + pub fn new(is_multi_tenant: bool) -> Self { let tempdir = tempfile::tempdir().unwrap(); let datasets_dir = tempdir.path().join("datasets"); std::fs::create_dir(&datasets_dir).unwrap(); @@ -555,7 +602,7 @@ impl GraphQLDatasetsHarness { .add_builder( DatasetRepositoryLocalFs::builder() .with_root(datasets_dir) - .with_multi_tenant(false), + .with_multi_tenant(is_multi_tenant), ) .bind::() .add_value(kamu::testing::MockAuthenticationService::built_in()) @@ -588,7 +635,11 @@ impl GraphQLDatasetsHarness { .unwrap(); } - pub async fn create_root_dataset(&self, name: DatasetName) -> CreateDatasetResult { + pub async fn create_root_dataset( + &self, + account_name: Option, + name: DatasetName, + ) -> CreateDatasetResult { let dataset_repo = self .catalog_authorized .get_one::() @@ -596,7 +647,7 @@ impl GraphQLDatasetsHarness { dataset_repo .create_dataset_from_snapshot( MetadataFactory::dataset_snapshot() - .name(DatasetAlias::new(None, name)) + .name(DatasetAlias::new(account_name, name)) .kind(DatasetKind::Root) .push_event(MetadataFactory::set_polling_source().build()) .build(), diff --git a/src/adapter/graphql/tests/tests/test_gql_metadata.rs b/src/adapter/graphql/tests/tests/test_gql_metadata.rs index eced5d86b8..9355727983 100644 --- a/src/adapter/graphql/tests/tests/test_gql_metadata.rs +++ b/src/adapter/graphql/tests/tests/test_gql_metadata.rs @@ -23,12 +23,14 @@ use crate::utils::authentication_catalogs; #[test_log::test(tokio::test)] async fn test_current_push_sources() { let tempdir = tempfile::tempdir().unwrap(); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let base_catalog = CatalogBuilder::new() .add::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs b/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs index 399df2b44c..501da4abd2 100644 --- a/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs +++ b/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs @@ -25,23 +25,11 @@ use crate::utils::{authentication_catalogs, expect_anonymous_access_error}; #[test_log::test(tokio::test)] async fn test_metadata_chain_events() { - let tempdir = tempfile::tempdir().unwrap(); - - let base_catalog = dill::CatalogBuilder::new() - .add::() - .add::() - .add_builder( - DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) - .with_multi_tenant(false), - ) - .bind::() - .add::() - .build(); + let harness = GraphQLMetadataChainHarness::new(false); // Init dataset - let (_, catalog_authorized) = authentication_catalogs(&base_catalog); - let dataset_repo = catalog_authorized + let dataset_repo = harness + .catalog_authorized .get_one::() .unwrap(); let create_result = dataset_repo @@ -124,7 +112,7 @@ async fn test_metadata_chain_events() { let schema = kamu_adapter_graphql::schema_quiet(); let res = schema - .execute(async_graphql::Request::new(request_code.clone()).data(catalog_authorized)) + .execute(async_graphql::Request::new(request_code.clone()).data(harness.catalog_authorized)) .await; assert!(res.is_ok(), "{res:?}"); @@ -177,23 +165,10 @@ async fn test_metadata_chain_events() { #[test_log::test(tokio::test)] async fn metadata_chain_append_event() { - let tempdir = tempfile::tempdir().unwrap(); - - let base_catalog = dill::CatalogBuilder::new() - .add::() - .add::() - .add_builder( - DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) - .with_multi_tenant(false), - ) - .bind::() - .add::() - .build(); + let harness = GraphQLMetadataChainHarness::new(false); - let (catalog_anonymous, catalog_authorized) = authentication_catalogs(&base_catalog); - - let dataset_repo = catalog_authorized + let dataset_repo = harness + .catalog_authorized .get_one::() .unwrap(); let create_result = dataset_repo @@ -243,12 +218,12 @@ async fn metadata_chain_append_event() { let schema = kamu_adapter_graphql::schema_quiet(); let res = schema - .execute(async_graphql::Request::new(request_code.clone()).data(catalog_anonymous)) + .execute(async_graphql::Request::new(request_code.clone()).data(harness.catalog_anonymous)) .await; expect_anonymous_access_error(res); let res = schema - .execute(async_graphql::Request::new(request_code.clone()).data(catalog_authorized)) + .execute(async_graphql::Request::new(request_code.clone()).data(harness.catalog_authorized)) .await; assert!(res.is_ok(), "{res:?}"); assert_eq!( @@ -273,23 +248,10 @@ async fn metadata_chain_append_event() { #[test_log::test(tokio::test)] async fn metadata_update_readme_new() { - let tempdir = tempfile::tempdir().unwrap(); - - let base_catalog = dill::CatalogBuilder::new() - .add::() - .add::() - .add_builder( - DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) - .with_multi_tenant(false), - ) - .bind::() - .add::() - .build(); + let harness = GraphQLMetadataChainHarness::new(false); - let (catalog_anonymous, catalog_authorized) = authentication_catalogs(&base_catalog); - - let dataset_repo = catalog_authorized + let dataset_repo = harness + .catalog_authorized .get_one::() .unwrap(); let create_result = dataset_repo @@ -329,14 +291,16 @@ async fn metadata_update_readme_new() { let res = schema .execute( - async_graphql::Request::new(new_readme_request_code.clone()).data(catalog_anonymous), + async_graphql::Request::new(new_readme_request_code.clone()) + .data(harness.catalog_anonymous), ) .await; expect_anonymous_access_error(res); let res = schema .execute( - async_graphql::Request::new(new_readme_request_code).data(catalog_authorized.clone()), + async_graphql::Request::new(new_readme_request_code) + .data(harness.catalog_authorized.clone()), ) .await; @@ -397,7 +361,7 @@ async fn metadata_update_readme_new() { ) .replace("", &create_result.dataset_handle.id.to_string()), ) - .data(catalog_authorized.clone()), + .data(harness.catalog_authorized.clone()), ) .await; @@ -435,7 +399,7 @@ async fn metadata_update_readme_new() { ) .replace("", &create_result.dataset_handle.id.to_string()), ) - .data(catalog_authorized.clone()), + .data(harness.catalog_authorized.clone()), ) .await; @@ -488,7 +452,7 @@ async fn metadata_update_readme_new() { ) .replace("", &create_result.dataset_handle.id.to_string()), ) - .data(catalog_authorized), + .data(harness.catalog_authorized), ) .await; @@ -533,3 +497,37 @@ async fn assert_attachments_eq(dataset: Arc, expected: SetAttachmen } //////////////////////////////////////////////////////////////////////////////////////// + +struct GraphQLMetadataChainHarness { + _tempdir: tempfile::TempDir, + catalog_authorized: dill::Catalog, + catalog_anonymous: dill::Catalog, +} + +impl GraphQLMetadataChainHarness { + fn new(is_multi_tenant: bool) -> Self { + let tempdir = tempfile::tempdir().unwrap(); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); + + let base_catalog = dill::CatalogBuilder::new() + .add::() + .add::() + .add_builder( + DatasetRepositoryLocalFs::builder() + .with_root(datasets_dir) + .with_multi_tenant(is_multi_tenant), + ) + .bind::() + .add::() + .build(); + + let (catalog_anonymous, catalog_authorized) = authentication_catalogs(&base_catalog); + + Self { + _tempdir: tempdir, + catalog_anonymous, + catalog_authorized, + } + } +} diff --git a/src/adapter/graphql/tests/tests/test_gql_search.rs b/src/adapter/graphql/tests/tests/test_gql_search.rs index 8fa72e9606..e024930df1 100644 --- a/src/adapter/graphql/tests/tests/test_gql_search.rs +++ b/src/adapter/graphql/tests/tests/test_gql_search.rs @@ -18,6 +18,8 @@ use opendatafabric::*; #[tokio::test] async fn query() { let tempdir = tempfile::tempdir().unwrap(); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let cat = dill::CatalogBuilder::new() .add::() @@ -26,7 +28,7 @@ async fn query() { .add::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs b/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs index a50dee80b1..0ec30e0bb6 100644 --- a/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs +++ b/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs @@ -211,6 +211,8 @@ impl ServerHarness { dataset_action_authorizer: MockDatasetActionAuthorizer, ) -> Self { let temp_dir = tempfile::TempDir::new().unwrap(); + let datasets_dir = temp_dir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let mut catalog_builder = dill::CatalogBuilder::new(); catalog_builder.add::(); @@ -227,7 +229,7 @@ impl ServerHarness { .add_builder( DatasetRepositoryLocalFs::builder() .with_multi_tenant(false) - .with_root(temp_dir.path().join("datasets")), + .with_root(datasets_dir), ) .bind::(); diff --git a/src/adapter/http/tests/tests/test_routing.rs b/src/adapter/http/tests/tests/test_routing.rs index a0b0ce7b59..904e170810 100644 --- a/src/adapter/http/tests/tests/test_routing.rs +++ b/src/adapter/http/tests/tests/test_routing.rs @@ -209,6 +209,32 @@ async fn test_routing_dataset_name() { await_client_server_flow!(server, client); } +#[test_log::test(tokio::test)] +async fn test_routing_dataset_name_case_insensetive() { + let repo = setup_repo().await; + + let server = setup_server( + repo.catalog, + "/:dataset_name", + |Path(p): Path| DatasetAlias::new(None, p.dataset_name).into_local_ref(), + ); + + let dataset_url = url::Url::parse(&format!( + "http://{}/{}/", + server.local_addr(), + repo.created_dataset + .dataset_handle + .alias + .dataset_name + .to_ascii_uppercase() + )) + .unwrap(); + + let client = setup_client(dataset_url, repo.created_dataset.head); + + await_client_server_flow!(server, client); +} + ///////////////////////////////////////////////////////////////////////////////////////// #[allow(dead_code)] diff --git a/src/adapter/odata/tests/tests/test_handlers.rs b/src/adapter/odata/tests/tests/test_handlers.rs index b0f8063f30..11360d8313 100644 --- a/src/adapter/odata/tests/tests/test_handlers.rs +++ b/src/adapter/odata/tests/tests/test_handlers.rs @@ -247,8 +247,10 @@ impl TestHarness { let temp_dir = tempfile::tempdir().unwrap(); let run_info_dir = temp_dir.path().join("run"); let cache_dir = temp_dir.path().join("cache"); + let datasets_dir = temp_dir.path().join("datasets"); std::fs::create_dir(&run_info_dir).unwrap(); std::fs::create_dir(cache_dir).unwrap(); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog = dill::CatalogBuilder::new() .add::() @@ -260,7 +262,7 @@ impl TestHarness { .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(temp_dir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/app/cli/src/services/accounts/account_service.rs b/src/app/cli/src/services/accounts/account_service.rs index 0907dd89ba..3bdf537a2e 100644 --- a/src/app/cli/src/services/accounts/account_service.rs +++ b/src/app/cli/src/services/accounts/account_service.rs @@ -27,7 +27,7 @@ pub const LOGIN_METHOD_PASSWORD: &str = "password"; ///////////////////////////////////////////////////////////////////////////////////////// pub struct AccountService { - pub predefined_accounts: HashMap, + pub predefined_accounts: HashMap, pub allow_login_unknown: bool, } @@ -40,7 +40,7 @@ impl AccountService { for predefined_account in &users_config.predefined { predefined_accounts.insert( - predefined_account.account_name.to_string(), + predefined_account.account_name.clone(), predefined_account.clone(), ); } @@ -120,14 +120,14 @@ impl AccountService { RelatedAccountIndication::new(target_account) } - fn find_account_info_impl(&self, account_name: &String) -> Option { + fn find_account_info_impl(&self, account_name: &AccountName) -> Option { // The account might be predefined in the configuration self.predefined_accounts.get(account_name).cloned() } fn get_account_info_impl( &self, - account_name: &String, + account_name: &AccountName, ) -> Result { // The account might be predefined in the configuration match self.predefined_accounts.get(account_name) { @@ -140,9 +140,9 @@ impl AccountService { if self.allow_login_unknown { Ok(AccountInfo { account_id: FAKE_ACCOUNT_ID.to_string(), - account_name: AccountName::new_unchecked(account_name), + account_name: account_name.clone(), account_type: AccountType::User, - display_name: account_name.clone(), + display_name: account_name.to_string(), avatar_url: None, is_admin: false, }) @@ -190,7 +190,9 @@ impl auth::AuthenticationProvider for AccountService { // The account might be predefined in the configuration let account_info = self - .get_account_info_impl(&password_login_credentials.login) + .get_account_info_impl(&AccountName::new_unchecked( + &password_login_credentials.login, + )) .map_err(auth::ProviderLoginError::RejectedCredentials)?; // Store login as provider credentials @@ -216,7 +218,7 @@ impl auth::AuthenticationProvider for AccountService { .int_err()?; let account_info = self - .get_account_info_impl(&provider_credentials.account_name.to_string()) + .get_account_info_impl(&provider_credentials.account_name) .int_err()?; Ok(account_info) @@ -226,7 +228,7 @@ impl auth::AuthenticationProvider for AccountService { &'a self, account_name: &'a AccountName, ) -> Result, InternalError> { - Ok(self.find_account_info_impl(&account_name.into())) + Ok(self.find_account_info_impl(account_name)) } } diff --git a/src/domain/opendatafabric/src/identity/dataset_identity.rs b/src/domain/opendatafabric/src/identity/dataset_identity.rs index 6f4656ea2e..403e652aba 100644 --- a/src/domain/opendatafabric/src/identity/dataset_identity.rs +++ b/src/domain/opendatafabric/src/identity/dataset_identity.rs @@ -7,7 +7,9 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use std::borrow::Cow; use std::convert::{AsRef, TryFrom}; +use std::hash::Hash; use std::sync::Arc; use std::{cmp, fmt, ops}; @@ -104,13 +106,13 @@ macro_rules! impl_serde { } pub(crate) use impl_serde; -use like::Like; +use like::ILike; //////////////////////////////////////////////////////////////////////////////// macro_rules! newtype_str { ($typ:ident, $parse:expr, $visitor:ident) => { - #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + #[derive(Debug, Clone, Eq)] pub struct $typ(Arc); impl $typ { @@ -129,6 +131,15 @@ macro_rules! newtype_str { pub fn from_inner_unchecked(s: Arc) -> Self { Self(s) } + + pub fn into_lowercase(s: &str) -> Cow<'_, str> { + let bytes = s.as_bytes(); + if !bytes.iter().any(u8::is_ascii_uppercase) { + Cow::Borrowed(s) + } else { + Cow::Owned(s.to_ascii_lowercase()) + } + } } impl From<$typ> for String { @@ -159,6 +170,30 @@ macro_rules! newtype_str { } } + impl PartialEq for $typ { + fn eq(&self, other: &$typ) -> bool { + self.eq_ignore_ascii_case(other) + } + } + + impl Hash for $typ { + fn hash(&self, state: &mut H) { + Self::into_lowercase(&self.0).hash(state); + } + } + + impl PartialOrd for $typ { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + impl Ord for $typ { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + Self::into_lowercase(self).cmp(&Self::into_lowercase(other)) + } + } + impl ops::Deref for $typ { type Target = str; @@ -233,7 +268,7 @@ newtype_str!( impl DatasetNamePattern { pub fn matches(&self, dataset_name: &DatasetName) -> bool { - Like::::like(dataset_name.as_str(), self).unwrap() + ILike::::ilike(dataset_name.as_str(), self).unwrap() } } diff --git a/src/domain/opendatafabric/src/lib.rs b/src/domain/opendatafabric/src/lib.rs index 6c6ef631db..3c2dfceef2 100644 --- a/src/domain/opendatafabric/src/lib.rs +++ b/src/domain/opendatafabric/src/lib.rs @@ -8,6 +8,7 @@ // by the Apache License, Version 2.0. #![feature(error_generic_member_access)] +#![feature(let_chains)] pub mod dtos; pub use dtos::*; diff --git a/src/domain/opendatafabric/tests/tests/test_dataset_identity.rs b/src/domain/opendatafabric/tests/tests/test_dataset_identity.rs index 2272df140f..01901f7058 100644 --- a/src/domain/opendatafabric/tests/tests/test_dataset_identity.rs +++ b/src/domain/opendatafabric/tests/tests/test_dataset_identity.rs @@ -233,3 +233,55 @@ fn test_dataset_refs_conversions() { alias: DatasetAlias::try_from("bar").unwrap(), }); } + +#[test] +fn test_dataset_alias_eq() { + assert_eq!( + DatasetAlias::from_str("account/net.example.com").unwrap(), + DatasetAlias::from_str("aCCouNt/net.ExaMplE.coM").unwrap(), + ); + assert_eq!( + DatasetAlias::from_str("account/net.example.com").unwrap(), + DatasetAlias::from_str("account/net.example.com").unwrap(), + ); + assert_eq!( + DatasetAlias::from_str("net.example.com").unwrap(), + DatasetAlias::from_str("net.ExaMplE.coM").unwrap(), + ); + assert_ne!( + DatasetAlias::from_str("account/net.example.com").unwrap(), + DatasetAlias::from_str("aCCouNt1/net.eXamPle.cOm").unwrap(), + ); + assert_ne!( + DatasetAlias::from_str("account1/net.example.com").unwrap(), + DatasetAlias::from_str("account/net.example.com").unwrap(), + ); + assert_ne!( + DatasetAlias::from_str("net.example.com").unwrap(), + DatasetAlias::from_str("account/net.example.com").unwrap(), + ); +} + +#[test] +fn test_dataset_remote_alias_eq() { + assert_eq!( + DatasetAliasRemote::from_str("repository/net.example.com").unwrap(), + DatasetAliasRemote::from_str("repository/net.ExaMplE.coM").unwrap(), + ); + assert_eq!( + DatasetAliasRemote::from_str("repository/net.example.com").unwrap(), + DatasetAliasRemote::from_str("repository/net.example.com").unwrap(), + ); + assert_eq!( + DatasetAliasRemote::from_str("repository/account/net.example.com").unwrap(), + DatasetAliasRemote::from_str("repository/AccOuNt/net.ExaMplE.coM").unwrap(), + ); + assert_eq!( + DatasetAliasRemote::from_str("repository/net.example.com").unwrap(), + DatasetAliasRemote::from_str("rEpoSitOry/net.ExaMplE.coM").unwrap(), + ); + assert_ne!( + DatasetAliasRemote::from_str("repository/account/net.example.com").unwrap(), + DatasetAliasRemote::from_str("repository/net.example.com").unwrap(), + ); +} diff --git a/src/domain/opendatafabric/tests/tests/test_dataset_refs.rs b/src/domain/opendatafabric/tests/tests/test_dataset_refs.rs index 8359b59712..ed72ed4119 100644 --- a/src/domain/opendatafabric/tests/tests/test_dataset_refs.rs +++ b/src/domain/opendatafabric/tests/tests/test_dataset_refs.rs @@ -169,6 +169,18 @@ fn test_dataset_ref_pattern_match() { }, }; assert!(pattern.matches(&dataset_handle)); + + let expression = "nEt.eXample%"; + let dataset_name = "net.example.com"; + let pattern = DatasetRefPattern::from_str(expression).unwrap(); + let dataset_handle = DatasetHandle { + id: default_dataset_id.clone(), + alias: DatasetAlias { + account_name: None, + dataset_name: DatasetName::from_str(dataset_name).unwrap(), + }, + }; + assert!(pattern.matches(&dataset_handle)); } #[test] diff --git a/src/infra/core/src/remote_repository_registry_impl.rs b/src/infra/core/src/remote_repository_registry_impl.rs index 6a758ea886..90786bca96 100644 --- a/src/infra/core/src/remote_repository_registry_impl.rs +++ b/src/infra/core/src/remote_repository_registry_impl.rs @@ -36,6 +36,23 @@ impl RemoteRepositoryRegistryImpl { std::fs::create_dir_all(&repos_dir)?; Ok(Self::new(repos_dir)) } + + pub fn get_repository_file_path(&self, repo_name: &RepoName) -> Option { + let file_path = self.repos_dir.join(repo_name); + + if !file_path.exists() { + // run full scan to support case-insensetive matches + let all_repositories_stream = self.get_all_repositories(); + for repository_name in all_repositories_stream { + if &repository_name == repo_name { + return Some(self.repos_dir.join(repository_name)); + } + } + return None; + } + + Some(file_path) + } } //////////////////////////////////////////////////////////////////////////////////////// @@ -54,25 +71,22 @@ impl RemoteRepositoryRegistry for RemoteRepositoryRegistryImpl { } fn get_repository(&self, repo_name: &RepoName) -> Result { - let file_path = self.repos_dir.join(repo_name); - - if !file_path.exists() { - return Err(RepositoryNotFoundError { - repo_name: repo_name.clone(), - } - .into()); + if let Some(file_path) = self.get_repository_file_path(repo_name) { + let file = std::fs::File::open(file_path).int_err()?; + let manifest: Manifest = + serde_yaml::from_reader(&file).int_err()?; + assert_eq!(manifest.kind, "Repository"); + return Ok(manifest.content); } - let file = std::fs::File::open(&file_path).int_err()?; - let manifest: Manifest = serde_yaml::from_reader(&file).int_err()?; - assert_eq!(manifest.kind, "Repository"); - Ok(manifest.content) + Err(RepositoryNotFoundError { + repo_name: repo_name.clone(), + } + .into()) } fn add_repository(&self, repo_name: &RepoName, mut url: Url) -> Result<(), AddRepoError> { - let file_path = self.repos_dir.join(repo_name); - - if file_path.exists() { + if self.get_repository_file_path(repo_name).is_some() { return Err(RepositoryAlreadyExistsError { repo_name: repo_name.clone(), } @@ -90,23 +104,20 @@ impl RemoteRepositoryRegistry for RemoteRepositoryRegistryImpl { content: RepositoryAccessInfo { url }, }; - let file = std::fs::File::create(&file_path).int_err()?; + let file = std::fs::File::create(self.repos_dir.join(repo_name)).int_err()?; serde_yaml::to_writer(file, &manifest).int_err()?; Ok(()) } fn delete_repository(&self, repo_name: &RepoName) -> Result<(), DeleteRepoError> { - let file_path = self.repos_dir.join(repo_name); - - if !file_path.exists() { - return Err(RepositoryNotFoundError { - repo_name: repo_name.clone(), - } - .into()); + if let Some(file_path) = self.get_repository_file_path(repo_name) { + std::fs::remove_file(file_path).int_err()?; + return Ok(()); } - - std::fs::remove_file(&file_path).int_err()?; - Ok(()) + Err(RepositoryNotFoundError { + repo_name: repo_name.clone(), + } + .into()) } } diff --git a/src/infra/core/src/repos/dataset_repository_local_fs.rs b/src/infra/core/src/repos/dataset_repository_local_fs.rs index de08bebd65..d7efd0eee7 100644 --- a/src/infra/core/src/repos/dataset_repository_local_fs.rs +++ b/src/infra/core/src/repos/dataset_repository_local_fs.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; use async_trait::async_trait; @@ -114,6 +114,18 @@ impl DatasetRepositoryLocalFs { self.storage_strategy.get_dataset_path(&dataset_handle), )) } + + fn get_canonical_path_param(dataset_path: &Path) -> Result<(PathBuf, String), InternalError> { + let canonical_dataset_path = std::fs::canonicalize(dataset_path).int_err()?; + let dataset_name_str = canonical_dataset_path + .file_name() + .unwrap() + .to_str() + .unwrap() + .to_string(); + + Ok((canonical_dataset_path, dataset_name_str)) + } } ///////////////////////////////////////////////////////////////////////////////////////// @@ -240,8 +252,13 @@ impl DatasetRepository for DatasetRepositoryLocalFs { } // It's okay to create a new dataset by this point - let dataset_id = seed_block.event.dataset_id.clone(); - let dataset_handle = DatasetHandle::new(dataset_id, dataset_alias.clone()); + let dataset_handle = DatasetHandle::new( + seed_block.event.dataset_id.clone(), + self.storage_strategy + .canonical_dataset_alias(dataset_alias) + .int_err()?, + ); + let dataset_path = self.storage_strategy.get_dataset_path(&dataset_handle); let layout = DatasetLayout::create(&dataset_path).int_err()?; let dataset = Self::build_dataset(layout, self.event_bus.clone()); @@ -430,6 +447,11 @@ trait DatasetStorageStrategy: Sync + Send { dataset_handle: &DatasetHandle, new_name: &DatasetName, ) -> Result<(), InternalError>; + + fn canonical_dataset_alias( + &self, + raw_alias: &DatasetAlias, + ) -> Result; } #[derive(thiserror::Error, Debug)] @@ -476,10 +498,11 @@ impl DatasetSingleTenantStorageStrategy { &self, dataset_path: &PathBuf, dataset_alias: &DatasetAlias, - ) -> Result { + ) -> Result<(DatasetSummary, DatasetAlias), ResolveDatasetError> { let layout = DatasetLayout::new(dataset_path); let dataset = DatasetRepositoryLocalFs::build_dataset(layout, self.event_bus.clone()); - dataset + + let dataset_summary = dataset .get_summary(GetSummaryOpts::default()) .await .map_err(|e| { @@ -490,7 +513,31 @@ impl DatasetSingleTenantStorageStrategy { } else { ResolveDatasetError::Internal(e.int_err()) } - }) + })?; + + let (_, canonical_dataset_name) = + DatasetRepositoryLocalFs::get_canonical_path_param(dataset_path)?; + let canonical_dataset_alias = DatasetAlias { + dataset_name: DatasetName::new_unchecked(canonical_dataset_name.as_str()), + account_name: None, + }; + + Ok((dataset_summary, canonical_dataset_alias)) + } + + async fn resolve_dataset_handle( + &self, + dataset_path: &PathBuf, + dataset_alias: &DatasetAlias, + ) -> Result { + let (summary, canonical_dataset_alias) = self + .attempt_resolving_summary_via_path(dataset_path, dataset_alias) + .await?; + + Ok(DatasetHandle::new( + summary.id, + canonical_dataset_alias.clone(), + )) } } @@ -517,7 +564,7 @@ impl DatasetStorageStrategy for DatasetSingleTenantStorageStrategy { } let dataset_name = DatasetName::try_from(&dataset_dir_entry.file_name()).int_err()?; let dataset_alias = DatasetAlias::new(None, dataset_name); - match self.resolve_dataset_alias(&dataset_alias).await { + match self.resolve_dataset_handle(&dataset_dir_entry.path(), &dataset_alias).await { Ok(hdl) => { yield hdl; Ok(()) } Err(ResolveDatasetError::NotFound(_)) => Ok(()), Err(e) => Err(e.int_err()) @@ -546,15 +593,27 @@ impl DatasetStorageStrategy for DatasetSingleTenantStorageStrategy { let dataset_path = self.dataset_path_impl(dataset_alias); if !dataset_path.exists() { + use tokio_stream::StreamExt; + + let mut all_datasets_stream = self.get_all_datasets(); + + while let Some(dataset_handle) = all_datasets_stream.try_next().await.unwrap() { + if &dataset_handle.alias == dataset_alias { + return self + .resolve_dataset_handle( + &self.root.join(&dataset_handle.alias.dataset_name), + &dataset_handle.alias, + ) + .await; + } + } return Err(ResolveDatasetError::NotFound(DatasetNotFoundError { dataset_ref: dataset_alias.as_local_ref(), })); } - let summary = self - .attempt_resolving_summary_via_path(&dataset_path, dataset_alias) - .await?; - Ok(DatasetHandle::new(summary.id, dataset_alias.clone())) + self.resolve_dataset_handle(&dataset_path, dataset_alias) + .await } async fn resolve_dataset_id( @@ -578,12 +637,12 @@ impl DatasetStorageStrategy for DatasetSingleTenantStorageStrategy { let dataset_path = self.dataset_path_impl(&alias); - let summary = self + let (summary, canonical_dataset_alias) = self .attempt_resolving_summary_via_path(&dataset_path, &alias) .await?; if summary.id == *dataset_id { - return Ok(DatasetHandle::new(summary.id, alias)); + return Ok(DatasetHandle::new(summary.id, canonical_dataset_alias)); } } @@ -611,6 +670,13 @@ impl DatasetStorageStrategy for DatasetSingleTenantStorageStrategy { std::fs::rename(old_dataset_path, new_dataset_path).int_err()?; Ok(()) } + + fn canonical_dataset_alias( + &self, + raw_alias: &DatasetAlias, + ) -> Result { + Ok(raw_alias.clone()) + } } ///////////////////////////////////////////////////////////////////////////////////////// @@ -727,6 +793,39 @@ impl DatasetMultiTenantStorageStrategy { } }) } + + fn resolve_account_dir( + &self, + account_name: &AccountName, + ) -> Result<(PathBuf, AccountName), ResolveDatasetError> { + let account_dataset_dir_path = self.root.join(account_name); + + if !account_dataset_dir_path.is_dir() { + let read_account_dirs = std::fs::read_dir(self.root.as_path()).int_err()?; + + for read_account_dir in read_account_dirs { + let account_dir_name = AccountName::new_unchecked( + read_account_dir + .int_err()? + .file_name() + .to_str() + .unwrap_or(""), + ); + if account_name == &account_dir_name { + return Ok((self.root.join(&account_dir_name), account_dir_name)); + } + } + return Ok((account_dataset_dir_path, account_name.clone())); + } + + let (canonical_account_dataset_dir_path, canonical_account_name) = + DatasetRepositoryLocalFs::get_canonical_path_param(&account_dataset_dir_path)?; + + Ok(( + canonical_account_dataset_dir_path, + AccountName::new_unchecked(canonical_account_name.as_str()), + )) + } } #[async_trait] @@ -791,10 +890,14 @@ impl DatasetStorageStrategy for DatasetMultiTenantStorageStrategy { dataset_alias: &DatasetAlias, ) -> Result { let effective_account_name = self.effective_account_name(dataset_alias); + let (account_dataset_dir_path, _) = self.resolve_account_dir(effective_account_name)?; - let account_dataset_dir_path = self.root.join(effective_account_name); if account_dataset_dir_path.is_dir() { - let read_dataset_dir = std::fs::read_dir(account_dataset_dir_path).int_err()?; + let read_dataset_dir = std::fs::read_dir(account_dataset_dir_path).map_err(|_| { + ResolveDatasetError::NotFound(DatasetNotFoundError { + dataset_ref: dataset_alias.as_local_ref(), + }) + })?; for r_dataset_dir in read_dataset_dir { let dataset_dir_entry = r_dataset_dir.int_err()?; @@ -891,6 +994,18 @@ impl DatasetStorageStrategy for DatasetMultiTenantStorageStrategy { Ok(()) } + + fn canonical_dataset_alias( + &self, + raw_alias: &DatasetAlias, + ) -> Result { + Ok(if let Some(account_name) = &raw_alias.account_name { + let (_, canonical_account_name) = self.resolve_account_dir(account_name).int_err()?; + DatasetAlias::new(Some(canonical_account_name), raw_alias.dataset_name.clone()) + } else { + raw_alias.clone() + }) + } } ///////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/utils/datasets_filtering.rs b/src/infra/core/src/utils/datasets_filtering.rs index 5206f5d8b3..4f85b5a701 100644 --- a/src/infra/core/src/utils/datasets_filtering.rs +++ b/src/infra/core/src/utils/datasets_filtering.rs @@ -8,7 +8,6 @@ // by the Apache License, Version 2.0. use std::pin::Pin; -use std::str::FromStr; use std::sync::Arc; use futures::{future, StreamExt, TryStreamExt}; @@ -156,7 +155,7 @@ pub fn matches_remote_ref_pattern( match remote_ref_pattern { DatasetRefAnyPattern::Ref(_) | DatasetRefAnyPattern::PatternLocal(_) => unreachable!(), DatasetRefAnyPattern::PatternAmbiguous(repo_name, dataset_name_pattern) => { - let repo_name = RepoName::from_str(&repo_name.pattern).unwrap(); + let repo_name = RepoName::new_unchecked(&repo_name.pattern); repo_name == dataset_alias_remote.repo_name && dataset_name_pattern.matches(&dataset_alias_remote.dataset_name) } @@ -199,8 +198,9 @@ pub fn matches_local_ref_pattern( dataset_name_pattern.matches(&dataset_handle.alias.dataset_name) } DatasetRefAnyPattern::PatternAmbiguous(account_name, dataset_name_pattern) => { - let account_name = AccountName::from_str(&account_name.pattern).unwrap(); - Some(account_name) == dataset_handle.alias.account_name + let account_name = AccountName::new_unchecked(&account_name.pattern); + (dataset_handle.alias.account_name.is_some() + && &account_name == dataset_handle.alias.account_name.as_ref().unwrap()) && dataset_name_pattern.matches(&dataset_handle.alias.dataset_name) } } diff --git a/src/infra/core/tests/tests/engine/test_engine_io.rs b/src/infra/core/tests/tests/engine/test_engine_io.rs index 088d5f8318..343ee57c38 100644 --- a/src/infra/core/tests/tests/engine/test_engine_io.rs +++ b/src/infra/core/tests/tests/engine/test_engine_io.rs @@ -223,6 +223,8 @@ async fn test_engine_io_local_file_mount() { let tempdir = tempfile::tempdir().unwrap(); let run_info_dir = tempdir.path().join("run"); let cache_dir = tempdir.path().join("cache"); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); std::fs::create_dir(&run_info_dir).unwrap(); std::fs::create_dir(&cache_dir).unwrap(); @@ -233,7 +235,7 @@ async fn test_engine_io_local_file_mount() { .add_value(CurrentAccountSubject::new_test()) .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/engine/test_engine_transform.rs b/src/infra/core/tests/tests/engine/test_engine_transform.rs index ae796943cc..c6558fadfd 100644 --- a/src/infra/core/tests/tests/engine/test_engine_transform.rs +++ b/src/infra/core/tests/tests/engine/test_engine_transform.rs @@ -211,8 +211,10 @@ async fn test_transform_common(transform: Transform) { let tempdir = tempfile::tempdir().unwrap(); let run_info_dir = tempdir.path().join("run"); let cache_dir = tempdir.path().join("cache"); + let datasets_dir = tempdir.path().join("datasets"); std::fs::create_dir(&run_info_dir).unwrap(); std::fs::create_dir(&cache_dir).unwrap(); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog = dill::CatalogBuilder::new() .add_value(ContainerRuntimeConfig::default()) @@ -223,7 +225,7 @@ async fn test_transform_common(transform: Transform) { .add_value(CurrentAccountSubject::new_test()) .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/ingest/test_polling_ingest.rs b/src/infra/core/tests/tests/ingest/test_polling_ingest.rs index 91f6427a4b..42291a271f 100644 --- a/src/infra/core/tests/tests/ingest/test_polling_ingest.rs +++ b/src/infra/core/tests/tests/ingest/test_polling_ingest.rs @@ -1085,8 +1085,10 @@ impl IngestTestHarness { let temp_dir = tempfile::tempdir().unwrap(); let run_info_dir = temp_dir.path().join("run"); let cache_dir = temp_dir.path().join("cache"); + let datasets_dir = temp_dir.path().join("datasets"); std::fs::create_dir(&run_info_dir).unwrap(); std::fs::create_dir(&cache_dir).unwrap(); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog = dill::CatalogBuilder::new() .add_value(ContainerRuntimeConfig::default()) @@ -1100,7 +1102,7 @@ impl IngestTestHarness { .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(temp_dir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/ingest/test_push_ingest.rs b/src/infra/core/tests/tests/ingest/test_push_ingest.rs index 4d0b806bbf..e6b6674a3b 100644 --- a/src/infra/core/tests/tests/ingest/test_push_ingest.rs +++ b/src/infra/core/tests/tests/ingest/test_push_ingest.rs @@ -473,8 +473,10 @@ impl IngestTestHarness { let temp_dir = tempfile::tempdir().unwrap(); let run_info_dir = temp_dir.path().join("run"); let cache_dir = temp_dir.path().join("cache"); + let datasets_dir = temp_dir.path().join("datasets"); std::fs::create_dir(&run_info_dir).unwrap(); std::fs::create_dir(cache_dir).unwrap(); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog = dill::CatalogBuilder::new() .add::() @@ -484,7 +486,7 @@ impl IngestTestHarness { .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(temp_dir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/ingest/test_writer.rs b/src/infra/core/tests/tests/ingest/test_writer.rs index 546d530e48..5037b33122 100644 --- a/src/infra/core/tests/tests/ingest/test_writer.rs +++ b/src/infra/core/tests/tests/ingest/test_writer.rs @@ -933,6 +933,9 @@ struct Harness { impl Harness { async fn new(dataset_events: Vec) -> Self { let temp_dir = tempfile::tempdir().unwrap(); + let datasets_dir = temp_dir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); + let system_time = Utc.with_ymd_and_hms(2010, 1, 1, 12, 0, 0).unwrap(); let catalog = dill::CatalogBuilder::new() @@ -942,7 +945,7 @@ impl Harness { .add_value(CurrentAccountSubject::new_test()) .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(temp_dir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/repos/test_dataset_repository_local_fs.rs b/src/infra/core/tests/tests/repos/test_dataset_repository_local_fs.rs index 6ab53836ae..8abd7ed918 100644 --- a/src/infra/core/tests/tests/repos/test_dataset_repository_local_fs.rs +++ b/src/infra/core/tests/tests/repos/test_dataset_repository_local_fs.rs @@ -32,6 +32,9 @@ impl LocalFsRepoHarness { dataset_action_authorizer: TDatasetActionAuthorizer, multi_tenant: bool, ) -> Self { + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); + let catalog = dill::CatalogBuilder::new() .add::() .add::() @@ -40,7 +43,7 @@ impl LocalFsRepoHarness { .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(multi_tenant), ) .bind::() @@ -299,3 +302,39 @@ async fn test_iterate_datasets_multi_tenant() { } ///////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_create_and_get_case_insensetive_dataset() { + let tempdir = tempfile::tempdir().unwrap(); + let harness = LocalFsRepoHarness::create( + &tempdir, + auth::AlwaysHappyDatasetActionAuthorizer::new(), + false, + ); + + test_dataset_repository_shared::test_create_and_get_case_insensetive_dataset( + harness.dataset_repo.as_ref(), + None, + ) + .await; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_create_and_get_case_insensetive_dataset_multi_tenant() { + let tempdir = tempfile::tempdir().unwrap(); + let harness = LocalFsRepoHarness::create( + &tempdir, + auth::AlwaysHappyDatasetActionAuthorizer::new(), + true, + ); + + test_dataset_repository_shared::test_create_and_get_case_insensetive_dataset( + harness.dataset_repo.as_ref(), + Some(AccountName::new_unchecked(auth::DEFAULT_ACCOUNT_NAME)), + ) + .await; +} + +///////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/tests/tests/repos/test_dataset_repository_s3.rs b/src/infra/core/tests/tests/repos/test_dataset_repository_s3.rs index ee6d99224a..1c5877c704 100644 --- a/src/infra/core/tests/tests/repos/test_dataset_repository_s3.rs +++ b/src/infra/core/tests/tests/repos/test_dataset_repository_s3.rs @@ -396,3 +396,45 @@ async fn test_iterate_datasets_multi_tenant() { } ///////////////////////////////////////////////////////////////////////////////////////// + +#[test_group::group(containerized)] +#[tokio::test] +async fn test_create_and_get_case_insensetive_dataset() { + let s3 = LocalS3Server::new().await; + let harness = S3RepoHarness::create( + &s3, + auth::AlwaysHappyDatasetActionAuthorizer::new(), + false, + false, + ) + .await; + + test_dataset_repository_shared::test_create_and_get_case_insensetive_dataset( + harness.dataset_repo.as_ref(), + None, + ) + .await; +} + +///////////////////////////////////////////////////////////////////////////////////////// + +#[test_group::group(containerized)] +#[tokio::test] +async fn test_create_and_get_case_insensetive_dataset_multi_tenant() { + let s3 = LocalS3Server::new().await; + let harness = S3RepoHarness::create( + &s3, + auth::AlwaysHappyDatasetActionAuthorizer::new(), + true, + false, + ) + .await; + + test_dataset_repository_shared::test_create_and_get_case_insensetive_dataset( + harness.dataset_repo.as_ref(), + Some(AccountName::new_unchecked(auth::DEFAULT_ACCOUNT_NAME)), + ) + .await; +} + +///////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/tests/tests/repos/test_dataset_repository_shared.rs b/src/infra/core/tests/tests/repos/test_dataset_repository_shared.rs index 12074e123f..2afa5d706f 100644 --- a/src/infra/core/tests/tests/repos/test_dataset_repository_shared.rs +++ b/src/infra/core/tests/tests/repos/test_dataset_repository_shared.rs @@ -61,6 +61,89 @@ pub async fn test_create_dataset(repo: &dyn DatasetRepository, account_name: Opt ///////////////////////////////////////////////////////////////////////////////////////// +pub async fn test_create_and_get_case_insensetive_dataset( + repo: &dyn DatasetRepository, + account_name: Option, +) { + let dataset_alias_to_create = + DatasetAlias::new(account_name.clone(), DatasetName::new_unchecked("Foo")); + + assert_matches!( + repo.get_dataset(&dataset_alias_to_create.as_local_ref()) + .await + .err() + .unwrap(), + GetDatasetError::NotFound(_) + ); + + let create_result = repo + .create_dataset( + &dataset_alias_to_create, + MetadataFactory::metadata_block(MetadataFactory::seed(DatasetKind::Root).build()) + .build_typed(), + ) + .await + .unwrap(); + + assert_eq!(create_result.dataset_handle.alias, dataset_alias_to_create); + + let account_name_uppercase = account_name.clone().map(|account_name_value| { + AccountName::new_unchecked(&account_name_value.to_ascii_uppercase()) + }); + + let dataset_alias_in_another_registry = + DatasetAlias::new(account_name_uppercase, DatasetName::new_unchecked("foO")); + + // We should see the dataset + assert!(repo + .get_dataset(&dataset_alias_in_another_registry.as_local_ref()) + .await + .is_ok()); + + // Test creation another dataset for existing account with different symbols + // registry + let new_dataset_alias_to_create = DatasetAlias::new( + account_name + .clone() + .map(|a| AccountName::new_unchecked(a.to_uppercase().as_str())), + DatasetName::new_unchecked("BaR"), + ); + + let snapshot = MetadataFactory::dataset_snapshot() + .name(new_dataset_alias_to_create.clone()) + .kind(DatasetKind::Root) + .push_event(MetadataFactory::set_polling_source().build()) + .build(); + + let create_result = repo.create_dataset_from_snapshot(snapshot).await.unwrap(); + + // Assert dataset_name eq to new alias and account_name eq to old existing one + assert_eq!( + create_result.dataset_handle.alias.dataset_name, + new_dataset_alias_to_create.dataset_name + ); + assert_eq!( + create_result.dataset_handle.alias.account_name, + dataset_alias_to_create.account_name + ); + + // Now test name collision + let create_result = repo + .create_dataset( + &dataset_alias_in_another_registry, + MetadataFactory::metadata_block(MetadataFactory::seed(DatasetKind::Root).build()) + .build_typed(), + ) + .await; + + assert_matches!( + create_result.err(), + Some(CreateDatasetError::NameCollision(_)) + ); +} + +///////////////////////////////////////////////////////////////////////////////////////// + pub async fn test_create_dataset_same_name_multiple_tenants(repo: &dyn DatasetRepository) { let dataset_alias_my = DatasetAlias::new( Some(AccountName::new_unchecked("my")), diff --git a/src/infra/core/tests/tests/test_compact_service_impl.rs b/src/infra/core/tests/tests/test_compact_service_impl.rs index c5d4cd4b35..7871ce58db 100644 --- a/src/infra/core/tests/tests/test_compact_service_impl.rs +++ b/src/infra/core/tests/tests/test_compact_service_impl.rs @@ -231,7 +231,7 @@ async fn test_dataset_compact_watermark_only_blocks() { None, None, CommitOpts { - system_time: Some(harness.current_date_tame), + system_time: Some(harness.current_date_time), ..CommitOpts::default() }, ) @@ -268,7 +268,7 @@ async fn test_dataset_compact_watermark_only_blocks() { None, None, CommitOpts { - system_time: Some(harness.current_date_tame), + system_time: Some(harness.current_date_time), ..CommitOpts::default() }, ) @@ -696,24 +696,18 @@ async fn test_dataset_compact_derive_error() { let created = harness .create_dataset( MetadataFactory::dataset_snapshot() - .name("derive-foo") + .name("derive.foo") .kind(DatasetKind::Derivative) .push_event(MetadataFactory::set_data_schema().build()) .build(), ) .await; - let dataset_handle = harness - .dataset_repo - .resolve_dataset_ref(&created.dataset_handle.as_local_ref()) - .await - .unwrap(); - assert_matches!( harness .compact_svc .compact_dataset( - &dataset_handle, + &created.dataset_handle, MAX_SLICE_SIZE, MAX_SLICE_RECORDS, Some(Arc::new(NullCompactionMultiListener {})) @@ -841,40 +835,40 @@ async fn test_large_dataset_compact() { ///////////////////////////////////////////////////////////////////////////////////////// struct CompactTestHarness { + _temp_dir: tempfile::TempDir, dataset_repo: Arc, compact_svc: Arc, push_ingest_svc: Arc, verification_svc: Arc, - current_date_tame: DateTime, + current_date_time: DateTime, ctx: SessionContext, } impl CompactTestHarness { fn new() -> Self { - Self::new_local_with_authorizer(kamu_core::auth::AlwaysHappyDatasetActionAuthorizer::new()) + Self::new_local() } - fn new_local_with_authorizer( - dataset_action_authorizer: TDatasetAuthorizer, - ) -> Self { + fn new_local() -> Self { let temp_dir = tempfile::tempdir().unwrap(); let run_info_dir = temp_dir.path().join("run"); + let datasets_dir = temp_dir.path().join("datasets"); std::fs::create_dir(&run_info_dir).unwrap(); - let current_date_tame = Utc.with_ymd_and_hms(2050, 1, 1, 12, 0, 0).unwrap(); + std::fs::create_dir(&datasets_dir).unwrap(); + let current_date_time = Utc.with_ymd_and_hms(2050, 1, 1, 12, 0, 0).unwrap(); let catalog = dill::CatalogBuilder::new() .add::() .add::() .add_value(CurrentAccountSubject::new_test()) - .add_value(dataset_action_authorizer) - .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(temp_dir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() - .add_value(SystemTimeSourceStub::new_set(current_date_tame)) + .add::() + .add_value(SystemTimeSourceStub::new_set(current_date_time)) .bind::() .add::() .add_builder(CompactServiceImpl::builder().with_run_info_dir(run_info_dir.clone())) @@ -899,11 +893,12 @@ impl CompactTestHarness { let verification_svc = catalog.get_one::().unwrap(); Self { + _temp_dir: temp_dir, dataset_repo, compact_svc, push_ingest_svc, verification_svc, - current_date_tame, + current_date_time, ctx: SessionContext::new_with_config(SessionConfig::new().with_target_partitions(1)), } } @@ -1018,7 +1013,7 @@ impl CompactTestHarness { event.into(), CommitOpts { block_ref: &BlockRef::Head, - system_time: Some(self.current_date_tame), + system_time: Some(self.current_date_time), prev_block_hash: Some(Some(head)), check_object_refs: false, update_block_ref: true, diff --git a/src/infra/core/tests/tests/test_query_service_impl.rs b/src/infra/core/tests/tests/test_query_service_impl.rs index a554463110..2fef239f03 100644 --- a/src/infra/core/tests/tests/test_query_service_impl.rs +++ b/src/infra/core/tests/tests/test_query_service_impl.rs @@ -115,12 +115,15 @@ fn create_catalog_with_local_workspace( tempdir: &Path, dataset_action_authorizer: MockDatasetActionAuthorizer, ) -> dill::Catalog { + let datasets_dir = tempdir.join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); + dill::CatalogBuilder::new() .add::() .add::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/test_reset_service_impl.rs b/src/infra/core/tests/tests/test_reset_service_impl.rs index 92f0edd4ef..776ace7e17 100644 --- a/src/infra/core/tests/tests/test_reset_service_impl.rs +++ b/src/infra/core/tests/tests/test_reset_service_impl.rs @@ -111,6 +111,8 @@ struct ResetTestHarness { impl ResetTestHarness { fn new() -> Self { let tempdir = tempfile::tempdir().unwrap(); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog = dill::CatalogBuilder::new() .add::() @@ -120,7 +122,7 @@ impl ResetTestHarness { .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/test_search_service_impl.rs b/src/infra/core/tests/tests/test_search_service_impl.rs index abae1de3e7..4f108bc3ba 100644 --- a/src/infra/core/tests/tests/test_search_service_impl.rs +++ b/src/infra/core/tests/tests/test_search_service_impl.rs @@ -22,6 +22,8 @@ async fn do_test_search(tmp_workspace_dir: &Path, repo_url: Url) { let dataset_local_alias = DatasetAlias::new(None, DatasetName::new_unchecked("foo")); let repo_name = RepoName::new_unchecked("repo"); let dataset_remote_alias = DatasetAliasRemote::try_from("repo/bar").unwrap(); + let datasets_dir = tmp_workspace_dir.join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog = dill::CatalogBuilder::new() .add::() @@ -29,7 +31,7 @@ async fn do_test_search(tmp_workspace_dir: &Path, repo_url: Url) { .add_value(CurrentAccountSubject::new_test()) .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tmp_workspace_dir.join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/test_sync_service_impl.rs b/src/infra/core/tests/tests/test_sync_service_impl.rs index e69f7f216e..abaaf73b99 100644 --- a/src/infra/core/tests/tests/test_sync_service_impl.rs +++ b/src/infra/core/tests/tests/test_sync_service_impl.rs @@ -83,6 +83,9 @@ async fn do_test_sync( &dataset_alias_2, ); + let datasets_dir = tmp_workspace_dir.join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); + let catalog = dill::CatalogBuilder::new() .add::() .add::() @@ -93,7 +96,7 @@ async fn do_test_sync( .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tmp_workspace_dir.join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::() diff --git a/src/infra/core/tests/tests/test_transform_service_impl.rs b/src/infra/core/tests/tests/test_transform_service_impl.rs index dfdea79ad3..87207b7f41 100644 --- a/src/infra/core/tests/tests/test_transform_service_impl.rs +++ b/src/infra/core/tests/tests/test_transform_service_impl.rs @@ -40,6 +40,8 @@ impl TransformTestHarness { engine_provisioner: TEngineProvisioner, ) -> Self { let tempdir = tempfile::tempdir().unwrap(); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let catalog = dill::CatalogBuilder::new() .add::() @@ -51,7 +53,7 @@ impl TransformTestHarness { .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .add::() diff --git a/src/infra/core/tests/tests/test_verification_service_impl.rs b/src/infra/core/tests/tests/test_verification_service_impl.rs index 0c86be8552..adb7620c6c 100644 --- a/src/infra/core/tests/tests/test_verification_service_impl.rs +++ b/src/infra/core/tests/tests/test_verification_service_impl.rs @@ -25,6 +25,8 @@ use super::test_pull_service_impl::TestTransformService; #[tokio::test] async fn test_verify_data_consistency() { let tempdir = tempfile::tempdir().unwrap(); + let datasets_dir = tempdir.path().join("datasets"); + std::fs::create_dir(&datasets_dir).unwrap(); let dataset_alias = DatasetAlias::new(None, DatasetName::new_unchecked("bar")); @@ -38,7 +40,7 @@ async fn test_verify_data_consistency() { .bind::() .add_builder( DatasetRepositoryLocalFs::builder() - .with_root(tempdir.path().join("datasets")) + .with_root(datasets_dir) .with_multi_tenant(false), ) .bind::()