diff --git a/gcsfs/tests/conftest.py b/gcsfs/tests/conftest.py index 40129faf..2efd177e 100644 --- a/gcsfs/tests/conftest.py +++ b/gcsfs/tests/conftest.py @@ -25,6 +25,7 @@ TEST_VERSIONED_BUCKET, TEST_ZONAL_BUCKET, ) +from gcsfs.tests.utils import is_real_gcs files = { "test/accounts.1.json": ( @@ -60,13 +61,6 @@ "zonal/test/c": b"ab\n" + b"a" * (2**18) + b"\nab", } -_MULTI_THREADED_TEST_DATA_SIZE = 5 * 1024 * 1024 # 5MB -pattern = b"0123456789abcdef" -text_files["multi_threaded_test_file"] = ( - pattern * (_MULTI_THREADED_TEST_DATA_SIZE // len(pattern)) - + pattern[: _MULTI_THREADED_TEST_DATA_SIZE % len(pattern)] -) - allfiles = dict(**files, **csv_files, **text_files) a = TEST_BUCKET + "/tmp/test/a" b = TEST_BUCKET + "/tmp/test/b" @@ -82,6 +76,37 @@ } +@pytest.fixture(autouse=True) +def _avoid_adc_timeout(monkeypatch): + """Avoid slow ADC lookups and Metadata Server requests in tests.""" + # Do not apply if tests are explicitly running against real GCS + if is_real_gcs(): + yield + return + + # Disable GCE metadata check in google-auth and gcsfs + monkeypatch.setenv("NO_GCE_CHECK", "true") + + # Set a dummy project to avoid project ID lookup timeouts if not set + if "GOOGLE_CLOUD_PROJECT" not in os.environ: + monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "dummy-project") + + yield + + +@pytest.fixture(autouse=True) +def _mock_get_bucket_type_on_emulator(): + """Mock _get_bucket_type to return UNKNOWN instantly on emulator.""" + if not is_real_gcs(): + with mock.patch( + "gcsfs.extended_gcsfs.ExtendedGcsFileSystem._get_bucket_type", + return_value=BucketType.UNKNOWN, + ): + yield + else: + yield + + def stop_docker(container): cmd = shlex.split('docker ps -a -q --filter "name=%s"' % container) cid = subprocess.check_output(cmd).strip().decode() @@ -92,6 +117,8 @@ def stop_docker(container): @pytest.fixture(scope="session") def docker_gcs(): if "STORAGE_EMULATOR_HOST" in os.environ: + if not is_real_gcs(): + params["token"] = "anon" # assume using real API or otherwise have a server already set up yield os.getenv("STORAGE_EMULATOR_HOST") return @@ -290,9 +317,6 @@ def final_cleanup(gcs_factory, buckets_to_delete): def gcs_versioned(gcs_factory, buckets_to_delete): gcs = gcs_factory() gcs.version_aware = True - is_real_gcs = ( - os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com" - ) try: # ensure we're empty. # The versioned bucket might be created by `is_versioning_enabled` # in test_core_versioned.py. We must register it for cleanup only if @@ -306,7 +330,7 @@ def gcs_versioned(gcs_factory, buckets_to_delete): buckets_to_delete.add(TEST_VERSIONED_BUCKET) except ImportError: pass # test_core_versioned is not being run - if is_real_gcs: + if is_real_gcs(): cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET) else: # For emulators, we delete and recreate the bucket for a clean state @@ -321,7 +345,7 @@ def gcs_versioned(gcs_factory, buckets_to_delete): finally: # Ensure the bucket is empty after the test. try: - if is_real_gcs: + if is_real_gcs(): cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET) except Exception as e: logging.warning( @@ -367,13 +391,9 @@ def cleanup_versioned_bucket(gcs, bucket_name, prefix=None): def _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate_bucket, **kwargs): - is_real_gcs = ( - os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com" - ) - extended_gcsfs = gcs_factory(**kwargs) # Only create/delete/populate the bucket if we are NOT using the real GCS endpoint. - if not is_real_gcs: + if not is_real_gcs(): if not extended_gcsfs.exists(TEST_ZONAL_BUCKET): extended_gcsfs.mkdir(TEST_ZONAL_BUCKET) buckets_to_delete.add(TEST_ZONAL_BUCKET) @@ -433,7 +453,7 @@ def gcs_hns(gcs_factory, buckets_to_delete): def zonal_write_mocks(): """A fixture for mocking Zonal bucket write functionality.""" - if os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com": + if is_real_gcs(): yield None return diff --git a/gcsfs/tests/integration/test_async_gcsfs.py b/gcsfs/tests/integration/test_async_gcsfs.py index 7b1345c9..8a94fb4c 100644 --- a/gcsfs/tests/integration/test_async_gcsfs.py +++ b/gcsfs/tests/integration/test_async_gcsfs.py @@ -20,6 +20,7 @@ from gcsfs.extended_gcsfs import ExtendedGcsFileSystem from gcsfs.tests.settings import TEST_HNS_BUCKET +from gcsfs.tests.utils import is_real_gcs REQUIRED_ENV_VAR = "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT" @@ -34,7 +35,7 @@ reason=f"Skipping tests: {REQUIRED_ENV_VAR} env variable is not set", ), pytest.mark.skipif( - os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com", + not is_real_gcs(), reason="Skipping tests on emulator, requires real GCS.", ), ] diff --git a/gcsfs/tests/integration/test_extended_hns.py b/gcsfs/tests/integration/test_extended_hns.py index 606055e1..c1f16267 100644 --- a/gcsfs/tests/integration/test_extended_hns.py +++ b/gcsfs/tests/integration/test_extended_hns.py @@ -19,6 +19,7 @@ from gcsfs.extended_gcsfs import BucketType, ExtendedGcsFileSystem from gcsfs.tests.settings import TEST_HNS_BUCKET, TEST_PROJECT +from gcsfs.tests.utils import is_real_gcs should_run_hns = os.getenv("GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT", "false").lower() in ( "true", @@ -27,8 +28,7 @@ # Skip these tests if not running against a real GCS backend or if experimentation flag is not set. pytestmark = pytest.mark.skipif( - os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com" - or not should_run_hns, + not is_real_gcs() or not should_run_hns, reason="This test class is for real GCS HNS buckets only and requires experimental flag.", ) diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py index dd6cd9a4..1fb102db 100644 --- a/gcsfs/tests/test_core.py +++ b/gcsfs/tests/test_core.py @@ -487,8 +487,7 @@ def test_rm_recursive(gcs): def test_rm_chunked_batch(gcs): files = [f"{TEST_BUCKET}/t{i}" for i in range(303)] - for fn in files: - gcs.touch(fn) + gcs.pipe({fn: b"" for fn in files}) files_created = gcs.find(TEST_BUCKET) for fn in files: @@ -511,8 +510,7 @@ def test_rm_wildcards_in_directory(gcs): f"{base_dir}/b1.dat", f"{base_dir}/subdir/nested.txt", ] - for f in files: - gcs.touch(f) + gcs.pipe({f: b"" for f in files}) # 1. Test '?' wildcard (non-recursive) gcs.rm(f"{base_dir}/file?.txt") @@ -2019,7 +2017,6 @@ def test_find_dircache(gcs): f"{TEST_BUCKET}/2014-01-01.csv", f"{TEST_BUCKET}/2014-01-02.csv", f"{TEST_BUCKET}/2014-01-03.csv", - f"{TEST_BUCKET}/multi_threaded_test_file", f"{TEST_BUCKET}/zonal", } assert set(gcs.ls(f"{TEST_BUCKET}/nested")) == { diff --git a/gcsfs/tests/test_core_versioned.py b/gcsfs/tests/test_core_versioned.py index 0b821acb..d6747cea 100644 --- a/gcsfs/tests/test_core_versioned.py +++ b/gcsfs/tests/test_core_versioned.py @@ -7,6 +7,7 @@ from gcsfs import GCSFileSystem from gcsfs.tests.settings import TEST_VERSIONED_BUCKET +from gcsfs.tests.utils import is_real_gcs a = TEST_VERSIONED_BUCKET + "/tmp/test/a" b = TEST_VERSIONED_BUCKET + "/tmp/test/b" @@ -22,7 +23,7 @@ def is_versioning_enabled(): """ # Don't skip when using an emulator, as we create the versioned bucket ourselves. global _VERSIONED_BUCKET_CREATED_BY_TESTS - if os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com": + if not is_real_gcs(): return True, "" try: gcs = GCSFileSystem(project=os.getenv("GCSFS_TEST_PROJECT", "project")) diff --git a/gcsfs/tests/test_credentials.py b/gcsfs/tests/test_credentials.py index af9d47df..64fcba65 100644 --- a/gcsfs/tests/test_credentials.py +++ b/gcsfs/tests/test_credentials.py @@ -45,6 +45,43 @@ def test_connect_google_default_uses_request(): assert isinstance(kwargs["request"], Request) +def test_connect_cloud_success(): + with patch("google.auth.compute_engine.Credentials") as mock_creds_class: + mock_creds = Mock() + mock_creds_class.return_value = mock_creds + + cred = GoogleCredentials( + project="my-project", access="read_only", token="cloud", on_google=True + ) + + assert cred.credentials == mock_creds + assert mock_creds.refresh.called + assert cred.method == "cloud" + + +def test_connect_cloud_failure(): + import google.auth.exceptions + + with patch("google.auth.compute_engine.Credentials") as mock_creds_class: + mock_creds = Mock() + mock_creds_class.return_value = mock_creds + mock_creds.refresh.side_effect = google.auth.exceptions.RefreshError( + "mock error" + ) + + with pytest.raises(ValueError, match="Invalid gcloud credentials"): + GoogleCredentials( + project="my-project", access="read_only", token="cloud", on_google=True + ) + + +def test_connect_cloud_not_on_google(): + with pytest.raises(ValueError): + GoogleCredentials( + project="my-project", access="read_only", token="cloud", on_google=False + ) + + @pytest.mark.parametrize("token", ["", "incorrect.token", "x" * 100]) def test_credentials_from_raw_token(token): with patch.dict(os.environ, {"FETCH_RAW_TOKEN_EXPIRY": "false"}): diff --git a/gcsfs/tests/test_extended_gcsfs.py b/gcsfs/tests/test_extended_gcsfs.py index 3598a3a6..906f8f22 100644 --- a/gcsfs/tests/test_extended_gcsfs.py +++ b/gcsfs/tests/test_extended_gcsfs.py @@ -30,14 +30,9 @@ simple_upload, upload_chunk, ) -from gcsfs.tests.conftest import ( - _MULTI_THREADED_TEST_DATA_SIZE, - csv_files, - files, - text_files, -) +from gcsfs.tests.conftest import csv_files, files, text_files from gcsfs.tests.settings import TEST_BUCKET, TEST_ZONAL_BUCKET -from gcsfs.tests.utils import tempdir, tmpfile +from gcsfs.tests.utils import is_real_gcs, tempdir, tmpfile from gcsfs.zb_hns_utils import MRDPool file = "test/accounts.1.json" @@ -73,10 +68,7 @@ def gcs_bucket_mocks(): @contextlib.contextmanager def _gcs_bucket_mocks_factory(file_data, bucket_type_val): """Creates mocks for a given file content and bucket type.""" - is_real_gcs = ( - os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com" - ) - if is_real_gcs: + if is_real_gcs(): yield None return patch_target_lookup_bucket_type = ( @@ -342,10 +334,21 @@ async def test_cat_file_on_unfinalized_file(extended_gcsfs, file_path): # ========================== Zonal Multithreaded Read Tests =========================== +_MULTI_THREADED_TEST_DATA_SIZE = 5 * 1024 * 1024 # 5MB _MULTI_THREADED_TEST_FILE = "multi_threaded_test_file" -_MULTI_THREADED_TEST_DATA = text_files[_MULTI_THREADED_TEST_FILE] +pattern = b"0123456789abcdef" +_MULTI_THREADED_TEST_DATA = ( + pattern * (_MULTI_THREADED_TEST_DATA_SIZE // len(pattern)) + + pattern[: _MULTI_THREADED_TEST_DATA_SIZE % len(pattern)] +) _MULTI_THREADED_TEST_FILE_PATH = f"{TEST_ZONAL_BUCKET}/{_MULTI_THREADED_TEST_FILE}" + +@pytest.fixture +def multi_threaded_test_file(extended_gcsfs): + extended_gcsfs.pipe(_MULTI_THREADED_TEST_FILE_PATH, _MULTI_THREADED_TEST_DATA) + + _TEST_BLOCK_SIZE_FOR_CHUNK_BOUNDARY = 1 * 1024 * 1024 # 1MB _NUM_CONCURRENCY_THREADS = 10 _READ_LENGTH_CONCURRENCY = 1024 # 1KB @@ -371,7 +374,9 @@ def _read_range_from_fs(fs, path, offset, length, block_size=None): return f.read(length) -def test_multithreaded_read_disjoint_ranges_zb(extended_gcsfs, gcs_bucket_mocks): +def test_multithreaded_read_disjoint_ranges_zb( + extended_gcsfs, gcs_bucket_mocks, multi_threaded_test_file +): """ Tests concurrent reads of disjoint ranges from the same file. Verifies that different parts of the file can be fetched simultaneously without data mix-up. @@ -399,7 +404,9 @@ def test_multithreaded_read_disjoint_ranges_zb(extended_gcsfs, gcs_bucket_mocks) assert mocks["downloader"].close.call_count == len(read_tasks) -def test_multithreaded_read_overlapping_ranges_zb(extended_gcsfs, gcs_bucket_mocks): +def test_multithreaded_read_overlapping_ranges_zb( + extended_gcsfs, gcs_bucket_mocks, multi_threaded_test_file +): """ Tests concurrent reads of overlapping ranges from the same file. """ @@ -443,7 +450,9 @@ def test_default_cache_is_readahead_chunked(extended_gcsfs, gcs_bucket_mocks): assert isinstance(f.cache, caching.ReadAheadChunked) -def test_multithreaded_read_chunk_boundary_zb(extended_gcsfs, gcs_bucket_mocks): +def test_multithreaded_read_chunk_boundary_zb( + extended_gcsfs, gcs_bucket_mocks, multi_threaded_test_file +): """ Tests concurrent reads that straddle internal buffering chunk boundaries. Verifies correct stitching of data from multiple internal requests. @@ -522,7 +531,9 @@ def _read_random_range(fs, path, file_size, read_length): return f.read(read_length) -def test_multithreaded_read_high_concurrency_zb(extended_gcsfs, gcs_bucket_mocks): +def test_multithreaded_read_high_concurrency_zb( + extended_gcsfs, gcs_bucket_mocks, multi_threaded_test_file +): """ Tests high-concurrency reads to stress the connection pooling and handling. Verifies that many concurrent requests do not lead to crashes or deadlocks. @@ -559,7 +570,7 @@ def test_multithreaded_read_high_concurrency_zb(extended_gcsfs, gcs_bucket_mocks def test_multithreaded_read_one_fails_others_survive_zb( - extended_gcsfs, gcs_bucket_mocks + extended_gcsfs, gcs_bucket_mocks, multi_threaded_test_file ): """ Tests fault tolerance: one thread's read operation fails, but others complete successfully. @@ -665,7 +676,7 @@ def _read_range_and_get_pid(path, offset, length, block_size=None): return data, os.getpid() -def test_multiprocess_read_disjoint_ranges_zb(extended_gcsfs): +def test_multiprocess_read_disjoint_ranges_zb(extended_gcsfs, multi_threaded_test_file): """ Tests concurrent reads of disjoint ranges from the same file in different processes. """ @@ -694,7 +705,9 @@ def test_multiprocess_read_disjoint_ranges_zb(extended_gcsfs): assert os.getpid() not in pids -def test_multiprocess_read_overlapping_ranges_zb(extended_gcsfs): +def test_multiprocess_read_overlapping_ranges_zb( + extended_gcsfs, multi_threaded_test_file +): """ Tests concurrent reads of overlapping ranges from the same file in different processes. """ @@ -733,7 +746,7 @@ def _read_with_passed_fs(fs, path, offset, length): return f.read(length) -def test_multiprocess_shared_fs_zb(extended_gcsfs): +def test_multiprocess_shared_fs_zb(extended_gcsfs, multi_threaded_test_file): """ Tests passing the filesystem object itself to child processes. """ @@ -1109,10 +1122,9 @@ async def test_cp_file_not_implemented_error( short_uuid = str(uuid.uuid4())[:8] source_path = f"{source_bucket}/source_{short_uuid}" dest_path = f"{dest_bucket}/dest_{short_uuid}" - is_real_gcs = os.getenv("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com" # Source file needs to exist for last case when super method is called for standard buckets - if is_real_gcs: + if is_real_gcs(): await async_gcs._pipe_file(source_path, b"test data", finalize_on_close=True) async def mock_is_zonal(bucket): @@ -1120,7 +1132,7 @@ async def mock_is_zonal(bucket): is_zonal_patch_cm = ( mock.patch.object(async_gcs, "_is_zonal_bucket", side_effect=mock_is_zonal) - if not is_real_gcs + if not is_real_gcs() else contextlib.nullcontext() ) @@ -1135,7 +1147,7 @@ async def mock_is_zonal(bucket): ): await async_gcs._cp_file(source_path, dest_path) else: # Standard -> Standard - if is_real_gcs: + if is_real_gcs(): await async_gcs._cp_file(source_path, dest_path) assert await async_gcs._cat(dest_path) == b"test data" else: diff --git a/gcsfs/tests/test_extended_gcsfs_unit.py b/gcsfs/tests/test_extended_gcsfs_unit.py index 2ba17288..3250bf2a 100644 --- a/gcsfs/tests/test_extended_gcsfs_unit.py +++ b/gcsfs/tests/test_extended_gcsfs_unit.py @@ -21,7 +21,7 @@ from gcsfs.tests.conftest import csv_files, files from gcsfs.tests.settings import TEST_BUCKET, TEST_ZONAL_BUCKET from gcsfs.tests.test_extended_gcsfs import gcs_bucket_mocks # noqa: F401 -from gcsfs.tests.utils import tmpfile +from gcsfs.tests.utils import is_real_gcs, tmpfile file = "test/accounts.1.json" file_path = f"{TEST_ZONAL_BUCKET}/{file}" @@ -46,7 +46,7 @@ reason=f"Skipping tests: {REQUIRED_ENV_VAR} env variable is not set", ), pytest.mark.skipif( - os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com", + is_real_gcs(), reason="Contains Unit tests using mocks, does not require testing on real GCS.", ), ] diff --git a/gcsfs/tests/test_extended_hns_gcsfs.py b/gcsfs/tests/test_extended_hns_gcsfs.py index d8656f4e..103c69dd 100644 --- a/gcsfs/tests/test_extended_hns_gcsfs.py +++ b/gcsfs/tests/test_extended_hns_gcsfs.py @@ -18,6 +18,7 @@ from gcsfs.extended_gcsfs import BucketType, ExtendedGcsFileSystem from gcsfs.retry import DEFAULT_RETRY_CONFIG, HttpError from gcsfs.tests.settings import TEST_HNS_BUCKET +from gcsfs.tests.utils import is_real_gcs REQUIRED_ENV_VAR = "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT" @@ -754,7 +755,7 @@ def test_hns_mkdir_nested_fails_if_create_parents_false( mocks["super_mkdir"].assert_not_called() @pytest.mark.skipif( - os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com", + is_real_gcs(), reason="This test is only to check that create_folder is not called for non-HNS buckets.", ) def test_mkdir_non_hns_bucket_falls_back(self, gcs_hns, gcs_hns_mocks): @@ -1591,7 +1592,7 @@ def test_hns_rmdir_success(self, gcs_hns, gcs_hns_mocks): mocks["super_rmdir"].assert_not_called() @pytest.mark.skipif( - os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com", + is_real_gcs(), reason="This test is only to check that delete_folder is not called in case of non-HNS buckets." "In real GCS on non-HNS bucket there would be no empty directories to delete.", ) diff --git a/gcsfs/tests/test_zonal_file.py b/gcsfs/tests/test_zonal_file.py index 008cbad3..0691c43e 100644 --- a/gcsfs/tests/test_zonal_file.py +++ b/gcsfs/tests/test_zonal_file.py @@ -9,7 +9,7 @@ ) from gcsfs.tests.settings import TEST_ZONAL_BUCKET -from gcsfs.tests.utils import tempdir, tmpfile +from gcsfs.tests.utils import is_real_gcs, tempdir, tmpfile from gcsfs.zonal_file import ZonalFile test_data = b"hello world" @@ -354,7 +354,7 @@ def test_zonal_file_append_to_empty(extended_gcsfs, zonal_write_mocks, file_path @pytest.mark.skipif( - os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com", + not is_real_gcs(), reason="This test class is for real GCS only.", ) class TestZonalFileRealGCS: diff --git a/gcsfs/tests/utils.py b/gcsfs/tests/utils.py index 720d08fa..a3e93b3b 100644 --- a/gcsfs/tests/utils.py +++ b/gcsfs/tests/utils.py @@ -40,3 +40,8 @@ def tmpfile(extension="", dir=None): else: with ignoring(OSError): os.remove(filename) + + +def is_real_gcs(): + """Checks if tests are explicitly running against real GCS.""" + return os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"