From 3370cbf55255cc3ee5316d37fe430a82e16f78c4 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Mon, 9 Mar 2026 10:32:27 -0700 Subject: [PATCH 1/5] added mv() --- adlfs/spec.py | 44 +++++++++++++++++-------- adlfs/tests/test_spec.py | 69 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 13 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index f04f1491..f76f9dde 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1340,21 +1340,19 @@ async def _separate_directory_markers_for_non_empty_directories( A directory marker is an empty blob who's name is the path of the directory. """ unique_sorted_file_paths = sorted(set(file_paths)) # Remove duplicates and sort - directory_markers = [] - files = [ - unique_sorted_file_paths[-1] - ] # The last file lexographically cannot be a directory marker for a non-empty directory. + prefix_set = set() + for fp in unique_sorted_file_paths: + parts = fp.split("/") + for i in range(1, len(parts)): + prefix_set.add("/".join(parts[:i])) - for file, next_file in zip( - unique_sorted_file_paths, unique_sorted_file_paths[1:] - ): - # /path/to/directory -- directory marker - # /path/to/directory/file -- file in directory - # /path/to/directory2/file -- file in different directory - if next_file.startswith(file + "/"): - directory_markers.append(file) + directory_markers = [] + files = [] + for fp in unique_sorted_file_paths: + if fp in prefix_set: + directory_markers.append(fp) else: - files.append(file) + files.append(fp) return files, directory_markers @@ -1381,6 +1379,23 @@ async def _rmdir(self, path: str, delimiter="/", **kwargs): await self.service_client.delete_container(container_name) self.invalidate_cache(_ROOT_PATH) + async def _mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs): + """Move file(s) from one location to another.""" + if path1 == path2: + logger.debug("%s mv: The paths are the same, so no files were moved.", self) + return + + is_dir = await self._isdir(path1) + if is_dir and not recursive: + recursive = True + + await self._copy( + path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise" + ) + await self._rm(path1, recursive=recursive) + + mv = sync_wrapper(_mv) + def size(self, path): return sync(self.loop, self._size, path) @@ -1794,6 +1809,9 @@ async def _cp_file(self, path1, path2, **kwargs): container1, blob1, version_id = self.split_path(path1, delimiter="/") container2, blob2, _ = self.split_path(path2, delimiter="/") + if await self._isdir(path1): + return + cc1 = self.service_client.get_container_client(container1) blobclient1 = cc1.get_blob_client(blob=blob1) if container1 == container2: diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 5a828e93..73785f65 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2511,3 +2511,72 @@ class TestCloseCredential: async def test_close_credential(self, credential): file_obj = SimpleNamespace(credential=credential) await close_credential(file_obj) + + +def test_mv_single_file(storage): + fs = AzureBlobFileSystem( + account_name=storage.account_name, connection_string=CONN_STR + ) + fs.mkdir("mvcontainer") + + with fs.open("mvcontainer/srcdir/file.txt", "wb") as f: + f.write(b"hello") + + fs.mv("mvcontainer/srcdir/file.txt", "mvcontainer/dstdir/") + + assert fs.exists("mvcontainer/dstdir/file.txt") + assert not fs.exists("mvcontainer/srcdir/file.txt") + assert fs.cat_file("mvcontainer/dstdir/file.txt") == b"hello" + + fs.rm("mvcontainer", recursive=True) + + +def test_mv_directory(storage): + fs = AzureBlobFileSystem( + account_name=storage.account_name, connection_string=CONN_STR + ) + fs.mkdir("mvcontainer") + + with fs.open("mvcontainer/srcdir/file.txt", "wb") as f: + f.write(b"hello") + + fs.mv("mvcontainer/srcdir", "mvcontainer/dstdir/") + + assert fs.exists("mvcontainer/dstdir/srcdir/file.txt") + assert not fs.exists("mvcontainer/srcdir/") + assert fs.cat_file("mvcontainer/dstdir/srcdir/file.txt") == b"hello" + + fs.rm("mvcontainer", recursive=True) + + +@pytest.mark.parametrize( + "src_files,expected_dst_files", + [ + pytest.param( + {"a/b/file.txt": b"test 1", "a/file.txt": b"test 2"}, + {"a/b/file.txt": b"test 1", "a/file.txt": b"test 2"}, + ), + pytest.param( + {"a/file.txt": b"test 3", "a1/file.txt": b"test 4"}, + {"a/file.txt": b"test 3", "a1/file.txt": b"test 4"}, + ), + ], +) +def test_mv_directory_structures(storage, src_files, expected_dst_files): + fs = AzureBlobFileSystem( + account_name=storage.account_name, connection_string=CONN_STR + ) + fs.mkdir("mvcontainer") + + for path, content in src_files.items(): + with fs.open(f"mvcontainer/src/{path}", "wb") as f: + f.write(content) + + fs.mv("mvcontainer/src/", "mvcontainer/dst/", recursive=True) + + for path, content in expected_dst_files.items(): + assert fs.exists(f"mvcontainer/dst/{path}") + assert fs.cat_file(f"mvcontainer/dst/{path}") == content + assert not fs.exists(f"mvcontainer/src/{path}") + + fs.rm("mvcontainer", recursive=True) From d2a25ace8cb6b146c4d1fcd46b44f9423d280249 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Thu, 12 Mar 2026 18:16:08 -0700 Subject: [PATCH 2/5] updates --- CHANGELOG.md | 1 + adlfs/tests/test_spec.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2950738..6c579a7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Unreleased - Added `**kwargs` to `AzureBlobFileSystem.exists()` - Populate `AzureBlobFile.version_id` on write when `version_aware` is enabled. - Fixed issue where unawaitable Credential types were incorrectly awaited (#431) +- Added `AzureBlobFileSystem.mv()` to address issue of files not being deleted when they are moved. 2026.2.0 -------- diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 73785f65..0025d70f 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2552,11 +2552,11 @@ def test_mv_directory(storage): @pytest.mark.parametrize( "src_files,expected_dst_files", [ - pytest.param( + ( {"a/b/file.txt": b"test 1", "a/file.txt": b"test 2"}, {"a/b/file.txt": b"test 1", "a/file.txt": b"test 2"}, ), - pytest.param( + ( {"a/file.txt": b"test 3", "a1/file.txt": b"test 4"}, {"a/file.txt": b"test 3", "a1/file.txt": b"test 4"}, ), From 78d90c5d75fd97a52aa3d00d95773100d7f068a5 Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Tue, 17 Mar 2026 17:01:24 -0700 Subject: [PATCH 3/5] updates --- adlfs/spec.py | 2 +- adlfs/tests/test_spec.py | 29 +++++++---------------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index f76f9dde..1d53e078 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1809,7 +1809,7 @@ async def _cp_file(self, path1, path2, **kwargs): container1, blob1, version_id = self.split_path(path1, delimiter="/") container2, blob2, _ = self.split_path(path2, delimiter="/") - if await self._isdir(path1): + if version_id is None and await self._isdir(path1): return cc1 = self.service_client.get_container_client(container1) diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 0025d70f..52688f86 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2549,34 +2549,19 @@ def test_mv_directory(storage): fs.rm("mvcontainer", recursive=True) -@pytest.mark.parametrize( - "src_files,expected_dst_files", - [ - ( - {"a/b/file.txt": b"test 1", "a/file.txt": b"test 2"}, - {"a/b/file.txt": b"test 1", "a/file.txt": b"test 2"}, - ), - ( - {"a/file.txt": b"test 3", "a1/file.txt": b"test 4"}, - {"a/file.txt": b"test 3", "a1/file.txt": b"test 4"}, - ), - ], -) -def test_mv_directory_structures(storage, src_files, expected_dst_files): +def test_mv_directory_structures(storage): fs = AzureBlobFileSystem( account_name=storage.account_name, connection_string=CONN_STR ) fs.mkdir("mvcontainer") - for path, content in src_files.items(): - with fs.open(f"mvcontainer/src/{path}", "wb") as f: - f.write(content) + with fs.open("mvcontainer/a/b/c/file.txt", "wb") as f: + f.write(b"content") - fs.mv("mvcontainer/src/", "mvcontainer/dst/", recursive=True) + fs.mv("mvcontainer/a/b/c", "mvcontainer/a/c", recursive=True) - for path, content in expected_dst_files.items(): - assert fs.exists(f"mvcontainer/dst/{path}") - assert fs.cat_file(f"mvcontainer/dst/{path}") == content - assert not fs.exists(f"mvcontainer/src/{path}") + assert fs.exists("mvcontainer/a/c/file.txt") + assert not fs.exists("mvcontainer/a/b/c") + assert fs.cat_file("mvcontainer/a/c/file.txt") == b"content" fs.rm("mvcontainer", recursive=True) From a5d72ea8eda044ee27173aa5faaeb5bcd90409ef Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Tue, 17 Mar 2026 17:13:51 -0700 Subject: [PATCH 4/5] removed unnecessary code --- CHANGELOG.md | 2 +- adlfs/spec.py | 17 ----------------- adlfs/tests/test_spec.py | 4 ++-- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c579a7a..a56da80d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Unreleased - Added `**kwargs` to `AzureBlobFileSystem.exists()` - Populate `AzureBlobFile.version_id` on write when `version_aware` is enabled. - Fixed issue where unawaitable Credential types were incorrectly awaited (#431) -- Added `AzureBlobFileSystem.mv()` to address issue of files not being deleted when they are moved. +- Fixed a bug where moving files does not delete them from the original location 2026.2.0 -------- diff --git a/adlfs/spec.py b/adlfs/spec.py index 1d53e078..12792aa3 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1379,23 +1379,6 @@ async def _rmdir(self, path: str, delimiter="/", **kwargs): await self.service_client.delete_container(container_name) self.invalidate_cache(_ROOT_PATH) - async def _mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs): - """Move file(s) from one location to another.""" - if path1 == path2: - logger.debug("%s mv: The paths are the same, so no files were moved.", self) - return - - is_dir = await self._isdir(path1) - if is_dir and not recursive: - recursive = True - - await self._copy( - path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise" - ) - await self._rm(path1, recursive=recursive) - - mv = sync_wrapper(_mv) - def size(self, path): return sync(self.loop, self._size, path) diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 52688f86..f457cc33 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -2540,7 +2540,7 @@ def test_mv_directory(storage): with fs.open("mvcontainer/srcdir/file.txt", "wb") as f: f.write(b"hello") - fs.mv("mvcontainer/srcdir", "mvcontainer/dstdir/") + fs.mv("mvcontainer/srcdir", "mvcontainer/dstdir/", recursive=True) assert fs.exists("mvcontainer/dstdir/srcdir/file.txt") assert not fs.exists("mvcontainer/srcdir/") @@ -2549,7 +2549,7 @@ def test_mv_directory(storage): fs.rm("mvcontainer", recursive=True) -def test_mv_directory_structures(storage): +def test_mv_nested_directories(storage): fs = AzureBlobFileSystem( account_name=storage.account_name, connection_string=CONN_STR ) From d43c24e1d1cc83919996f73ec394633a8820b59a Mon Sep 17 00:00:00 2001 From: Anjali Ratnam Date: Tue, 17 Mar 2026 17:18:02 -0700 Subject: [PATCH 5/5] updated changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a56da80d..f3206043 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ Unreleased - Added `**kwargs` to `AzureBlobFileSystem.exists()` - Populate `AzureBlobFile.version_id` on write when `version_aware` is enabled. - Fixed issue where unawaitable Credential types were incorrectly awaited (#431) -- Fixed a bug where moving files does not delete them from the original location +- Fixed a bug where moving files does not delete them from the original location (#255) 2026.2.0 --------