fsspec · googlyrahman · May 11, 2026 · May 12, 2026 · zhixiangli · May 12, 2026
diff --git a/gcsfs/extended_gcsfs.py b/gcsfs/extended_gcsfs.py
@@ -27,12 +27,7 @@
 from gcsfs import zb_hns_utils
 from gcsfs.core import GCSFile, GCSFileSystem
 from gcsfs.retry import DEFAULT_RETRY_CONFIG, get_storage_control_retry_config
-from gcsfs.zb_hns_utils import (
-    DirectMemmoveBuffer,
-    MRDPool,
-    PyBytes_AsString,
-    PyBytes_FromStringAndSize,
-)
+from gcsfs.zb_hns_utils import DirectMemmoveBuffer, MRDPool
 from gcsfs.zonal_file import ZonalFile
 
 logger = logging.getLogger("gcsfs")
@@ -411,39 +406,34 @@ async def _fetch_range_split(
                 await mrd.close()
 
     async def _concurrent_mrd_fetch(self, offset, length, concurrency, mrd_or_pool):
-        """Helper to handle concurrent chunk downloads into a DirectMemmoveBuffer."""
+        """Fetch a range as concurrent chunks written into one DirectMemmoveBuffer."""
         concurrency = (
             concurrency if length >= self.MIN_CHUNK_SIZE_FOR_CONCURRENCY else 1
         )
-        result_bytes = PyBytes_FromStringAndSize(None, length)
-        buffer_ptr = PyBytes_AsString(result_bytes)
-
         part_size = length // concurrency
-        tasks = []
-        buffers = []
-        loop = asyncio.get_running_loop()
 
-        # Track if the core download process failed
+        tasks = []
+        views = []
         has_error = False
 
-        async def _download(o, s, b, mrd_or_pool):
+        # The master buffer manages its own allocation under the hood
+        master_buffer = DirectMemmoveBuffer(length, self._memmove_executor)
+
+        async def _download(o, s, view, mrd_or_pool):
             async with _get_mrd_from_pool_or_mrd(mrd_or_pool) as m_client:
-                await m_client.download_ranges([(o, s, b)])
+                await m_client.download_ranges([(o, s, view)])
 
         for i in range(concurrency):
             part_offset = offset + (i * part_size)
             actual_size = part_size if i < concurrency - 1 else length - (i * part_size)
 
-            part_address = buffer_ptr + (part_offset - offset)
-            buf = DirectMemmoveBuffer(
-                part_address,
-                part_address + actual_size,
-                self._memmove_executor,
-            )
-            buffers.append(buf)
+            # Give each task a restricted view of the master buffer
+            view = master_buffer.get_view(part_offset - offset, actual_size)
+            views.append(view)
+
             tasks.append(
                 asyncio.create_task(
-                    _download(part_offset, actual_size, buf, mrd_or_pool)
+                    _download(part_offset, actual_size, view, mrd_or_pool)
                 )
             )
 
@@ -453,6 +443,8 @@ async def _download(o, s, b, mrd_or_pool):
                 if isinstance(res, Exception):
                     has_error = True
                     raise res
+            for view in views:
+                view.close()
         except BaseException:
             has_error = True
             for t in tasks:
@@ -461,18 +453,17 @@ async def _download(o, s, b, mrd_or_pool):
             await asyncio.gather(*tasks, return_exceptions=True)
             raise
         finally:
-            for buf in buffers:
-                try:
-                    await loop.run_in_executor(None, buf.close)
-                except BufferError:
-                    # If we are already handling a network/download exception,
-                    # ignore the BufferError (which is just a symptom of the drop).
-                    # If there's no download error, this means the buffer logic
-                    # itself failed, so we must surface the error.
-                    if not has_error:
-                        raise
-
-        return result_bytes
+            try:
+                master_buffer.close()
+            except Exception:
+                # If we are already handling a network/download exception,
+                # ignore the buffer's exception (which is just a symptom of the drop).
+                # If there's no download error, this means the buffer logic
+                # itself failed, so we must surface the error.
+                if not has_error:
+                    raise
+
+        return master_buffer.get_value()
 
     async def _cat_file(
         self,

diff --git a/gcsfs/tests/test_zb_hns_utils.py b/gcsfs/tests/test_zb_hns_utils.py
@@ -1,5 +1,4 @@
 import concurrent.futures
-import ctypes
 import logging
 from unittest import mock
 
@@ -380,104 +379,6 @@ async def test_mrd_pool_close_with_exceptions(create_mrd_mock, mock_gcsfs):
     assert len(pool._all_mrds) == 0
 
 
-@mock.patch("gcsfs.zb_hns_utils.ctypes.memmove")
-def test_direct_memmove_buffer_error_handling(mock_memmove):
-    size = 20
-    buffer_array = (ctypes.c_char * size)()
-    start_address = ctypes.addressof(buffer_array)
-    end_address = start_address + size
-
-    # Simulate an access violation or similar error during memory copy
-    mock_memmove.side_effect = MemoryError("Segfault simulated")
-
-    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-    buf = DirectMemmoveBuffer(start_address, end_address, executor, max_pending=2)
-
-    # First write triggers the background error
-    future = buf.write(b"bad data")
-
-    # Wait for the background thread to actually fail
-    with pytest.raises(MemoryError):
-        future.result()
-
-    # Subsequent writes should raise the stored error immediately
-    with pytest.raises(MemoryError, match="Segfault simulated"):
-        buf.write(b"more data")
-
-    # Close should also raise the stored error.
-    with pytest.raises(MemoryError, match="Segfault simulated"):
-        buf.close()
-
-    executor.shutdown()
-
-
-def test_direct_memmove_buffer():
-    data1 = b"hello"
-    data2 = b"world"
-
-    # Calculate exact size to prevent the new underflow check from failing
-    size = len(data1) + len(data2)
-    buffer_array = (ctypes.c_char * size)()
-    start_address = ctypes.addressof(buffer_array)
-    end_address = start_address + size
-
-    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
-    buf = DirectMemmoveBuffer(start_address, end_address, executor, max_pending=2)
-
-    future1 = buf.write(data1)
-    future2 = buf.write(data2)
-
-    future1.result()
-    future2.result()
-    buf.close()
-
-    result_bytes = ctypes.string_at(start_address, len(data1) + len(data2))
-    assert result_bytes == b"helloworld"
-
-    executor.shutdown()
-
-
-def test_direct_memmove_buffer_overflow():
-    """Tests that writing past the allocated end_address raises a BufferError."""
-    size = 10
-    buffer_array = (ctypes.c_char * size)()
-    start_address = ctypes.addressof(buffer_array)
-    end_address = start_address + size
-
-    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-    buf = DirectMemmoveBuffer(start_address, end_address, executor, max_pending=2)
-
-    # Fill the buffer exactly to capacity
-    buf.write(b"1234567890")
-
-    # Attempting to write even 1 more byte should trigger the overflow protection
-    with pytest.raises(BufferError, match="Attempted to write"):
-        buf.write(b"1")
-
-    buf.close()
-    executor.shutdown()
-
-
-def test_direct_memmove_buffer_underflow():
-    """Tests that closing an incompletely filled buffer raises a BufferError."""
-    size = 10
-    buffer_array = (ctypes.c_char * size)()
-    start_address = ctypes.addressof(buffer_array)
-    end_address = start_address + size
-
-    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-    buf = DirectMemmoveBuffer(start_address, end_address, executor, max_pending=2)
-
-    # Write fewer bytes than the expected capacity
-    buf.write(b"12345")
-
-    # Closing should detect that current_offset (5) < expected size (10)
-    with pytest.raises(BufferError, match="Buffer contains uninitialized data"):
-        buf.close()
-
-    executor.shutdown()
-
-
 @pytest.mark.asyncio
 async def test_mrd_pool_queue_filled_during_lock_wait(mock_gcsfs):
     pool = MRDPool(mock_gcsfs, "bucket", "obj", "123", pool_size=1)
@@ -547,28 +448,129 @@ async def fake_create_mrd():
                     assert pool._rr_index == 1
 
 
+@mock.patch("gcsfs.zb_hns_utils.ctypes.memmove")
+def test_direct_memmove_buffer_error_handling(mock_memmove):
+    # Use a size > 128KB to trigger the executor background path
+    size = 130 * 1024 + 10
+    data1 = b"a" * (130 * 1024)
+    data2 = b"b" * 10
+
+    # Simulate an access violation or similar error during memory copy
+    mock_memmove.side_effect = MemoryError("Segfault simulated")
+
+    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+    buf = DirectMemmoveBuffer(size, executor, max_pending=2)
+    view = buf.get_view(0, size)
+
+    # First write triggers the background error (slow path)
+    future = view.write(data1)
+
+    # Wait for the background thread to actually fail
+    with pytest.raises(MemoryError):
+        future.result()
+
+    # Subsequent writes should raise the stored error immediately
+    with pytest.raises(MemoryError, match="Segfault simulated"):
+        view.write(data2)
+
+    # Close should also raise the stored error.
+    with pytest.raises(MemoryError, match="Segfault simulated"):
+        buf.close()
+
+    executor.shutdown()
+
+
+def test_direct_memmove_buffer():
+    data1 = b"hello"
+    data2 = b"world"
+    size = len(data1) + len(data2)
+
+    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
+    buf = DirectMemmoveBuffer(size, executor, max_pending=2)
+    view = buf.get_view(0, size)
+
+    future1 = view.write(data1)
+    future2 = view.write(data2)
+
+    future1.result()
+    future2.result()
+
+    view.close()
+    buf.close()
+
+    result_bytes = buf.get_value()
+    assert result_bytes == b"helloworld"
+
+    executor.shutdown()
+
+
+def test_direct_memmove_buffer_overflow():
+    """Tests that writing past the view boundaries raises a BufferError."""
+    size = 10
+    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+    buf = DirectMemmoveBuffer(size, executor, max_pending=2)
+    view = buf.get_view(0, size)
+
+    # Fill the buffer exactly to capacity
+    view.write(b"1234567890")
+
+    # Attempting to write even 1 more byte should trigger the overflow protection
+    with pytest.raises(BufferError, match="Attempted to write"):
+        view.write(b"1")
+
+    view.close()
+    buf.close()
+    executor.shutdown()
+
+
+def test_direct_memmove_buffer_underflow():
+    """Tests that closing an incompletely filled view/buffer raises a BufferError."""
+    size = 10
+    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+    buf = DirectMemmoveBuffer(size, executor, max_pending=2)
+    view = buf.get_view(0, size)
+
+    # Write fewer bytes than the expected capacity
+    view.write(b"12345")
+
+    # Closing the view should detect that current_offset (5) < expected size (10)
+    with pytest.raises(BufferError, match="Buffer contains uninitialized data"):
+        view.close()
+
+    # Calling get_value on an incompletely filled buffer should also raise
+    buf.close()
+    with pytest.raises(BufferError, match="Buffer incomplete"):
+        buf.get_value()
+
+    executor.shutdown()
+
+
 @mock.patch("gcsfs.zb_hns_utils.ctypes.memmove")
 def test_direct_memmove_buffer_submit_failure(mock_memmove):
     """
     Tests that if executor.submit fails synchronously (e.g., executor is closed),
     the internal locks, semaphores, and events are properly reset, and close()
     does not hang.
     """
-    size = 10
-    buffer_array = (ctypes.c_char * size)()
-    start_address = ctypes.addressof(buffer_array)
-    end_address = start_address + size
+    # 1. Chunk > 128KB to force executor scheduling (skip the synchronous fast path)
+    chunk_size = 130 * 1024
+
+    # 2. Expected size > chunk_size to skip the Zero-Copy optimization
+    expected_size = 140 * 1024
+
+    data = b"a" * chunk_size
 
     executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-    buf = DirectMemmoveBuffer(start_address, end_address, executor, max_pending=2)
+    buf = DirectMemmoveBuffer(expected_size, executor, max_pending=2)
+    view = buf.get_view(0, expected_size)
 
     # Mock the submit method to simulate a closed executor throwing a RuntimeError
     with mock.patch.object(
         executor, "submit", side_effect=RuntimeError("Executor closed")
     ):
         # The write operation should raise the simulated RuntimeError
         with pytest.raises(RuntimeError, match="Executor closed"):
-            buf.write(b"12345")
+            view.write(data)
 
     # Verify that the internal tracking state was correctly rolled back
     assert buf._pending_count == 0
@@ -579,3 +581,46 @@ def test_direct_memmove_buffer_submit_failure(mock_memmove):
         buf.close()
 
     executor.shutdown()
+
+
+def test_direct_memmove_buffer_zero_copy():
+    """Tests that a perfectly aligned single payload avoids memory allocation completely."""
+    data = b"exact_size_payload"
+    size = len(data)
+
+    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+    buf = DirectMemmoveBuffer(size, executor, max_pending=2)
+    view = buf.get_view(0, size)
+
+    # Writing a single payload identical to the expected size
+    future = view.write(data)
+    future.result()
+
+    view.close()
+    buf.close()
+
+    # Should return the EXACT same bytes object, without copying
+    result = buf.get_value()
+    assert result is data
+
+    executor.shutdown()
+
+
+def test_direct_memmove_buffer_overlapping_views():
+    """Tests that getting overlapping views raises a ValueError."""
+    size = 100
+    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+    buf = DirectMemmoveBuffer(size, executor, max_pending=2)
+
+    # Get a view for the first half
+    _ = buf.get_view(0, 50)
+
+    # Attempting to get an overlapping view should fail
+    with pytest.raises(ValueError, match="Overlapping view requested"):
+        _ = buf.get_view(25, 50)
+
+    # Getting a view for the second half should succeed
+    _ = buf.get_view(50, 50)
+
+    buf.close()
+    executor.shutdown()