Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 37 additions & 16 deletions gcsfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,9 +627,11 @@ async def _list_objects(self, path, prefix="", versions=False, **kwargs):
"use_snapshot_listing"
)

max_results = kwargs.get("max_results")

# Don't cache prefixed/partial listings, in addition to
# not using the inventory report service to do listing directly.
if not prefix and not use_snapshot_listing:
if not prefix and not use_snapshot_listing and not max_results:
self.dircache[path] = out
return out

Expand Down Expand Up @@ -683,7 +685,7 @@ async def _do_list_objects(
end_offset=None,
prefix=prefix,
versions=versions,
page_size=default_page_size,
max_results=max_results,
)

async def _concurrent_list_objects_helper(
Expand Down Expand Up @@ -736,7 +738,7 @@ async def _concurrent_list_objects_helper(
end_offset=end_offsets[i],
prefix=prefix,
versions=versions,
page_size=page_size,
max_results=page_size,
)
for i in range(0, len(start_offsets))
]
Expand All @@ -761,7 +763,7 @@ async def _sequential_list_objects_helper(
end_offset,
prefix,
versions,
page_size,
max_results,
):
"""
Sequential list objects within the start and end offset range.
Expand All @@ -778,7 +780,7 @@ async def _sequential_list_objects_helper(
prefix=prefix,
startOffset=start_offset,
endOffset=end_offset,
maxResults=page_size,
maxResults=max_results,
json_out=True,
versions="true" if versions else None,
)
Expand All @@ -796,7 +798,7 @@ async def _sequential_list_objects_helper(
prefix=prefix,
startOffset=start_offset,
endOffset=end_offset,
maxResults=page_size,
maxResults=max_results,
pageToken=next_page_token,
json_out=True,
versions="true" if versions else None,
Expand Down Expand Up @@ -985,7 +987,7 @@ def _parse_timestamp(self, timestamp):

async def _info(self, path, generation=None, **kwargs):
"""File information about this path."""
path = self._strip_protocol(path)
path = self._strip_protocol(path).rstrip("/")
if "/" not in path:
try:
out = await self._call("GET", f"b/{path}", json_out=True)
Expand Down Expand Up @@ -1014,7 +1016,7 @@ async def _info(self, path, generation=None, **kwargs):
# this is a directory
return {
"bucket": bucket,
"name": path.rstrip("/"),
"name": path,
"size": 0,
"storageClass": "DIRECTORY",
"type": "directory",
Expand All @@ -1023,21 +1025,20 @@ async def _info(self, path, generation=None, **kwargs):
try:
exact = await self._get_object(path)
# this condition finds a "placeholder" - still need to check if it's a directory
if exact["size"] or not exact["name"].endswith("/"):
if not (exact["size"] == 0 and exact["name"].endswith("/")):
return exact
except FileNotFoundError:
pass
kwargs["detail"] = True # Force to true for info
out = await self._ls(path, **kwargs)
out0 = [o for o in out if o["name"].rstrip("/") == path]
if out0:
out = await self._list_objects(path, max_results=1)
exact = next((o for o in out if o["name"].rstrip("/") == path), None)
if exact and not (exact["size"] == 0 and exact["name"].endswith("/")):
# exact hit
return out0[0]
return exact
elif out:
# other stuff - must be a directory
return {
"bucket": bucket,
"name": path.rstrip("/"),
"name": path,
"size": 0,
"storageClass": "DIRECTORY",
"type": "directory",
Expand All @@ -1060,13 +1061,32 @@ async def _ls(
for entry in await self._list_objects(
path, prefix=prefix, versions=versions, **kwargs
):
if entry["size"] == 0 and entry["name"].endswith("/"):
entry = {
"bucket": entry["bucket"],
"name": path.rstrip("/"),
"size": 0,
"storageClass": "DIRECTORY",
"type": "directory",
}
if entry in out:
Comment thread
Bjoern-Rapp marked this conversation as resolved.
Outdated
continue

if versions and "generation" in entry:
entry = entry.copy()
entry["name"] = f"{entry['name']}#{entry['generation']}"

out.append(entry)

if detail:
return out
return sorted(
out,
key=lambda e: (
Comment thread
martindurant marked this conversation as resolved.
Outdated
e["name"].count("/"),
e["type"] != "directory",
e["name"],
),
)
else:
return sorted([o["name"] for o in out])

Expand Down Expand Up @@ -1490,6 +1510,7 @@ async def _put_file(
self.invalidate_cache(self._parent(rpath))

async def _isdir(self, path):

try:
return (await self._info(path))["type"] == "directory"
except OSError:
Expand Down
41 changes: 41 additions & 0 deletions gcsfs/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,47 @@ def test_dir_marker(gcs):
assert out2["type"] == "directory"


def test_dir_marker_directory_not_listed(gcs):
gcs.touch(f"{TEST_BUCKET}/psudodir/")
gcs.touch(f"{TEST_BUCKET}/psudodir/innerfolder/innerfile")
gcs.invalidate_cache()
info = gcs.info(f"{TEST_BUCKET}/psudodir")
assert info["type"] == "directory"


def test_dir_marker_directory_listed(gcs):
gcs.touch(f"{TEST_BUCKET}/psudodir/")
gcs.touch(f"{TEST_BUCKET}/psudodir/innerfolder/innerfile")
gcs.invalidate_cache()
gcs.ls(f"{TEST_BUCKET}/psudodir")
info = gcs.info(f"{TEST_BUCKET}/psudodir")
assert info["type"] == "directory"


def test_dir_marker_parent_directory_listed(gcs):
gcs.touch(f"{TEST_BUCKET}/parent_psudodir/psudodir/")
gcs.touch(f"{TEST_BUCKET}/parent_psudodir/psudodir/innerfolder/innerfile")
gcs.invalidate_cache()
gcs.ls(f"{TEST_BUCKET}/parent_psudodir")
info = gcs.info(f"{TEST_BUCKET}/parent_psudodir/psudodir")
assert info["type"] == "directory"


def test_dir_marker_info_eq_ls(gcs):
gcs.touch(f"{TEST_BUCKET}/psudodir/")
gcs.invalidate_cache()
out1 = gcs.info(f"{TEST_BUCKET}/psudodir")
out2 = gcs.ls(f"{TEST_BUCKET}/psudodir", detail=True)[0]
assert out1["type"] == "directory"
assert out1 == out2

gcs.invalidate_cache()
out3 = gcs.ls(f"{TEST_BUCKET}/psudodir", detail=True)[0]
out4 = gcs.info(f"{TEST_BUCKET}/psudodir")
assert out3["type"] == "directory"
assert out3 == out4


def test_mkdir_with_path(gcs):
with pytest.raises(FileNotFoundError):
gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=False)
Expand Down
Loading