Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ There are many libraries for traversing directories. You can also do this using
- [**Filtering**](#filtering)
- [**Working with Cancellation Tokens**](#working-with-cancellation-tokens)
- [**Combination**](#combination)
- [**Transactionality**](#transactionality)


## Installation
Expand Down Expand Up @@ -178,3 +179,16 @@ for path in Crawler('../dirstree', '../cantok'):
```

> ↑ In this case, there is no deduplication of paths.


## Transactionality

If you plan to modify the directory while iterating over it — for example, deleting or moving files inside an `apply()` callback — pass `freeze=True` to take a snapshot of every matching path up front, then iterate that snapshot instead of the live filesystem:

```python
Crawler('path/to/directory', freeze=True).apply(lambda p: p.unlink())
```

> ↑ The snapshot is built on the first step of iteration, with every filter and cancellation token already applied. After that, creating, renaming or deleting anything in the directory does not affect what is yielded. Each call to `go()` or `iter()` produces its own fresh snapshot.

> ↑ Without `freeze=True`, the order of yielded paths depends on the live state of the filesystem, so modifying the directory in the middle of an iteration may silently skip or duplicate entries.
22 changes: 18 additions & 4 deletions dirstree/crawlers/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,15 @@ class Crawler(AbstractCrawler):
Only the first argument with the directory path is required, the rest are optional.
"""

def __init__(
def __init__( # noqa: PLR0913
self,
*paths: Union[str, Path],
extensions: Optional[Collection[str]] = None,
exclude: Optional[List[str]] = None,
filter: Optional[Callable[[Path], bool]] = None, # noqa: A002
token: AbstractToken = DefaultToken(), # noqa: B008
only_files: bool = True,
freeze: bool = False,
) -> None:
if extensions is not None and not only_files:
raise IncompatibleCrawlerOptionsError(
Expand All @@ -61,6 +62,7 @@ def __init__(
self.filter = filter
self.token = token
self.only_files = only_files
self.frozen = freeze

self.addictional_repr_filters: Dict[str, Callable[[Any], bool]] = {}

Expand All @@ -71,6 +73,7 @@ def __repr__(self) -> str:
'filter': not_none,
'token': lambda x: not isinstance(x, DefaultToken),
'only_files': lambda x: x is False,
'freeze': lambda x: x is True,
}
filters.update(self.addictional_repr_filters)

Expand All @@ -83,13 +86,12 @@ def __repr__(self) -> str:
'filter': self.filter,
'token': self.token,
'only_files': self.only_files,
'freeze': self.frozen,
},
filters=filters, # type: ignore[arg-type]
)

def go(self, token: AbstractToken = DefaultToken()) -> Generator[Path, None, None]: # noqa: B008
token = token + self.token

def _traverse(self, token: AbstractToken) -> Generator[Path, None, None]:
excludes_spec = pathspec.PathSpec.from_lines('gitwildmatch', self.exclude)

for path in self.paths:
Expand All @@ -111,3 +113,15 @@ def go(self, token: AbstractToken = DefaultToken()) -> Generator[Path, None, Non
break
else:
break

def go(self, token: AbstractToken = DefaultToken()) -> Generator[Path, None, None]:  # noqa: B008
    """
    Yield the paths produced by `_traverse()`, either live or from a snapshot.

    The passed token is combined with the crawler's own token (`token + self.token`),
    and the combined token is what `_traverse()` receives.

    If `self.frozen` is false, paths are streamed straight from `_traverse()`.
    If it is true, the whole traversal is first collected into a list, and the paths
    are then yielded from that list; the combined token is checked before every yield,
    and iteration stops as soon as the token evaluates to false.
    """
    combined_token = token + self.token

    if not self.frozen:
        # Live mode: hand over to the traversal generator directly.
        yield from self._traverse(combined_token)
        return

    # Frozen mode: exhaust the traversal before yielding anything.
    collected_paths = list(self._traverse(combined_token))
    for frozen_path in collected_paths:
        if not combined_token:
            break
        yield frozen_path
3 changes: 2 additions & 1 deletion dirstree/crawlers/python_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ def __init__(
exclude: Optional[List[str]] = None,
filter: Optional[Callable[[Path], bool]] = None, # noqa: A002
token: AbstractToken = DefaultToken(), # noqa: B008
freeze: bool = False,
) -> None:
super().__init__(
*paths, extensions=('.py',), exclude=exclude, filter=filter, token=token,
*paths, extensions=('.py',), exclude=exclude, filter=filter, token=token, freeze=freeze,
)
self.addictional_repr_filters = {
'extensions': lambda x: False, # noqa: ARG005
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "dirstree"
version = "0.0.10"
version = "0.0.11"
authors = [{ name = "Evgeniy Blinov", email = "zheni-b@yandex.ru" }]
description = 'Another library for iterating through the contents of a directory'
readme = "README.md"
Expand Down
Loading
Loading