Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions graphrag_sdk/src/graphrag_sdk/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,10 @@ async def apply_changes(
added: list[str] | None = None,
modified: list[str] | None = None,
deleted: list[str] | None = None,
loader: LoaderStrategy | None = None,
chunker: ChunkingStrategy | None = None,
extractor: ExtractionStrategy | None = None,
resolver: ResolutionStrategy | None = None,
max_concurrency: int = 3,
update_concurrency: int = 1,
ctx: Context | None = None,
Expand Down Expand Up @@ -1263,6 +1267,15 @@ async def apply_changes(
added: New file paths to ingest.
modified: File paths whose content changed.
deleted: Document ids (typically file paths) to remove.
loader: Override the loader for ``added``/``modified`` (forwarded
to ``ingest()`` and ``update()``). Defaults to per-extension
auto-selection. ``deleted`` ignores this.
chunker: Override the chunking strategy for ``added``/``modified``.
Defaults to ``FixedSizeChunking``. ``deleted`` ignores this.
extractor: Override the entity-extraction strategy for
``added``/``modified``. ``deleted`` ignores this.
resolver: Override the resolution strategy for ``added``/
``modified``. ``deleted`` ignores this.
max_concurrency: Parallelism cap for ``ingest()`` of the
``added`` list. Matches ``ingest()``'s own knob and the
``add`` step is pure ingestion with no orphan-cleanup
Expand Down Expand Up @@ -1351,6 +1364,10 @@ async def _update_one(path: str) -> BatchEntry[UpdateResult]:
return BatchEntry.ok(
await self.update(
path,
loader=loader,
chunker=chunker,
extractor=extractor,
resolver=resolver,
if_missing="ingest",
ctx=ctx.child(),
)
Expand All @@ -1376,6 +1393,10 @@ async def _update_one(path: str) -> BatchEntry[UpdateResult]:
if added:
batch_out = await self.ingest(
added,
loader=loader,
chunker=chunker,
extractor=extractor,
resolver=resolver,
max_concurrency=max_concurrency,
ctx=ctx,
)
Expand Down Expand Up @@ -2027,6 +2048,10 @@ def apply_changes_sync(
added: list[str] | None = None,
modified: list[str] | None = None,
deleted: list[str] | None = None,
loader: LoaderStrategy | None = None,
chunker: ChunkingStrategy | None = None,
extractor: ExtractionStrategy | None = None,
resolver: ResolutionStrategy | None = None,
max_concurrency: int = 3,
update_concurrency: int = 1,
ctx: Context | None = None,
Expand All @@ -2045,6 +2070,10 @@ def apply_changes_sync(
added=added,
modified=modified,
deleted=deleted,
loader=loader,
chunker=chunker,
extractor=extractor,
resolver=resolver,
max_concurrency=max_concurrency,
update_concurrency=update_concurrency,
ctx=ctx,
Expand Down
72 changes: 72 additions & 0 deletions graphrag_sdk/tests/test_facade.py
Original file line number Diff line number Diff line change
Expand Up @@ -1737,6 +1737,78 @@ async def test_overlapping_ids_across_buckets_raises(
with pytest.raises(ValueError, match=label_fragment):
await graphrag.apply_changes(**kwargs)

async def test_strategy_overrides_forward_to_ingest_and_update(
self, graphrag, monkeypatch
):
"""``loader``/``chunker``/``extractor``/``resolver`` must reach the
inner ``ingest()`` and ``update()`` calls. Without forwarding,
CI callers using ``apply_changes`` as their single entrypoint
silently lose per-graph strategy customisation — the SDK falls
back to defaults instead."""
from graphrag_sdk.core.models import IngestionResult, UpdateResult
from graphrag_sdk.ingestion.chunking_strategies.fixed_size import FixedSizeChunking
from graphrag_sdk.ingestion.loaders.markdown_loader import MarkdownLoader
from graphrag_sdk.ingestion.resolution_strategies.exact_match import ExactMatchResolution

loader = MarkdownLoader()
chunker = FixedSizeChunking(chunk_size=500, chunk_overlap=50)
extractor = MagicMock()
resolver = ExactMatchResolution()

captured: dict[str, dict] = {"ingest": {}, "update": {}}

async def fake_ingest(source, **kwargs):
captured["ingest"] = dict(kwargs, source=source)
return [IngestionResult(document_id="a.md", chunks=0, entities=0, relations=0)]

async def fake_update(source, **kwargs):
captured["update"] = dict(kwargs, source=source)
return UpdateResult(
document_id="m.md", action="updated", chunks=0, entities=0, relations=0,
)
Comment on lines +1762 to +1768
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Use canonical IngestionResult/UpdateResult fields in test fakes (Line 1762, Line 1767, Line 1795, Line 1800).

These fake return objects are built with non-canonical kwargs (document_id, chunks, entities, relations, action), which can break or make the tests misleading versus the real API shape.

Proposed fix
-        from graphrag_sdk.core.models import IngestionResult, UpdateResult
+        from graphrag_sdk.core.models import DocumentInfo, IngestionResult, UpdateResult
@@
         async def fake_ingest(source, **kwargs):
             captured["ingest"] = dict(kwargs, source=source)
-            return [IngestionResult(document_id="a.md", chunks=0, entities=0, relations=0)]
+            return [
+                IngestionResult(
+                    document_info=DocumentInfo(uid="a.md", path="a.md"),
+                    nodes_created=0,
+                    relationships_created=0,
+                    chunks_indexed=0,
+                    metadata={},
+                )
+            ]
@@
         async def fake_update(source, **kwargs):
             captured["update"] = dict(kwargs, source=source)
             return UpdateResult(
-                document_id="m.md", action="updated", chunks=0, entities=0, relations=0,
+                document_info=DocumentInfo(uid="m.md", path="m.md"),
+                nodes_created=0,
+                relationships_created=0,
+                chunks_indexed=0,
+                metadata={},
+                replaced_existing=True,
+                no_op=False,
             )
@@
-        from graphrag_sdk.core.models import IngestionResult, UpdateResult
+        from graphrag_sdk.core.models import DocumentInfo, IngestionResult, UpdateResult
@@
         async def fake_ingest(source, **kwargs):
             captured["ingest"] = dict(kwargs)
-            return [IngestionResult(document_id="a.md", chunks=0, entities=0, relations=0)]
+            return [
+                IngestionResult(
+                    document_info=DocumentInfo(uid="a.md", path="a.md"),
+                    nodes_created=0,
+                    relationships_created=0,
+                    chunks_indexed=0,
+                    metadata={},
+                )
+            ]
@@
         async def fake_update(source, **kwargs):
             captured["update"] = dict(kwargs)
             return UpdateResult(
-                document_id="m.md", action="updated", chunks=0, entities=0, relations=0,
+                document_info=DocumentInfo(uid="m.md", path="m.md"),
+                nodes_created=0,
+                relationships_created=0,
+                chunks_indexed=0,
+                metadata={},
+                replaced_existing=True,
+                no_op=False,
             )

Also applies to: 1795-1801

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@graphrag_sdk/tests/test_facade.py` around lines 1762 - 1768, The test fakes
construct IngestionResult and UpdateResult with non-canonical kwarg names (e.g.,
document_id, chunks, entities, relations, action); update the fake return
objects in the functions that build these results (the IngestionResult call
around the earlier fake ingest and the fake_update function returning
UpdateResult, and the similar fakes at the other occurrences around lines
1795–1801) to use the true/canonical dataclass field names used by the real API
(inspect the IngestionResult and UpdateResult definitions to find the correct
attribute names such as the document reference, chunk/entity/relation counts,
and status/action field) and set equivalent values so the tests reflect the real
object shape.


monkeypatch.setattr(graphrag, "ingest", fake_ingest)
monkeypatch.setattr(graphrag, "update", fake_update)

await graphrag.apply_changes(
added=["a.md"], modified=["m.md"],
loader=loader, chunker=chunker, extractor=extractor, resolver=resolver,
)

for inner in ("ingest", "update"):
assert captured[inner]["loader"] is loader
assert captured[inner]["chunker"] is chunker
assert captured[inner]["extractor"] is extractor
assert captured[inner]["resolver"] is resolver

async def test_strategy_overrides_default_to_none(
self, graphrag, monkeypatch
):
"""Default behaviour is unchanged: callers who don't pass strategies
get ``None`` forwarded, which the SDK reads as "use defaults"."""
from graphrag_sdk.core.models import IngestionResult, UpdateResult

captured: dict[str, dict] = {"ingest": {}, "update": {}}

async def fake_ingest(source, **kwargs):
captured["ingest"] = dict(kwargs)
return [IngestionResult(document_id="a.md", chunks=0, entities=0, relations=0)]

async def fake_update(source, **kwargs):
captured["update"] = dict(kwargs)
return UpdateResult(
document_id="m.md", action="updated", chunks=0, entities=0, relations=0,
)

monkeypatch.setattr(graphrag, "ingest", fake_ingest)
monkeypatch.setattr(graphrag, "update", fake_update)

await graphrag.apply_changes(added=["a.md"], modified=["m.md"])

for inner in ("ingest", "update"):
for k in ("loader", "chunker", "extractor", "resolver"):
assert captured[inner][k] is None


class TestGraphRAGUpdateSyncWrapper:
"""v1.1.0: sync wrappers mirror the async signatures."""
Expand Down
Loading