Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 58 additions & 6 deletions src/buildcompiler/buildcompiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,33 @@ def __init__(

self._index_collections(collections)

@classmethod
def from_local_documents(
cls,
collection_docs: list[sbol2.Document],
design_doc: sbol2.Document | None = None,
):
"""Create a BuildCompiler instance from already-loaded local SBOL documents."""
compiler = cls.__new__(cls)
compiler.sbh = None
compiler.sbol_doc = sbol2.Document()
compiler.indexed_plasmids = []
compiler.indexed_backbones = []
compiler.restriction_enzyme_implementations = []
compiler.ligase_implementations = []

if design_doc is not None:
compiler.index_document(design_doc)

for collection_doc in collection_docs:
compiler.index_document(collection_doc)

return compiler

def index_document(self, collection_doc: sbol2.Document):
self._merge_document(collection_doc)
self._index_current_document()

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Avoid re-indexing previously merged local documents

Calling index_document for each local document invokes _index_current_document() over the entire accumulated self.sbol_doc every time, so objects from earlier documents are indexed again on each subsequent call. In from_local_documents([doc1, doc2, ...]) this produces duplicated plasmid/enzyme/ligase entries (e.g., the first doc’s implementations are appended multiple times), which corrupts downstream workflow inputs and counts.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@codex good catch, please solve this issue. The package has functionalities to add objects to a SBOL Document if they are not ready there to avoid errors of adding the same item twice.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary

  • Fixed local document indexing so index_document now indexes only the newly provided document after merge, instead of re-indexing the entire accumulated SBOL document each time (which caused duplicates). src/buildcompiler/buildcompiler.pyL102-L105
  • Refactored indexing flow by introducing _index_document_objects(source_doc) and routing _index_current_document() through it, preserving full-document behavior for SynBioHub collection indexing while enabling incremental local indexing safely. src/buildcompiler/buildcompiler.pyL146-L154
  • Added _append_implementation_once(...) and used it when attaching implementations to plasmids/backbones and enzyme implementation lists, preventing duplicate implementation entries if the same implementation is encountered again. src/buildcompiler/buildcompiler.pyL149-L203
  • Added a regression test that reproduces the multi-document local indexing scenario and verifies prior documents are not re-indexed when subsequent docs are processed. tests/test_buildcompiler.pyL44-L67

Testing

  • python -m unittest tests.test_buildcompiler -v

View task →


def _index_collections(self, collections: List[str]):
"""Index input collections into plasmids and backbones.

Expand All @@ -91,9 +118,34 @@ def _index_collections(self, collections: List[str]):
for uri in collections:
print(f"Indexing collection: {uri}")
self.sbh.pull(uri, self.sbol_doc)
self._index_current_document()

def _merge_document(self, source_doc: sbol2.Document):
try:
self.sbol_doc.appendString(source_doc.writeString())
except RuntimeError as exc:
if "SBOL_ERROR_URI_NOT_UNIQUE" in str(exc):
for top_level in source_doc.SBOLObjects.values():
if top_level.identity in self.sbol_doc:
continue
self.sbol_doc.add(top_level.copy())
else:
raise

def _resolve_object(self, uri: str):
existing = self.sbol_doc.find(uri)
if existing is not None:
return existing
if self.sbh is None:
raise ValueError(
f"Referenced SBOL object not found in local documents: {uri}. "
"Local mode does not pull from SynBioHub."
)
return get_or_pull(self.sbol_doc, self.sbh, uri)

def _index_current_document(self):
for implementation in self.sbol_doc.implementations:
built_object = get_or_pull(self.sbol_doc, self.sbh, implementation.built)
built_object = self._resolve_object(implementation.built)
if (
type(built_object) is sbol2.ModuleDefinition
and ORGANISM_STRAIN in built_object.roles
Expand Down Expand Up @@ -627,7 +679,7 @@ def _extract_plasmids_from_strain(
):
# strain_implementation = optional param
for plasmid in strain.functionalComponents:
plasmid_definition = get_or_pull(doc, self.sbh, plasmid.definition)
plasmid_definition = self._resolve_object(plasmid.definition)

if ENGINEERED_PLASMID in plasmid_definition.roles:
existing = self._get_indexed_plasmid(
Expand Down Expand Up @@ -761,7 +813,7 @@ def _extract_design_parts(
"""
component_list = [c for c in design.getInSequentialOrder()]
return [
get_or_pull(self.sbol_doc, self.sbh, component.definition)
self._resolve_object(component.definition)
for component in component_list
]

Expand All @@ -775,7 +827,7 @@ def _get_abstract_design(self) -> sbol2.ComponentDefinition:
continue

component_definitions = [
get_or_pull(self.sbol_doc, self.sbh, component.definition)
self._resolve_object(component.definition)
for component in definition.getInSequentialOrder()
]
if any(
Expand Down Expand Up @@ -837,7 +889,7 @@ def _is_single_part(self, plasmid: sbol2.ComponentDefinition) -> bool:
return False
else:
component_definitions = [
get_or_pull(self.sbol_doc, self.sbh, comp.definition)
self._resolve_object(comp.definition)
for comp in plasmid.getInSequentialOrder()
]

Expand Down Expand Up @@ -1017,7 +1069,7 @@ def _expand_combinatorial_derivation(
derivation: sbol2.CombinatorialDerivation,
product_name_prefix: str = None,
) -> list[sbol2.ComponentDefinition]:
master_template = get_or_pull(self.sbol_doc, self.sbh, derivation.masterTemplate)
master_template = self._resolve_object(derivation.masterTemplate)
component_variants = extract_combinatorial_design_parts(
master_template, self.sbol_doc, self.sbol_doc
)
Expand Down
58 changes: 58 additions & 0 deletions tests/test_buildcompiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import inspect
import os
import sys
import unittest
from unittest.mock import patch

import sbol2

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src')))

from buildcompiler.buildcompiler import BuildCompiler
from buildcompiler.constants import RESTRICTION_ENZYME


class TestBuildCompilerLocalIndexing(unittest.TestCase):
def test_constructor_signature_unchanged(self):
params = list(inspect.signature(BuildCompiler.__init__).parameters.keys())
self.assertEqual(
params,
['self', 'collections', 'sbh_registry', 'auth_token', 'sbol_doc'],
)

def test_from_local_documents_indexes_without_partshop(self):
collection_doc = sbol2.Document()
enzyme = sbol2.ComponentDefinition('BsaI')
enzyme.types = [sbol2.BIOPAX_PROTEIN]
enzyme.roles = [RESTRICTION_ENZYME]
collection_doc.add(enzyme)
implementation = sbol2.Implementation('BsaI_impl')
implementation.built = enzyme.identity
collection_doc.add(implementation)

with patch('sbol2.PartShop', side_effect=AssertionError('PartShop should not be constructed in local mode')):
compiler = BuildCompiler.from_local_documents([collection_doc])

self.assertIsNone(compiler.sbh)
self.assertEqual(len(compiler.indexed_plasmids), 0)
self.assertEqual(len(compiler.indexed_backbones), 0)
self.assertEqual(len(compiler.restriction_enzyme_implementations), 1)
self.assertIsInstance(compiler.restriction_enzyme_implementations, list)
self.assertIsInstance(compiler.ligase_implementations, list)

def test_local_mode_raises_when_reference_missing(self):
doc = sbol2.Document()
strain = sbol2.ModuleDefinition('strain_missing_ref')
strain.roles = ['https://identifiers.org/ncit/NCIT:C14419']
fc = strain.functionalComponents.create('plasmid_ref')
fc.definition = 'https://example.org/missing_plasmid/1'
doc.add(strain)

with self.assertRaises(ValueError) as ctx:
BuildCompiler.from_local_documents([doc])

self.assertIn('Local mode does not pull from SynBioHub', str(ctx.exception))


if __name__ == '__main__':
unittest.main()
Loading