From 832c1d44bbfbeeb18083b902e45c31165abb8de4 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Mon, 25 Jul 2022 15:35:17 +0100 Subject: [PATCH 01/42] adding digeastion --- sbol_utilities/component.py | 85 ++++++++++++++++++++++++++++++++++++- setup.py | 3 +- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index b9a8f619..dc7ecee6 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -5,10 +5,11 @@ import sbol3 import tyto -from sbol_utilities.helper_functions import id_sort, find_child, find_top_level, SBOL3PassiveVisitor, cached_references, is_plasmid +from sbol_utilities.helper_functions import id_sort, find_child, find_top_level, SBOL3PassiveVisitor, cached_references, is_plasmid, is_circular from sbol_utilities.workarounds import get_parent from Bio import Restriction +from pydna import Dseqrecord # TODO: consider allowing return of LocalSubComponent and ExternallyDefined @@ -621,4 +622,84 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.features.append(backbone_subcomponent) # adding topology part_in_backbone_component.types.append(topology_type) - return part_in_backbone_component, part_in_backbone_seq \ No newline at end of file + return part_in_backbone_component, part_in_backbone_seq + +def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component)-> Tuple[sbol3.Component, sbol3.Sequence]: + """Digests a Component using the provided restriction enzymes and creates a product Component and a digestion Interaction. + + :param reactant: DNA to be digested as SBOL Component. + :param restriction_enzymes: Restriction enzymes used Externally Defined. + :return: A tuple of Component and Interaction. + """ + if all(t != sbol3.SBO_DNA for t in reactant.types): + raise TypeError(f'The reactant should has a DNA type. Types founded {reactant.types}.') + if len(reactant.sequences)!=1: + raise ValueError(f'The reactant needs to have only one sequence. The input reactant has {len(reactant.sequences)} sequences') + participations=[] + restriction_enzymes_pydna=[] + for re in restriction_enzymes: + enzyme = Restriction.__dict__[re.name] + restriction_enzymes_pydna.append(enzyme) + assembly_plan.features.append(re) + modifier_participation = sbol3.Participation(roles=[sbol3.SBO_MODIFIER], participant=re) + participations.append(modifier_participation) + + # Inform topology to PyDNA, if not found assuming circular. + if is_circular(reactant): + circular=True + linear=False + elif any(n==sbol3.SO_LINEAR for n in reactant.types): + circular=False + linear=True + else: + circular=True + linear=False + reactant_seq = reactant.sequences[0].lookup().elements + # Dseqrecord is from PyDNA package with reactant sequence + ds_reactant = Dseqrecord(reactant_seq, linear=linear, circular=circular) + digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) + + if len(digested_reactant)==0 or len(digested_reactant)>3: + raise NotImplementedError(f'Not supported number of products. Found{len(digested_reactant)}') + elif circular and len(digested_reactant)==2: + digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) + part_extract, backbone = digested_reactant + elif linear and len(digested_reactant)==3: + digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) + # check digested_reactant + prefix, part_extract, suffix = digested_reactant + else: raise NotImplementedError('The reactant has no valid topology type') + # Extracting roles from features + reactant_features_roles = [] + for f in reactant.features: + for r in f.roles: + reactant_features_roles.append(r) + # if part + if any(n==tyto.SO.engineered_insert for n in reactant_features_roles): + product_sequence = part_extract.seq + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=str(product_sequence)) + # add sticky ends features + # if backbone + elif any(n==tyto.SO.deletion for n in reactant_features_roles): + product_sequence = backbone.seq + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=str(product_sequence)) + # add sticky ends features + # add recognition site features + else: raise NotImplementedError('The reactant has no valid roles') + + # Create reactant Participation. + react_subcomp = sbol3.SubComponent(reactant) + assembly_plan.features.append(react_subcomp) + reactant_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=react_subcomp) + participations.append(reactant_participation) + + prod_subcomp = sbol3.SubComponent(prod_comp) + assembly_plan.features.append(prod_subcomp) + product_participation = sbol3.Participation(roles=[sbol3.SBO_PRODUCT], participant=prod_subcomp) + participations.append(product_participation) + + # Make Interaction + interaction = sbol3.Interaction(types=[tyto.SBO.cleavage], participations=participations) + assembly_plan.interactions.append(interaction) + + return prod_comp, prod_seq \ No newline at end of file diff --git a/setup.py b/setup.py index b6fe9986..f06f9708 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ 'graphviz', 'tyto>=1.2.1', 'openpyxl', - 'sbol_factory>=1.0a11' + 'sbol_factory>=1.0a11', + 'pydna' ], extras_require={ # requirements for development 'dev': ['pytest', 'interrogate'] From 6e702ba0a0998724940daca11e3f1bf3293d618e Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Mon, 25 Jul 2022 15:56:46 +0100 Subject: [PATCH 02/42] solving Dseqrecord import --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index dc7ecee6..9635ec92 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -9,7 +9,7 @@ from sbol_utilities.workarounds import get_parent from Bio import Restriction -from pydna import Dseqrecord +from pydna.dseqrecord import Dseqrecord # TODO: consider allowing return of LocalSubComponent and ExternallyDefined From e26c2c84583e39ee5b485dab0702ade672f6b8ba Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Wed, 27 Jul 2022 12:17:12 +0100 Subject: [PATCH 03/42] updating sequence number cheking error message Co-authored-by: Jacob Beal --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 9635ec92..00263be4 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -634,7 +634,7 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall if all(t != sbol3.SBO_DNA for t in reactant.types): raise TypeError(f'The reactant should has a DNA type. Types founded {reactant.types}.') if len(reactant.sequences)!=1: - raise ValueError(f'The reactant needs to have only one sequence. The input reactant has {len(reactant.sequences)} sequences') + raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(reactant.sequences)} sequences') participations=[] restriction_enzymes_pydna=[] for re in restriction_enzymes: From 944c0abbe1c70289c63ec2c7f937dae862a50ddb Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Wed, 27 Jul 2022 12:18:20 +0100 Subject: [PATCH 04/42] Update digested_reactant logic Co-authored-by: Jacob Beal --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 00263be4..c7717bab 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -659,7 +659,7 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall ds_reactant = Dseqrecord(reactant_seq, linear=linear, circular=circular) digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) - if len(digested_reactant)==0 or len(digested_reactant)>3: + if len(digested_reactant)<2 or len(digested_reactant)>3: raise NotImplementedError(f'Not supported number of products. Found{len(digested_reactant)}') elif circular and len(digested_reactant)==2: digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) From 9a4e8c5b162cda261ac6e4d7cf4965cbd3659e47 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Thu, 28 Jul 2022 14:57:00 +0100 Subject: [PATCH 05/42] updated digestion --- sbol_utilities/component.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index c7717bab..b413f16e 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -626,12 +626,13 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component)-> Tuple[sbol3.Component, sbol3.Sequence]: """Digests a Component using the provided restriction enzymes and creates a product Component and a digestion Interaction. + The product Component is assumed to be the insert for parts in backbone and the backbone for backbones. :param reactant: DNA to be digested as SBOL Component. :param restriction_enzymes: Restriction enzymes used Externally Defined. :return: A tuple of Component and Interaction. """ - if all(t != sbol3.SBO_DNA for t in reactant.types): + if sbol3.SBO_DNA not in reactant.types: raise TypeError(f'The reactant should has a DNA type. Types founded {reactant.types}.') if len(reactant.sequences)!=1: raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(reactant.sequences)} sequences') @@ -644,16 +645,14 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall modifier_participation = sbol3.Participation(roles=[sbol3.SBO_MODIFIER], participant=re) participations.append(modifier_participation) - # Inform topology to PyDNA, if not found assuming circular. + # Inform topology to PyDNA, if not found assuming linear. if is_circular(reactant): circular=True linear=False - elif any(n==sbol3.SO_LINEAR for n in reactant.types): + else: circular=False linear=True - else: - circular=True - linear=False + reactant_seq = reactant.sequences[0].lookup().elements # Dseqrecord is from PyDNA package with reactant sequence ds_reactant = Dseqrecord(reactant_seq, linear=linear, circular=circular) @@ -661,9 +660,10 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall if len(digested_reactant)<2 or len(digested_reactant)>3: raise NotImplementedError(f'Not supported number of products. Found{len(digested_reactant)}') + #TODO select them based on content rather than size. elif circular and len(digested_reactant)==2: - digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) - part_extract, backbone = digested_reactant + digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) + part_extract, backbone = sorted(digested_reactant, key=len) elif linear and len(digested_reactant)==3: digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) # check digested_reactant From c22bfec3f15da2ad0f90b30ff5192625d712571d Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Mon, 1 Aug 2022 13:27:54 +0100 Subject: [PATCH 06/42] removing -1 from sequence computation on open backbone --- sbol_utilities/component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index b413f16e..ef7c085c 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -597,8 +597,8 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo # get backbone sequence backbone_sequence = backbone.sequences[0].lookup().elements # compute open backbone sequences - open_backbone_sequence_from_location1=backbone_sequence[backbone.features[-1].locations[0].start -1 : backbone.features[-1].locations[0].end -1] - open_backbone_sequence_from_location2=backbone_sequence[backbone.features[-1].locations[1].start -1 : backbone.features[-1].locations[1].end-1] + open_backbone_sequence_from_location1=backbone_sequence[backbone.features[-1].locations[0].start -1 : backbone.features[-1].locations[0].end] + open_backbone_sequence_from_location2=backbone_sequence[backbone.features[-1].locations[1].start -1 : backbone.features[-1].locations[1].end] # extract part sequence part_sequence = part.sequences[0].lookup().elements # make new component sequence From 3d7ed6ae4d5c83151a00fc5b5fa27576db4c2b4e Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Mon, 1 Aug 2022 13:39:18 +0100 Subject: [PATCH 07/42] updating testing with sequence computation --- test/test_component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_component.py b/test/test_component.py index d3a4504f..0da0104d 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -374,8 +374,8 @@ def test_sep055(self): hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence = part_in_backbone(identity_pib, part=test_promoter, backbone=test_backbone) hlc_doc.add([hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence]) backbone_sequence = test_backbone.sequences[0].lookup().elements - open_backbone_sequence_from_location1=backbone_sequence[test_backbone.features[-1].locations[0].start -1 : test_backbone.features[-1].locations[0].end -1] - open_backbone_sequence_from_location2=backbone_sequence[test_backbone.features[-1].locations[1].start -1 : test_backbone.features[-1].locations[1].end-1] + open_backbone_sequence_from_location1=backbone_sequence[test_backbone.features[-1].locations[0].start -1 : test_backbone.features[-1].locations[0].end] + open_backbone_sequence_from_location2=backbone_sequence[test_backbone.features[-1].locations[1].start -1 : test_backbone.features[-1].locations[1].end] part_sequence = test_promoter.sequences[0].lookup().elements part_in_backbone_seq_str = part_sequence + open_backbone_sequence_from_location2 + open_backbone_sequence_from_location1 part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity_pib, part_in_backbone_seq_str) From b954a19dee75318a0e665ac6aeb64dd5d15447cd Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Fri, 26 Aug 2022 14:47:59 +0100 Subject: [PATCH 08/42] advances in ligation and is_circular helper function --- sbol_utilities/component.py | 104 +++++++++++++++++++++++++++-- sbol_utilities/helper_functions.py | 9 ++- 2 files changed, 108 insertions(+), 5 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index ef7c085c..ddbee4d8 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -669,6 +669,8 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall # check digested_reactant prefix, part_extract, suffix = digested_reactant else: raise NotImplementedError('The reactant has no valid topology type') + # Compute the lenth of single strand sticky ends or fusion sites + digested_reactant_5_prime_ss_strand, digested_reactant_5_prime_ss_end = digested_reactant.five_prime_end() # Extracting roles from features reactant_features_roles = [] for f in reactant.features: @@ -676,17 +678,23 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall reactant_features_roles.append(r) # if part if any(n==tyto.SO.engineered_insert for n in reactant_features_roles): - product_sequence = part_extract.seq - prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=str(product_sequence)) + product_sequence = str(part_extract.seq) + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=product_sequence) #str(product_sequence)) # add sticky ends features + five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=dropout_location[0]+fusion_site_length, order=1) + three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=dropout_location[1]-fusion_site_length, end=dropout_location[1], order=3) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + # if backbone elif any(n==tyto.SO.deletion for n in reactant_features_roles): - product_sequence = backbone.seq - prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=str(product_sequence)) + product_sequence = str(backbone.seq) + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=product_sequence) #str(product_sequence)) # add sticky ends features # add recognition site features else: raise NotImplementedError('The reactant has no valid roles') + # + # Create reactant Participation. react_subcomp = sbol3.SubComponent(reactant) assembly_plan.features.append(react_subcomp) @@ -702,4 +710,92 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall interaction = sbol3.Interaction(types=[tyto.SBO.cleavage], participations=participations) assembly_plan.interactions.append(interaction) + return prod_comp, prod_seq + +def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> Tuple[sbol3.Component, sbol3.Sequence]: + """Ligates Components using base complementarity and creates a product Component and a ligation Interaction. + + :param reactant: DNA to be ligated as SBOL Component. + :return: A tuple of Component and Interaction. + """ + five_prime_overhangs = [] + three_prime_overhangs = [] + for reactant in reactants: + flank_control = [0,0] + for feature in reactant.features: + if feature.role == sbol3.SO.restriction_enzyme_five_prime_single_strand_overhang: + five_prime_overhangs.append(feature) + flank_control[0] = 1 + elif feature.role == sbol3.SO.restriction_enzyme_three_prime_single_strand_overhang: + three_prime_overhangs.append(feature) + flank_control[1] = 1 + + if flank_control == [1,1]: + pass + elif flank_control == [0,0]: + raise ValueError(f"No flanking single strand found in reactant {reactant.identity}") + elif flank_control == [1,0]: + raise ValueError(f"No flanking single strand found in 3 prime end on reactant {reactant.identity}") + elif flank_control == [0,1]: + raise ValueError(f"No flanking single strand found in 5 prime end on reactant {reactant.identity}") + else: + raise ValueError(f"Flanking single strand does not match recognized format on reactant {reactant.identity}") + + fusion_site_length = 4 # placeholder, get from reactant information + # TODO: build graph and fing all different paths or using structural pattern matching + # build graph + assembly_graph = nx.Graph() + for reactant in reactants: + assembly_graph.add_node(reactant) + for a,b in itertools.combinations(reactants,2): + reactant_a_sequence = a.sequences[0].lookup().elements + reactant_b_sequence = b.sequences[0].lookup().elements + if reactant_a_sequence[-fusion_site_length:] == reactant_b_sequence[:fusion_site_length]: + assembly_graph.add_edge(a,b, {'fusion_site':reactant_a_sequence[-fusion_site_length:]}) + if reactant_a_sequence[:fusion_site_length-1] == reactant_b_sequence[-fusion_site_length:]: + assembly_graph.add_edge(a,b, {'fusion_site':reactant_b_sequence[:fusion_site_length-1]}) + # find all paths that leads to an assembly product + + + # TODO: breadth search for all paths + ''' + parts_to_combine = {reactants} + terminal_parts = set() + while parts_to_combine: + next_part = parts_to_combine.pop() + new_parts = find all combinations that can be made with elements in parts_to_combine + if new_parts: + add new_parts to parts_to_combine + else: + terminal_parts.add(next_part) + ''' + pending_reactants= {reactants} + alignments = [] + closed = False + five_prime_end = False + three_prime_end = False + + while pending_reactants: + if not alignments: + alignments.append([pending_reactants.pop()]) + + for alignment in alignments: + #5 prime end + for reactant in reactants: + if alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == reactant.sequences[0].lookup().elements[-fusion_site_length:]: + # repleace alignment with an uppdated version of the form [reactant, alignment[0]] + pass + if alignment[0].sequences[0].lookup().elements[-fusion_site_length:] == reactant.sequences[0].lookup().elements[:fusion_site_length-1]: + # repleace alignment with an uppdated version of the form [alignment[0], reactant] + pass + # if no match mark as terminal part + # if start fusion site == end fusion site mark as closed + # remove used reactants from pending_reactants + + #create preceed constrain + #create composite part or part in backbone + #add interactions to assembly_plan + + prod_comp = 'to do' + prod_seq = 'to do' return prod_comp, prod_seq \ No newline at end of file diff --git a/sbol_utilities/helper_functions.py b/sbol_utilities/helper_functions.py index 168debfc..5b3b1f21 100644 --- a/sbol_utilities/helper_functions.py +++ b/sbol_utilities/helper_functions.py @@ -363,4 +363,11 @@ def is_circular(obj: Union[sbol3.Component, sbol3.LocalSubComponent, sbol3.Exter :param obj: design to be checked :return: true if circular """ - return any(n==sbol3.SO_CIRCULAR for n in obj.types) \ No newline at end of file + return any(n==sbol3.SO_CIRCULAR for n in obj.types) + +def is_linear(obj: Union[sbol3.Component, sbol3.LocalSubComponent, sbol3.ExternallyDefined]) -> bool: + """Check if an SBOL Component or Feature is linear. + :param obj: design to be checked + :return: true if linear + """ + return any(n==sbol3.SO_LINEAR for n in obj.types) \ No newline at end of file From 1fa1049558a92b1193a1f7fe3ae25f4387307707 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 31 Aug 2022 14:12:13 +0100 Subject: [PATCH 09/42] advances on ligation process logic --- sbol_utilities/component.py | 73 ++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index ddbee4d8..3c0856fe 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -769,28 +769,59 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> T else: terminal_parts.add(next_part) ''' - pending_reactants= {reactants} - alignments = [] - closed = False - five_prime_end = False - three_prime_end = False - - while pending_reactants: - if not alignments: - alignments.append([pending_reactants.pop()]) - + alignments = [[r] for r in reactants] # like [[A],[B1],[B2],[C]]] and [[A,B1,C],[B1],[B2],[C]] + used_fusion_sites = set() + final_products = [] # [[A,B1,C]] + while alignments: + closed = False + five_prime_end = False + three_prime_end = False + # get the first item and remove it from the list + working_alignment = alignments[0] + alignments.pop(0) + # compare to all other alignments for alignment in alignments: - #5 prime end - for reactant in reactants: - if alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == reactant.sequences[0].lookup().elements[-fusion_site_length:]: - # repleace alignment with an uppdated version of the form [reactant, alignment[0]] - pass - if alignment[0].sequences[0].lookup().elements[-fusion_site_length:] == reactant.sequences[0].lookup().elements[:fusion_site_length-1]: - # repleace alignment with an uppdated version of the form [alignment[0], reactant] - pass - # if no match mark as terminal part - # if start fusion site == end fusion site mark as closed - # remove used reactants from pending_reactants + # if working alignment 5' end matches a alignment 3' end + if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == alignment.sequences[0].lookup().elements[-fusion_site_length:]: + # if in used_fusion_sites, skip + if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] in used_fusion_sites: + raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") + # if repeated elements pass + if(all(x in working_alignment for x in alignment)): + raise ValueError(f"Repeated elements in alignment {alignment}") + + working_alignment = alignment + working_alignment + else: + five_prime_end = True + # if working_alignment is closed, add to final_products + if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == working_alignment.sequences[0].lookup().elements[-fusion_site_length:]: + final_products.append(working_alignment) + closed = True + break + ################################################ + # if alignment 3' end matches a reactant 5' end + if working_alignment[-1].sequences[0].lookup().elements[-fusion_site_length:] == alignment.sequences[0].lookup().elements[:fusion_site_length-1]: + # if in used_fusion_sites, raise error + if working_alignment[-1].sequences[0].lookup().elements[-fusion_site_length:] in used_fusion_sites: + raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") + # if repeated elements, raise error + if(all(x in working_alignment for x in alignment)): + raise ValueError(f"Repeated elements in alignment {alignment}") + + working_alignment = working_alignment + alignment + else: + three_prime_end = True + # if working_alignment is closed, add to final_products + if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == working_alignment.sequences[0].lookup().elements[-fusion_site_length:]: + final_products.append(working_alignment) + closed = True + break + elif five_prime_end and three_prime_end: + final_products.append(working_alignment) + break + else: alignments = working_alignment + alignments + + # use final products to build assembly product somponent #create preceed constrain #create composite part or part in backbone From 7efbb2c19e9a7df3e2097a25b9799e296dd1d4bf Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 31 Aug 2022 17:20:52 +0100 Subject: [PATCH 10/42] advances on components creation --- sbol_utilities/component.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 3c0856fe..b044187f 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -822,7 +822,19 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> T else: alignments = working_alignment + alignments # use final products to build assembly product somponent - + product_component_list = [] + for composite in final_products: # a composite of the form [A,B,C] + composite_number = 1 + # calculate sequence + composite_sequence_str = "" + for part in composite: + composite_sequence_str = composite_sequence_str + part.sequences[0].lookup().elements[:-fusion_site_length] + # create dna componente and sequence + composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}', composite_sequence_str) # **kwarads use + composite_component.types.append() + composite_component.roles.append() + composite_component.features = composite + composite_number += 1 #create preceed constrain #create composite part or part in backbone #add interactions to assembly_plan From bbf41386b150aa9816ac59629bc4038d6a2ad9c3 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Thu, 1 Sep 2022 17:10:36 +0100 Subject: [PATCH 11/42] ligation prototype --- sbol_utilities/component.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index b044187f..fbb601e6 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -712,7 +712,7 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall return prod_comp, prod_seq -def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> Tuple[sbol3.Component, sbol3.Sequence]: +def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> List[Tuple[sbol3.Component, sbol3.Sequence]]: """Ligates Components using base complementarity and creates a product Component and a ligation Interaction. :param reactant: DNA to be ligated as SBOL Component. @@ -822,23 +822,36 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> T else: alignments = working_alignment + alignments # use final products to build assembly product somponent - product_component_list = [] + products_list = [] + participations = [] for composite in final_products: # a composite of the form [A,B,C] composite_number = 1 # calculate sequence composite_sequence_str = "" for part in composite: - composite_sequence_str = composite_sequence_str + part.sequences[0].lookup().elements[:-fusion_site_length] + composite_sequence_str = composite_sequence_str + part.sequences[0].lookup().elements[:-fusion_site_length] #needs a version for linear + # create participations + part_subcomponent = sbol3.SubComponent(part) # LocalSubComponent?? + # if not in assemblye plan? + assembly_plan.features.append(part_subcomponent) + part_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=part_subcomponent) + participations.append(part_participation) # create dna componente and sequence - composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}', composite_sequence_str) # **kwarads use + composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}', composite_sequence_str) # **kwarads use in future? composite_component.types.append() - composite_component.roles.append() + composite_component.roles.append(sbol3.SO_ENGINEERED_REGION) composite_component.features = composite + # fix order of features + composite_component.constraints.append(sbol3.Constraint(sbol3.SBOL_MEETS, composite_component.features[composite_number-1], composite_component.features[composite_number])) + # add product participation + composite_subcomponent = sbol3.SubComponent(composite_component) + participations.append(sbol3.Participation(roles=[sbol3.SBO_PRODUCT], participant=composite_subcomponent)) + # create interactions + assembly_plan.interactions.append(sbol3.Interaction(types=[tyto.SBO.conversion], participations=participations)) + products_list.append([composite_component, composite_seq]) composite_number += 1 #create preceed constrain #create composite part or part in backbone #add interactions to assembly_plan - prod_comp = 'to do' - prod_seq = 'to do' - return prod_comp, prod_seq \ No newline at end of file + return products_list \ No newline at end of file From 0adb6eec599407b3b901cb380006d8745b34bf4d Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Fri, 9 Sep 2022 16:30:44 +0100 Subject: [PATCH 12/42] assembly and part in backbone2 --- sbol_utilities/component.py | 107 +++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 9 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index fbb601e6..cc864664 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -347,7 +347,7 @@ def rbs(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3. :return: A tuple of Component and Sequence. """ rbs_component, rbs_seq = dna_component_with_sequence(identity, sequence, **kwargs) - rbs_component.roles. append(sbol3.SO_RBS) + rbs_component.roles.append(sbol3.SO_RBS) return rbs_component, rbs_seq @@ -360,7 +360,7 @@ def cds(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3. :return: A tuple of Component and Sequence. """ cds_component, cds_seq = dna_component_with_sequence(identity, sequence, **kwargs) - cds_component.roles. append(sbol3.SO_CDS) + cds_component.roles.append(sbol3.SO_CDS) return cds_component, cds_seq @@ -373,7 +373,7 @@ def terminator(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, :return: A tuple of Component and Sequence. """ terminator_component, terminator_seq = dna_component_with_sequence(identity, sequence, **kwargs) - terminator_component.roles. append(sbol3.SO_TERMINATOR) + terminator_component.roles.append(sbol3.SO_TERMINATOR) return terminator_component, terminator_seq @@ -386,7 +386,7 @@ def protein_stability_element(identity: str, sequence: str, **kwargs) -> Tuple[s :return: A tuple of Component and Sequence. """ pse_component, protein_stability_element_seq = dna_component_with_sequence(identity, sequence, **kwargs) - pse_component.roles. append(tyto.SO.protein_stability_element) + pse_component.roles.append(tyto.SO.protein_stability_element) return pse_component, protein_stability_element_seq @@ -399,7 +399,7 @@ def gene(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3 :return: A tuple of Component and Sequence. """ gene_component, gene_seq = dna_component_with_sequence(identity, sequence, **kwargs) - gene_component.roles. append(sbol3.SO_GENE) + gene_component.roles.append(sbol3.SO_GENE) return gene_component, gene_seq @@ -412,7 +412,7 @@ def operator(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, s :return: A tuple of Component and Sequence. """ operator_component, operator_seq = dna_component_with_sequence(identity, sequence, **kwargs) - operator_component.roles. append(sbol3.SO_OPERATOR) + operator_component.roles.append(sbol3.SO_OPERATOR) return operator_component, operator_seq @@ -449,7 +449,7 @@ def mrna(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, sbol3 :return: A tuple of Component and Sequence. """ mrna_component, mrna_seq = rna_component_with_sequence(identity, sequence, **kwargs) - mrna_component.roles. append(sbol3.SO_MRNA) + mrna_component.roles.append(sbol3.SO_MRNA) return mrna_component, mrna_seq @@ -462,7 +462,7 @@ def transcription_factor(identity: str, sequence: str, **kwargs) -> Tuple[sbol3. :return: A tuple of Component and Sequence. """ tf_component, transcription_factor_seq = protein_component_with_sequence(identity, sequence, **kwargs) - tf_component.roles. append(sbol3.SO_TRANSCRIPTION_FACTOR) + tf_component.roles.append(sbol3.SO_TRANSCRIPTION_FACTOR) return tf_component, transcription_factor_seq @@ -854,4 +854,93 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L #create composite part or part in backbone #add interactions to assembly_plan - return products_list \ No newline at end of file + return products_list + +class Assembly_plan_composite_in_backbone_single_enzyme(): + """Creates a Assembly Plan. + #classes uses param here? + :param parts_in_backbone: Parts in backbone to be assembled. + :param acceptor_backbone: Backbone in which parts are inserted on the assembly. + :param restriction_enzymes: Restriction enzyme with correct name from Bio.Restriction as Externally Defined. + :param linear: Boolean to inform if the reactant is linear. + :param circular: Boolean to inform if the reactant is circular. + :param **kwargs: Keyword arguments of any other Component attribute for the assembled part. + """ + + def __init__(self, name: str, parts_in_backbone: List[sbol3.Component], acceptor_backbone: sbol3.Component, restriction_enzyme: Union[str,sbol3.ExternallyDefined], document:sbol3.Document): + self.name = name + self.parts_in_backbone = parts_in_backbone + self.acceptor_backbone = acceptor_backbone + self.restriction_enzyme = restriction_enzyme + self.unitary_parts = None + self.products = None + self.extracted_parts = [] + self.assembly_plan_component = None + self.document = document + + #create assembly plan + self.assembly_plan_component = sbol3.Component(identity=f'{self.name}_assembly_plan', types=sbol3.SBO_FUNCTIONAL_ENTITY) + self.document.add(self.assembly_plan_component) + + def run(self): + self.assembly_plan_component.features.append(self.restriction_enzyme) + #extract parts + part_number = 1 + for part_in_backbone in self.parts_in_backbone: + part_comp, part_seq = digestion(reactant=part_in_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}') + self.document.add([part_comp, part_seq]) + self.extracted_parts.append(part_comp) + part_number += 1 + + #extract backbone (should be the same?) + backbone_comp, backbone_seq = digestion(reactant=self.acceptor_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}') + self.document.add([backbone_comp, backbone_seq]) + self.extracted_parts.append(backbone_comp) + + #create composite part from extracted parts + composites_comp = ligation(reactants=self.extracted_parts, assembly_plan=self.assembly_plan_component) + self.products = composites_comp + for composite in composites_comp: + self.document.add(composite) + +def part_in_backbone2(identity: str, sequence: str, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: + """Creates a Backbone Component and its Sequence. + + :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. + :param sequence: The DNA sequence of the Component encoded in IUPAC. + :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. + :param kwargs: Keyword arguments of any other Component attribute. + :return: A tuple of Component and Sequence. + """ + if len(part_location) != 2: + raise ValueError('The part_location only accepts 2 int values in a list.') + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) + for part_role in part_roles: + part_in_backbone_component.roles.append(part_role) + part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1]) + insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) + insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) + part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles) + part_sequence_feature.roles.append(tyto.SO.engineered_insert) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + if linear: + part_in_backbone_component.types.append(sbol3.SO_LINEAR) + part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + else: + part_in_backbone_component.types.append(sbol3.SO_CIRCULAR) + part_in_backbone_component.roles.append(tyto.SO.plasmid_vector) + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + part_in_backbone_component.features.append(part_sequence_feature) + part_in_backbone_component.features.append(insertion_sites_feature) + part_in_backbone_component.features.append(open_backbone_feature) + backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) + part_in_backbone_component.constraints.append(backbone_dropout_meets) + return part_in_backbone_component, part_in_backbone_seq \ No newline at end of file From b9709022e10538131a22f33dd121f681db0b1c1f Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 28 Sep 2022 10:59:39 +0100 Subject: [PATCH 13/42] functional digestion --- sbol_utilities/component.py | 137 ++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 77 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index cc864664..04d78dd6 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -718,57 +718,13 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L :param reactant: DNA to be ligated as SBOL Component. :return: A tuple of Component and Interaction. """ - five_prime_overhangs = [] - three_prime_overhangs = [] - for reactant in reactants: - flank_control = [0,0] - for feature in reactant.features: - if feature.role == sbol3.SO.restriction_enzyme_five_prime_single_strand_overhang: - five_prime_overhangs.append(feature) - flank_control[0] = 1 - elif feature.role == sbol3.SO.restriction_enzyme_three_prime_single_strand_overhang: - three_prime_overhangs.append(feature) - flank_control[1] = 1 - - if flank_control == [1,1]: - pass - elif flank_control == [0,0]: - raise ValueError(f"No flanking single strand found in reactant {reactant.identity}") - elif flank_control == [1,0]: - raise ValueError(f"No flanking single strand found in 3 prime end on reactant {reactant.identity}") - elif flank_control == [0,1]: - raise ValueError(f"No flanking single strand found in 5 prime end on reactant {reactant.identity}") - else: - raise ValueError(f"Flanking single strand does not match recognized format on reactant {reactant.identity}") - - fusion_site_length = 4 # placeholder, get from reactant information - # TODO: build graph and fing all different paths or using structural pattern matching - # build graph - assembly_graph = nx.Graph() - for reactant in reactants: - assembly_graph.add_node(reactant) - for a,b in itertools.combinations(reactants,2): - reactant_a_sequence = a.sequences[0].lookup().elements - reactant_b_sequence = b.sequences[0].lookup().elements - if reactant_a_sequence[-fusion_site_length:] == reactant_b_sequence[:fusion_site_length]: - assembly_graph.add_edge(a,b, {'fusion_site':reactant_a_sequence[-fusion_site_length:]}) - if reactant_a_sequence[:fusion_site_length-1] == reactant_b_sequence[-fusion_site_length:]: - assembly_graph.add_edge(a,b, {'fusion_site':reactant_b_sequence[:fusion_site_length-1]}) - # find all paths that leads to an assembly product - - - # TODO: breadth search for all paths - ''' - parts_to_combine = {reactants} - terminal_parts = set() - while parts_to_combine: - next_part = parts_to_combine.pop() - new_parts = find all combinations that can be made with elements in parts_to_combine - if new_parts: - add new_parts to parts_to_combine - else: - terminal_parts.add(next_part) - ''' + # get all fusion sites + five_prime_fusion_sites = set() + three_prime_fusion_sites = set() + for r in reactants: + five_prime_fusion_sites.add(r.sequences[0].lookup().elements[:r.features[0].locations[0].end]) + three_prime_fusion_sites.add(r.sequences[0].lookup().elements[r.features[0].locations[1].start:]) + alignments = [[r] for r in reactants] # like [[A],[B1],[B2],[C]]] and [[A,B1,C],[B1],[B2],[C]] used_fusion_sites = set() final_products = [] # [[A,B1,C]] @@ -781,53 +737,78 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L alignments.pop(0) # compare to all other alignments for alignment in alignments: + #working_alignment_5_prime_fusion_site_length = working_alignment[0].features[0].locations[0].end + #alignment_3_prime_fusion_site_length = alignment[-1].features[0].locations[1].start + working_alignment_5_prime_fusion_site = working_alignment[0].sequences[0].lookup().elements[:working_alignment[0].features[0].locations[0].end] + working_alignment_3_prime_fusion_site = working_alignment[-1].sequences[0].lookup().elements[working_alignment[-1].features[0].locations[1].start:] + alignment_5_prime_fusion_site = alignment[0].sequences[0].lookup().elements[:alignment[0].features[0].locations[0].end] + alignment_3_prime_fusion_site = alignment[-1].sequences[0].lookup().elements[alignment[-1].features[0].locations[1].start:] # if working alignment 5' end matches a alignment 3' end - if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == alignment.sequences[0].lookup().elements[-fusion_site_length:]: + if working_alignment_5_prime_fusion_site == alignment_3_prime_fusion_site: # if in used_fusion_sites, skip - if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] in used_fusion_sites: + if working_alignment_5_prime_fusion_site in used_fusion_sites: raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") + else: used_fusion_sites.add(working_alignment_5_prime_fusion_site) # if repeated elements pass - if(all(x in working_alignment for x in alignment)): - raise ValueError(f"Repeated elements in alignment {alignment}") + #if(all(x in working_alignment for x in alignment)): + # raise ValueError(f"Repeated elements in alignment {alignment}") working_alignment = alignment + working_alignment - else: + + working_alignment_5_prime_fusion_site = working_alignment[0].sequences[0].lookup().elements[:working_alignment[0].features[0].locations[0].end] + working_alignment_3_prime_fusion_site = working_alignment[-1].sequences[0].lookup().elements[working_alignment[-1].features[0].locations[1].start:] + + # if working alignment 5' end does not matches any 3' fusion site + if working_alignment_5_prime_fusion_site not in three_prime_fusion_sites: five_prime_end = True + # if working_alignment is closed, add to final_products - if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == working_alignment.sequences[0].lookup().elements[-fusion_site_length:]: + if working_alignment_5_prime_fusion_site == working_alignment_3_prime_fusion_site: final_products.append(working_alignment) closed = True break + ################################################ - # if alignment 3' end matches a reactant 5' end - if working_alignment[-1].sequences[0].lookup().elements[-fusion_site_length:] == alignment.sequences[0].lookup().elements[:fusion_site_length-1]: + # if working alignment 3' end matches a alignment 5' end + if working_alignment_3_prime_fusion_site == alignment_5_prime_fusion_site: # if in used_fusion_sites, raise error - if working_alignment[-1].sequences[0].lookup().elements[-fusion_site_length:] in used_fusion_sites: + if working_alignment_3_prime_fusion_site in used_fusion_sites: raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") # if repeated elements, raise error - if(all(x in working_alignment for x in alignment)): - raise ValueError(f"Repeated elements in alignment {alignment}") + #if(all(x in working_alignment for x in alignment)): + # raise ValueError(f"Repeated elements in alignment {alignment}") working_alignment = working_alignment + alignment - else: + + working_alignment_5_prime_fusion_site = working_alignment[0].sequences[0].lookup().elements[:working_alignment[0].features[0].locations[0].end] + working_alignment_3_prime_fusion_site = working_alignment[-1].sequences[0].lookup().elements[working_alignment[-1].features[0].locations[1].start:] + + # if working alignment 5' end does not matches any 3' fusion site + if working_alignment_3_prime_fusion_site not in five_prime_fusion_sites: three_prime_end = True + # if working_alignment is closed, add to final_products - if working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1] == working_alignment.sequences[0].lookup().elements[-fusion_site_length:]: + if working_alignment_5_prime_fusion_site == working_alignment_3_prime_fusion_site: final_products.append(working_alignment) closed = True - break - elif five_prime_end and three_prime_end: + break + # if no match, add to final products + if five_prime_end and three_prime_end: final_products.append(working_alignment) - break - else: alignments = working_alignment + alignments + break + # TODO: feed working alignment to alignments + #alignments.insert(0, working_alignment) + # use final products to build assembly product somponent + fusion_site_length = 4 products_list = [] participations = [] for composite in final_products: # a composite of the form [A,B,C] - composite_number = 1 + composite_number = 0 # calculate sequence composite_sequence_str = "" + composite_name = "" for part in composite: composite_sequence_str = composite_sequence_str + part.sequences[0].lookup().elements[:-fusion_site_length] #needs a version for linear # create participations @@ -836,23 +817,25 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L assembly_plan.features.append(part_subcomponent) part_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=part_subcomponent) participations.append(part_participation) + composite_name = composite_name + part.name # create dna componente and sequence - composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}', composite_sequence_str) # **kwarads use in future? - composite_component.types.append() + composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}_{composite_name}', composite_sequence_str) # **kwarads use in future? + #composite_component.types.append() composite_component.roles.append(sbol3.SO_ENGINEERED_REGION) - composite_component.features = composite - # fix order of features - composite_component.constraints.append(sbol3.Constraint(sbol3.SBOL_MEETS, composite_component.features[composite_number-1], composite_component.features[composite_number])) + #composite_component.features = composite + # TODO fix order of features + #composite_component.constraints.append(sbol3.Constraint(sbol3.SBOL_MEETS, composite_component.features[composite_number-1], composite_component.features[composite_number])) # add product participation - composite_subcomponent = sbol3.SubComponent(composite_component) - participations.append(sbol3.Participation(roles=[sbol3.SBO_PRODUCT], participant=composite_subcomponent)) + #composite_subcomponent = sbol3.SubComponent(composite_component) + #participations.append(sbol3.Participation(roles=[sbol3.SBO_PRODUCT], participant=composite_subcomponent)) # create interactions - assembly_plan.interactions.append(sbol3.Interaction(types=[tyto.SBO.conversion], participations=participations)) + #assembly_plan.interactions.append(sbol3.Interaction(types=[tyto.SBO.conversion], participations=participations)) products_list.append([composite_component, composite_seq]) composite_number += 1 #create preceed constrain #create composite part or part in backbone #add interactions to assembly_plan + #add participations to assembly_plan return products_list From 627dba9dc6e27b8ff90d1c01b44d26287869e123 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Tue, 17 Jan 2023 15:11:53 +0000 Subject: [PATCH 14/42] updated components and tests --- sbol_utilities/component.py | 88 +++++++++++------------ test/test_component.py | 139 +++++++++++++++++++++++++++++++++++- 2 files changed, 180 insertions(+), 47 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 04d78dd6..4c040209 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -624,6 +624,48 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.types.append(topology_type) return part_in_backbone_component, part_in_backbone_seq +def part_in_backbone2(identity: str, sequence: str, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: + """Creates a Backbone Component and its Sequence. + + :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. + :param sequence: The DNA sequence of the Component encoded in IUPAC. + :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. + :param kwargs: Keyword arguments of any other Component attribute. + :return: A tuple of Component and Sequence. + """ + if len(part_location) != 2: + raise ValueError('The part_location only accepts 2 int values in a list.') + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) + for part_role in part_roles: + part_in_backbone_component.roles.append(part_role) + part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1], order=2) + insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) + insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) + part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles) + part_sequence_feature.roles.append(tyto.SO.engineered_insert) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + if linear: + part_in_backbone_component.types.append(sbol3.SO_LINEAR) + part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + else: + part_in_backbone_component.types.append(sbol3.SO_CIRCULAR) + part_in_backbone_component.roles.append(tyto.SO.plasmid_vector) + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + part_in_backbone_component.features.append(part_sequence_feature) + part_in_backbone_component.features.append(insertion_sites_feature) + part_in_backbone_component.features.append(open_backbone_feature) + backbone_part_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) + part_in_backbone_component.constraints.append(backbone_part_meets) + return part_in_backbone_component, part_in_backbone_seq + def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component)-> Tuple[sbol3.Component, sbol3.Sequence]: """Digests a Component using the provided restriction enzymes and creates a product Component and a digestion Interaction. The product Component is assumed to be the insert for parts in backbone and the backbone for backbones. @@ -693,8 +735,6 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall # add recognition site features else: raise NotImplementedError('The reactant has no valid roles') - # - # Create reactant Participation. react_subcomp = sbol3.SubComponent(reactant) assembly_plan.features.append(react_subcomp) @@ -884,46 +924,4 @@ def run(self): composites_comp = ligation(reactants=self.extracted_parts, assembly_plan=self.assembly_plan_component) self.products = composites_comp for composite in composites_comp: - self.document.add(composite) - -def part_in_backbone2(identity: str, sequence: str, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: - """Creates a Backbone Component and its Sequence. - - :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. - :param sequence: The DNA sequence of the Component encoded in IUPAC. - :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. - :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) - :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. - :param kwargs: Keyword arguments of any other Component attribute. - :return: A tuple of Component and Sequence. - """ - if len(part_location) != 2: - raise ValueError('The part_location only accepts 2 int values in a list.') - part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) - part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) - for part_role in part_roles: - part_in_backbone_component.roles.append(part_role) - part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1]) - insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) - insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) - part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles) - part_sequence_feature.roles.append(tyto.SO.engineered_insert) - insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) - if linear: - part_in_backbone_component.types.append(sbol3.SO_LINEAR) - part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) - open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1) - open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3) - open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) - else: - part_in_backbone_component.types.append(sbol3.SO_CIRCULAR) - part_in_backbone_component.roles.append(tyto.SO.plasmid_vector) - open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2) - open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1) - open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) - part_in_backbone_component.features.append(part_sequence_feature) - part_in_backbone_component.features.append(insertion_sites_feature) - part_in_backbone_component.features.append(open_backbone_feature) - backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) - part_in_backbone_component.constraints.append(backbone_dropout_meets) - return part_in_backbone_component, part_in_backbone_seq \ No newline at end of file + self.document.add(composite) \ No newline at end of file diff --git a/test/test_component.py b/test/test_component.py index 0da0104d..84d62bc1 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -15,7 +15,8 @@ protein_stability_element, gene, operator, engineered_region, mrna, transcription_factor, \ strain, ed_simple_chemical, ed_protein -from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone +from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone, part_in_backbone2, \ + digestion, ligation, Assembly_plan_composite_in_backbone_single_enzyme from sbol_utilities.helper_functions import find_top_level, toplevel_named, TopLevelNotFound, outgoing_links from sbol_utilities.sbol_diff import doc_diff @@ -373,9 +374,11 @@ def test_sep055(self): hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence = part_in_backbone(identity_pib, part=test_promoter, backbone=test_backbone) hlc_doc.add([hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence]) + backbone_sequence = test_backbone.sequences[0].lookup().elements open_backbone_sequence_from_location1=backbone_sequence[test_backbone.features[-1].locations[0].start -1 : test_backbone.features[-1].locations[0].end] open_backbone_sequence_from_location2=backbone_sequence[test_backbone.features[-1].locations[1].start -1 : test_backbone.features[-1].locations[1].end] + part_sequence = test_promoter.sequences[0].lookup().elements part_in_backbone_seq_str = part_sequence + open_backbone_sequence_from_location2 + open_backbone_sequence_from_location1 part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity_pib, part_in_backbone_seq_str) @@ -399,7 +402,6 @@ def test_sep055(self): hlc_doc.add([test_promoter, test_promoter_seq, test_backbone, test_backbone_seq]) doc.add([test_promoter, test_promoter_seq, test_backbone, test_backbone_seq]) - hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence = part_in_backbone(identity_pib, part=test_promoter, backbone=test_backbone, linear=True) hlc_doc.add([hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence]) @@ -418,5 +420,138 @@ def test_sep055(self): doc.add([part_in_backbone_component_linear, part_in_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + + test_promoter, test_promoter_seq = promoter('pTest', 'aaTTaa') + part_location = [4,14] + fusion_site_length = 4 + test_backbone, test_backbone_seq = backbone('test_bb','cccGGGGTTGGGGccc', dropout_location, fusion_site_length, linear=False) + identity_pib = 'part_in_backbone2' + sequence_str_pib = 'cccGGGGTTGGGGccc' + part_role = sbol3.SO_PROMOTER + + hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence = part_in_backbone2(identity=identity_pib, sequence=sequence_str_pib, + part_location=part_location, part_roles=[part_role], fusion_site_length=fusion_site_length, linear=False) + hlc_doc.add([hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence]) + + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity=identity_pib, sequence=sequence_str_pib) + part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) + part_in_backbone_component.roles.append(sbol3.SO_PROMOTER) + + part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1], order=2) + insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) + insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) + part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=[part_role]) + part_sequence_feature.roles.append(tyto.SO.engineered_insert) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + + #circular + part_in_backbone_component.types.append(sbol3.SO_CIRCULAR) + part_in_backbone_component.roles.append(tyto.SO.plasmid_vector) + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + + part_in_backbone_component.features.append(part_sequence_feature) + part_in_backbone_component.features.append(insertion_sites_feature) + part_in_backbone_component.features.append(open_backbone_feature) + backbone_part_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) + part_in_backbone_component.constraints.append(backbone_part_meets) + doc.add([part_in_backbone_component, part_in_backbone_seq]) + assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Circular {identity_pib}' + + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + + hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence = part_in_backbone2(identity=identity_pib, sequence=sequence_str_pib, + part_location=part_location, part_roles=[part_role], fusion_site_length=fusion_site_length, linear=True) + hlc_doc.add([hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence]) + + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity=identity_pib, sequence=sequence_str_pib) + part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) + part_in_backbone_component.roles.append(sbol3.SO_PROMOTER) + + part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1], order=2) + insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) + insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) + part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=[part_role]) + part_sequence_feature.roles.append(tyto.SO.engineered_insert) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + + #linear + part_in_backbone_component.types.append(sbol3.SO_LINEAR) + part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) + open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1) + open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + + part_in_backbone_component.features.append(part_sequence_feature) + part_in_backbone_component.features.append(insertion_sites_feature) + part_in_backbone_component.features.append(open_backbone_feature) + backbone_part_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) + part_in_backbone_component.constraints.append(backbone_part_meets) + doc.add([part_in_backbone_component, part_in_backbone_seq]) + assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' + + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + + bsai = ed_restriction_enzyme('BsaI') + #added by Assembly + + #lvl1 acceptor + lvl1_pOdd_acceptor_seq = 'gctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgggagtgagacccaatacgcaaaccgcctctccccgcgcgttggccgattcattaatgcagctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaccccaggctttacactttatgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttcacacatactagagaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttataggtctcaGCTTgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgaca' + podd_backbone, podd_backbone_seq = backbone('pOdd_bb', lvl1_pOdd_acceptor_seq, [1169,2259], 4, False, name='pOdd_bb') + doc.add([podd_backbone,podd_backbone_seq]) + + #parts in backbone + j23100_b0034_ac_seq_str = 'ttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcgggagtctTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCCTAGAGAAAGAGGAGAAATACTAGaatgCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtct' + sfgfp_ce_seq_str = 'ccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcgaatgcgtaaaggcgaggaactgttcactggtgtcgtccctattctggtggaactggatggtgatgtcaacggtcataagttttccgtgcgtggcgagggtgaaggtgacgcaactaatggtaaactgacgctgaagttcatctgtactactggtaaactgccggtaccttggccgactctggtaacgacgctgacttatggtgttcagtgctttgctcgttatccggaccatatgaagcagcatgacttcttcaagtccgccatgccggaaggctatgtgcaggaacgcacgatttcctttaaggatgacggcacgtacaaaacgcgtgcggaagtgaaatttgaaggcgataccctggtaaaccgcattgagctgaaaggcattgactttaaagaagacggcaatatcctgggccataagctggaatacaattttaacagccacaatgtttacatcaccgccgataaacaaaaaaatggcattaaagcgaattttaaaattcgccacaacgtggaggatggcagcgtgcagctggctgatcactaccagcaaaacactccaatcggtgatggtcctgttctgctgccagacaatcactatctgagcacgcaaagcgttctgtctaaagatccgaacgagaaacgcgatcatatggttctgctggagttcgtaaccgcagcgggcatcacgcatggtatggatgaactgtacaaatgatgagcttCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtg' + b0015_ef_seq_str = 'aagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcggcttccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacgctCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaac' + + j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq = part_in_backbone2('j23100_b0034_ac_in_bb', j23100_b0034_ac_seq_str, [476,545], [sbol3.SO_PROMOTER, sbol3.SO_RBS], 4, False, name='j23100_b0034_ac_in_bb') + doc.add([j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq]) + + sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq = part_in_backbone2('sfgfp_ce_in_bb', sfgfp_ce_seq_str, [130,854], [sbol3.SO_CDS], 4, False, name='sfgfp_ce_in_bb') + doc.add([sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq]) + + b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone2('b0015_ef_in_bb', b0015_ef_seq_str, [514,650], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') + doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq]) + + #Assembly plan + test_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( + name='constitutive_gfp_tu', + parts_in_backbone=[j23100_b0034_ac_in_bb, sfgfp_ce_in_bb, b0015_ef_in_bb], + acceptor_backbone=podd_backbone, + restriction_enzyme=bsai, + document=doc) + + test_assembly_plan.run() + + #Check assembly plan + expected_assembled_j23100_b0034_ac_seq_str = j23100_b0034_ac_seq_str[475:545] + assembled_j23100_b0034_ac_seq_str = test_assembly_plan.extracted_parts[0].sequences[0].lookup().elements + assert expected_assembled_j23100_b0034_ac_seq_str==assembled_j23100_b0034_ac_seq_str, 'Constructor Error: First extracted part sequence does not match expected sequence' + + expected_assembled_sfgfp_ce_seq_str = sfgfp_ce_seq_str[129:854] + assembled_sfgfp_ce_seq_str = test_assembly_plan.extracted_parts[1].sequences[0].lookup().elements + assert expected_assembled_sfgfp_ce_seq_str==assembled_sfgfp_ce_seq_str, 'Constructor Error: Second extracted part sequence does not match expected sequence' + + expected_assembled_b0015_ef_seq_str = b0015_ef_seq_str[513:650] + assembled_b0015_ef_seq_str = test_assembly_plan.extracted_parts[2].sequences[0].lookup().elements + assert expected_assembled_b0015_ef_seq_str==assembled_b0015_ef_seq_str, 'Constructor Error: Third extracted part sequence does not match expected sequence' + + expected_assembled_open_backbone_seq_str = lvl1_pOdd_acceptor_seq[2255:] + lvl1_pOdd_acceptor_seq[:1172] + assembled_open_backbone_seq_str = test_assembly_plan.extracted_parts[-1].sequences[0].lookup().elements + assert expected_assembled_open_backbone_seq_str==assembled_open_backbone_seq_str, 'Constructor Error: Last extracted part (open backbone) sequence does not match expected sequence' + + expected_composite_seq_str = expected_assembled_open_backbone_seq_str[:-4] + expected_assembled_j23100_b0034_ac_seq_str[:-4] + expected_assembled_sfgfp_ce_seq_str[:-4] + expected_assembled_b0015_ef_seq_str[:-4] + assembled_composite_seq_str = test_assembly_plan.composites[0][0].sequences[0].lookup().elements + assert expected_composite_seq_str==assembled_composite_seq_str, 'Constructor Error: Composite sequence does not match expected sequence' + if __name__ == '__main__': unittest.main() From baa89eefda332038f974b4effa17ace8352f2af1 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 22 Mar 2023 13:04:58 +0000 Subject: [PATCH 15/42] updated version part_in_backbone_from_sbol --- sbol_utilities/component.py | 128 +++++++++++----------- test/test_component.py | 181 ++++++-------------------------- test/test_files/b0015.gb | 87 +++++++++++++++ test/test_files/j23100_b0034.gb | 82 +++++++++++++++ test/test_files/sfgfp.gb | 102 ++++++++++++++++++ 5 files changed, 371 insertions(+), 209 deletions(-) create mode 100644 test/test_files/b0015.gb create mode 100644 test/test_files/j23100_b0034.gb create mode 100644 test/test_files/sfgfp.gb diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 7d6b1efe..947eb94d 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Callable, Dict, Iterable, List, Union, Set, Optional, Tuple +from typing import Dict, Iterable, List, Union, Optional, Tuple import sbol3 import tyto @@ -89,34 +89,13 @@ def has_dna_type(component: sbol3.Component) -> bool: # there must be atleast 1 SO role, among others def check_roles(component: sbol3.Component) -> bool: - try: - return any(tyto.SO.get_term_by_uri(role) for role in component.roles) - except LookupError: - return False + return any(tyto.SO.get_term_by_uri(role) for role in component.roles) # check all conditions return isinstance(obj, sbol3.Component) and check_roles(obj) \ and has_dna_type(obj) and len(obj.sequences) == 1 -def by_roles(required_role: str) -> Callable[[sbol3.TopLevel], bool]: - """Given an object and a role, check if it is one of the roles of the object. - - :param required_role: the role which must be present in given object - :return: lambda function taking an obj to check roles in, returns bool - """ - return lambda obj: isinstance(obj, sbol3.Component) and required_role in obj.roles - - -def by_types(required_type: str) -> Callable[[sbol3.TopLevel], bool]: - """Given an object and a type, check if it is one of the types of the object. - - :param required_type: the type which must be present in given object - :return: lambda function taking an obj to check types in, returns bool - """ - return lambda obj: isinstance(obj, sbol3.Component) and required_type in obj.types - - def ensure_singleton_feature(system: sbol3.Component, target: Union[sbol3.Feature, sbol3.Component]): """Return a feature associated with the target, i.e., the target itself if a feature, or a SubComponent. If the target is not already in the system, add it. @@ -645,24 +624,33 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.types.append(topology_type) return part_in_backbone_component, part_in_backbone_seq -def part_in_backbone2(identity: str, sequence: str, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: - """Creates a Backbone Component and its Sequence. +def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, overwrite:bool=True, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: + """Creates a Part in Backbone Component and its Sequence following BP011 from an unformatted SBOL3 Component. :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. - :param sequence: The DNA sequence of the Component encoded in IUPAC. - :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param sbol3_comp: The SBOL3 Component that will be used to create the part in backbone. + :param part_location: List of 2 integers that indicates the start and the end of the unitary part sequence. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param part_roles: List of strings that indicates the roles of the part. :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. + :param overwrite: Boolean that indicates if the input Component will be overwritten. By default it is seted to True. :param kwargs: Keyword arguments of any other Component attribute. :return: A tuple of Component and Sequence. """ if len(part_location) != 2: raise ValueError('The part_location only accepts 2 int values in a list.') - part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + if len(sbol3_comp.sequences)!=1: + raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol3_comp.sequences)} sequences') + sequence = sbol3_comp.sequences[0].lookup().elements + if overwrite: + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + else: + part_in_backbone_component = sbol3_comp + part_in_backbone_seq = sbol3_comp.sequences[0].lookup() part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) for part_role in part_roles: part_in_backbone_component.roles.append(part_role) - part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1], order=2) + part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1]) insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles) @@ -683,11 +671,11 @@ def part_in_backbone2(identity: str, sequence: str, part_location: List[int], p part_in_backbone_component.features.append(part_sequence_feature) part_in_backbone_component.features.append(insertion_sites_feature) part_in_backbone_component.features.append(open_backbone_feature) - backbone_part_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) - part_in_backbone_component.constraints.append(backbone_part_meets) + backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) + part_in_backbone_component.constraints.append(backbone_dropout_meets) return part_in_backbone_component, part_in_backbone_seq -def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component)-> Tuple[sbol3.Component, sbol3.Sequence]: +def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component, **kwargs)-> Tuple[sbol3.Component, sbol3.Sequence]: """Digests a Component using the provided restriction enzymes and creates a product Component and a digestion Interaction. The product Component is assumed to be the insert for parts in backbone and the backbone for backbones. @@ -704,7 +692,7 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall for re in restriction_enzymes: enzyme = Restriction.__dict__[re.name] restriction_enzymes_pydna.append(enzyme) - assembly_plan.features.append(re) + #assembly_plan.features.append(re) modifier_participation = sbol3.Participation(roles=[sbol3.SBO_MODIFIER], participant=re) participations.append(modifier_participation) @@ -725,15 +713,11 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall raise NotImplementedError(f'Not supported number of products. Found{len(digested_reactant)}') #TODO select them based on content rather than size. elif circular and len(digested_reactant)==2: - digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) part_extract, backbone = sorted(digested_reactant, key=len) elif linear and len(digested_reactant)==3: - digested_reactant = ds_reactant.cut(restriction_enzymes_pydna) - # check digested_reactant prefix, part_extract, suffix = digested_reactant else: raise NotImplementedError('The reactant has no valid topology type') - # Compute the lenth of single strand sticky ends or fusion sites - digested_reactant_5_prime_ss_strand, digested_reactant_5_prime_ss_end = digested_reactant.five_prime_end() + # Extracting roles from features reactant_features_roles = [] for f in reactant.features: @@ -741,21 +725,36 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall reactant_features_roles.append(r) # if part if any(n==tyto.SO.engineered_insert for n in reactant_features_roles): + # Compute the length of single strand sticky ends or fusion sites + product_5_prime_ss_strand, product_5_prime_ss_end = part_extract.seq.five_prime_end() + product_3_prime_ss_strand, product_3_prime_ss_end = part_extract.seq.three_prime_end() + product_sequence = str(part_extract.seq) - prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=product_sequence) #str(product_sequence)) + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=product_sequence, **kwargs) #str(product_sequence)) # add sticky ends features - five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=dropout_location[0]+fusion_site_length, order=1) - three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=dropout_location[1]-fusion_site_length, end=dropout_location[1], order=3) - insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) - + five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1) + three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end), end=len(product_sequence), order=3) + fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site]) + prod_comp.features.append(fusion_sites_feature) + # if backbone elif any(n==tyto.SO.deletion for n in reactant_features_roles): + # Compute the length of single strand sticky ends or fusion sites + product_5_prime_ss_strand, product_5_prime_ss_end = backbone.seq.five_prime_end() + product_3_prime_ss_strand, product_3_prime_ss_end = backbone.seq.three_prime_end() product_sequence = str(backbone.seq) - prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=product_sequence) #str(product_sequence)) + prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=product_sequence, **kwargs) #str(product_sequence)) # add sticky ends features - # add recognition site features + five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1) + three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end), end=len(product_sequence), order=3) + fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site]) + prod_comp.features.append(fusion_sites_feature) + else: raise NotImplementedError('The reactant has no valid roles') + #Add reference to part in backbone + reactant_subcomponent = sbol3.SubComponent(reactant) + prod_comp.features.append(reactant_subcomponent) # Create reactant Participation. react_subcomp = sbol3.SubComponent(reactant) assembly_plan.features.append(react_subcomp) @@ -798,8 +797,6 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L alignments.pop(0) # compare to all other alignments for alignment in alignments: - #working_alignment_5_prime_fusion_site_length = working_alignment[0].features[0].locations[0].end - #alignment_3_prime_fusion_site_length = alignment[-1].features[0].locations[1].start working_alignment_5_prime_fusion_site = working_alignment[0].sequences[0].lookup().elements[:working_alignment[0].features[0].locations[0].end] working_alignment_3_prime_fusion_site = working_alignment[-1].sequences[0].lookup().elements[working_alignment[-1].features[0].locations[1].start:] alignment_5_prime_fusion_site = alignment[0].sequences[0].lookup().elements[:alignment[0].features[0].locations[0].end] @@ -870,20 +867,23 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L # calculate sequence composite_sequence_str = "" composite_name = "" - for part in composite: - composite_sequence_str = composite_sequence_str + part.sequences[0].lookup().elements[:-fusion_site_length] #needs a version for linear + #part_subcomponents = [] + part_extract_subcomponents = [] + for part_extract in composite: + composite_sequence_str = composite_sequence_str + part_extract.sequences[0].lookup().elements[:-fusion_site_length] #needs a version for linear # create participations - part_subcomponent = sbol3.SubComponent(part) # LocalSubComponent?? - # if not in assemblye plan? - assembly_plan.features.append(part_subcomponent) - part_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=part_subcomponent) - participations.append(part_participation) - composite_name = composite_name + part.name + part_extract_subcomponent = sbol3.SubComponent(part_extract) # LocalSubComponent?? + part_extract_subcomponents.append(part_extract_subcomponent) + # if not in assembl plan? + #assembly_plan.features.append(part_extract_subcomponent) # should be saved at composite level + #part_subcomponents.append(part_subcomponent) + #part_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=part_subcomponent) + #participations.append(part_participation) + composite_name = composite_name + part_extract.name # create dna componente and sequence composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}_{composite_name}', composite_sequence_str) # **kwarads use in future? - #composite_component.types.append() composite_component.roles.append(sbol3.SO_ENGINEERED_REGION) - #composite_component.features = composite + composite_component.features = part_extract_subcomponents # TODO fix order of features #composite_component.constraints.append(sbol3.Constraint(sbol3.SBOL_MEETS, composite_component.features[composite_number-1], composite_component.features[composite_number])) # add product participation @@ -916,18 +916,18 @@ def __init__(self, name: str, parts_in_backbone: List[sbol3.Component], acceptor self.parts_in_backbone = parts_in_backbone self.acceptor_backbone = acceptor_backbone self.restriction_enzyme = restriction_enzyme - self.unitary_parts = None - self.products = None + self.products = [] self.extracted_parts = [] - self.assembly_plan_component = None self.document = document #create assembly plan self.assembly_plan_component = sbol3.Component(identity=f'{self.name}_assembly_plan', types=sbol3.SBO_FUNCTIONAL_ENTITY) self.document.add(self.assembly_plan_component) + self.composites = [] def run(self): self.assembly_plan_component.features.append(self.restriction_enzyme) + #store reactant c #extract parts part_number = 1 for part_in_backbone in self.parts_in_backbone: @@ -942,7 +942,9 @@ def run(self): self.extracted_parts.append(backbone_comp) #create composite part from extracted parts - composites_comp = ligation(reactants=self.extracted_parts, assembly_plan=self.assembly_plan_component) - self.products = composites_comp - for composite in composites_comp: + composites_list = ligation(reactants=self.extracted_parts, assembly_plan=self.assembly_plan_component) + for composite in composites_list: + composite[0].generated_by.append(self.assembly_plan_component) # + self.composites.append(composite) + self.products.append(composite) self.document.add(composite) \ No newline at end of file diff --git a/test/test_component.py b/test/test_component.py index 65646552..7c5beced 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -7,77 +7,35 @@ import sbol3 import tyto -from sbol_utilities.component import contained_components, contains, add_feature, add_interaction, constitutive, \ - regulate, order, in_role, all_in_role, ensure_singleton_feature, by_roles, by_types, is_dna_part, ed_restriction_enzyme -from sbol_utilities.helper_functions import filter_top_level +from sbol_utilities.component import contained_components, contains, add_feature, add_interaction, \ + constitutive, ed_restriction_enzyme, \ + regulate, order, in_role, all_in_role, ensure_singleton_feature, is_dna_part from sbol_utilities.component import dna_component_with_sequence, rna_component_with_sequence, \ protein_component_with_sequence, media, functional_component, promoter, rbs, cds, terminator, \ protein_stability_element, gene, operator, engineered_region, mrna, transcription_factor, \ strain, ed_simple_chemical, ed_protein -from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone, part_in_backbone2, \ +from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone, part_in_backbone_from_sbol, \ digestion, ligation, Assembly_plan_composite_in_backbone_single_enzyme from sbol_utilities.helper_functions import find_top_level, toplevel_named, TopLevelNotFound, outgoing_links from sbol_utilities.sbol_diff import doc_diff +from sbol_utilities.conversion import convert_from_genbank class TestComponent(unittest.TestCase): - def test_filter_by_roles(self): - """test the filter by roles utility""" - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') - # create and add 3 components, with 2 having common role of dna - comp_1 = sbol3.Component('component_1', sbol3.SBO_DNA, roles=[tyto.SBO.deoxyribonucleic_acid]) - comp_2 = sbol3.Component('component_2', sbol3.SBO_DNA, roles=[tyto.SO.engineered_region]) - comp_3 = sbol3.Component('component_3', sbol3.SBO_DNA, roles=[tyto.SO.engineered_region, tyto.SBO.deoxyribonucleic_acid]) - doc.add(comp_1) - doc.add(comp_2) - doc.add(comp_3) - # only comp_1 and comp_3 must be returned by the function - matched = list(filter_top_level(doc, by_roles(tyto.SBO.deoxyribonucleic_acid))) - assert(comp_1 in matched and comp_3 in matched and len(matched) == 2) - - def test_filter_by_types(self): - """test the filter by types utility""" - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') - # create and add 3 components, with 2 one of the types as SBO_DNA - comp_1 = sbol3.Component('component_1', types=[sbol3.SBO_DNA]) - comp_2 = sbol3.Component('component_2', types=[sbol3.SBO_DEGRADATION, sbol3.SBO_DNA]) - comp_3 = sbol3.Component('component_3', types=[sbol3.SBO_FUNCTIONAL_ENTITY]) - doc.add(comp_1) - doc.add(comp_2) - doc.add(comp_3) - # only comp_1 and comp_3 must be returned by the function - matched = list(filter_top_level(doc, by_types(sbol3.SBO_DNA))) - assert(comp_1 in matched and comp_2 in matched and len(matched) == 2) - def test_dna_part(self): """Test the correctness of is_dna_part check""" # create a test dna component - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') + dna_identity = 'Test_dna_identity' dna_sequence = 'Test_dna_sequence' dna_description = 'Test_dna_description' sbol3.set_namespace('http://sbolstandard.org/testfiles') # we don't need dna_sequence object - test_dna_component_1, _ = dna_component_with_sequence('test_identity1', dna_sequence, description=dna_description) - test_dna_component_2, _ = dna_component_with_sequence('test_identity2', dna_sequence, description=dna_description) - test_dna_component_3, _ = dna_component_with_sequence('test_identity3', dna_sequence, description=dna_description) + test_dna_component, _ = dna_component_with_sequence(dna_identity, dna_sequence, description=dna_description) # adding atleast 1 SO role - test_dna_component_1.roles.append(sbol3.SO_GENE) - test_dna_component_2.roles.append(sbol3.SBO_DEGRADATION) - # created and add 3 components, with 1 satisfying all criteria - doc.add(test_dna_component_3) - doc.add(test_dna_component_2) - doc.add(test_dna_component_1) - # use filter_top_level utility to filter objects which are dna parts - matched = list(filter_top_level(doc, is_dna_part)) - # 2nd component had non SO roles, 3rd component had no role - assert test_dna_component_1 in matched - assert test_dna_component_2 not in matched - assert test_dna_component_3 not in matched + test_dna_component.roles.append(sbol3.SO_GENE) + assert is_dna_part(test_dna_component) def test_system_building(self): doc = sbol3.Document() @@ -340,12 +298,12 @@ def test_sep055(self): hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') - + # Restriction enzyme restriction_enzyme_name = 'BsaI' restriction_enzyme_definition = 'http://rebase.neb.com/rebase/enz/BsaI.html' # TODO: replace with getting the URI from Enzyme when REBASE identifiers become available in biopython 1.80 bsai = ed_restriction_enzyme(restriction_enzyme_name) assert bsai.definition == restriction_enzyme_definition, 'Constructor Error: ed_restriction_enzyme' - + # Backbone backbone_identity = 'backbone' backbone_sequence = 'aaGGGGttttCCCCaa' dropout_location = [3,15] @@ -401,8 +359,7 @@ def test_sep055(self): linear_backbone_component.constraints.append(backbone_dropout_meets) doc.add([linear_backbone_component, linear_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {backbone_identity}' - - + # Part in backbone hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') @@ -462,109 +419,43 @@ def test_sep055(self): part_in_backbone_component_linear.types.append(sbol3.SO_LINEAR) doc.add([part_in_backbone_component_linear, part_in_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' - - hlc_doc = sbol3.Document() - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') - - test_promoter, test_promoter_seq = promoter('pTest', 'aaTTaa') - part_location = [4,14] - fusion_site_length = 4 - test_backbone, test_backbone_seq = backbone('test_bb','cccGGGGTTGGGGccc', dropout_location, fusion_site_length, linear=False) - identity_pib = 'part_in_backbone2' - sequence_str_pib = 'cccGGGGTTGGGGccc' - part_role = sbol3.SO_PROMOTER - - hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence = part_in_backbone2(identity=identity_pib, sequence=sequence_str_pib, - part_location=part_location, part_roles=[part_role], fusion_site_length=fusion_site_length, linear=False) - hlc_doc.add([hl_part_in_backbone_circular, hl_part_in_backbone_circular_sequence]) - - part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity=identity_pib, sequence=sequence_str_pib) - part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) - part_in_backbone_component.roles.append(sbol3.SO_PROMOTER) + # Part in backbone from SBOL + target_b0015_unitary_part_sequence = 'ccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' + b0015_doc = convert_from_genbank('b0015.gb', 'https://github.com/Gonza10V') + b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] + b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements + b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') + for feature in b0015_ef_in_bb.features: + if feature.roles == [sbol3.SO_TERMINATOR, tyto.SO.engineered_insert]: + b0015_unitary_part_sequence = feature.locations[0].sequence.lookup().elements[feature.locations[0].start-1:feature.locations[0].end] + assert target_b0015_unitary_part_sequence == b0015_unitary_part_sequence - part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1], order=2) - insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) - insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) - part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=[part_role]) - part_sequence_feature.roles.append(tyto.SO.engineered_insert) - insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) - - #circular - part_in_backbone_component.types.append(sbol3.SO_CIRCULAR) - part_in_backbone_component.roles.append(tyto.SO.plasmid_vector) - open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2) - open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1) - open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) - - part_in_backbone_component.features.append(part_sequence_feature) - part_in_backbone_component.features.append(insertion_sites_feature) - part_in_backbone_component.features.append(open_backbone_feature) - backbone_part_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) - part_in_backbone_component.constraints.append(backbone_part_meets) - doc.add([part_in_backbone_component, part_in_backbone_seq]) - assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Circular {identity_pib}' - - hlc_doc = sbol3.Document() - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') - - hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence = part_in_backbone2(identity=identity_pib, sequence=sequence_str_pib, - part_location=part_location, part_roles=[part_role], fusion_site_length=fusion_site_length, linear=True) - hlc_doc.add([hl_part_in_backbone_linear, hl_part_in_backbone_linear_sequence]) - - part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity=identity_pib, sequence=sequence_str_pib) - part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) - part_in_backbone_component.roles.append(sbol3.SO_PROMOTER) - - part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1], order=2) - insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) - insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) - part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=[part_role]) - part_sequence_feature.roles.append(tyto.SO.engineered_insert) - insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) - - #linear - part_in_backbone_component.types.append(sbol3.SO_LINEAR) - part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) - open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1) - open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3) - open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) - - part_in_backbone_component.features.append(part_sequence_feature) - part_in_backbone_component.features.append(insertion_sites_feature) - part_in_backbone_component.features.append(open_backbone_feature) - backbone_part_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) - part_in_backbone_component.constraints.append(backbone_part_meets) - doc.add([part_in_backbone_component, part_in_backbone_seq]) - assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' - + # Assembly plan setup hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') bsai = ed_restriction_enzyme('BsaI') - #added by Assembly - #lvl1 acceptor lvl1_pOdd_acceptor_seq = 'gctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgggagtgagacccaatacgcaaaccgcctctccccgcgcgttggccgattcattaatgcagctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaccccaggctttacactttatgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttcacacatactagagaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttataggtctcaGCTTgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgaca' podd_backbone, podd_backbone_seq = backbone('pOdd_bb', lvl1_pOdd_acceptor_seq, [1169,2259], 4, False, name='pOdd_bb') doc.add([podd_backbone,podd_backbone_seq]) - #parts in backbone - j23100_b0034_ac_seq_str = 'ttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcgggagtctTTGACGGCTAGCTCAGTCCTAGGTACAGTGCTAGCCTAGAGAAAGAGGAGAAATACTAGaatgCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtct' - sfgfp_ce_seq_str = 'ccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcgaatgcgtaaaggcgaggaactgttcactggtgtcgtccctattctggtggaactggatggtgatgtcaacggtcataagttttccgtgcgtggcgagggtgaaggtgacgcaactaatggtaaactgacgctgaagttcatctgtactactggtaaactgccggtaccttggccgactctggtaacgacgctgacttatggtgttcagtgctttgctcgttatccggaccatatgaagcagcatgacttcttcaagtccgccatgccggaaggctatgtgcaggaacgcacgatttcctttaaggatgacggcacgtacaaaacgcgtgcggaagtgaaatttgaaggcgataccctggtaaaccgcattgagctgaaaggcattgactttaaagaagacggcaatatcctgggccataagctggaatacaattttaacagccacaatgtttacatcaccgccgataaacaaaaaaatggcattaaagcgaattttaaaattcgccacaacgtggaggatggcagcgtgcagctggctgatcactaccagcaaaacactccaatcggtgatggtcctgttctgctgccagacaatcactatctgagcacgcaaagcgttctgtctaaagatccgaacgagaaacgcgatcatatggttctgctggagttcgtaaccgcagcgggcatcacgcatggtatggatgaactgtacaaatgatgagcttCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaacaagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtg' - b0015_ef_seq_str = 'aagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcggcttccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacgctCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaac' - - j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq = part_in_backbone2('j23100_b0034_ac_in_bb', j23100_b0034_ac_seq_str, [476,545], [sbol3.SO_PROMOTER, sbol3.SO_RBS], 4, False, name='j23100_b0034_ac_in_bb') + j23100_b0034_doc = convert_from_genbank('j23100_b0034.gb', 'https://github.com/Gonza10V') + j23100_b0034_ac = [top_level for top_level in j23100_b0034_doc if type(top_level)==sbol3.Component][0] + j23100_b0034_ac_seq_str = j23100_b0034_ac.sequences[0].lookup().elements + sfgfp_doc = convert_from_genbank('sfgfp.gb', 'https://github.com/Gonza10V') + sfgfp_ce = [top_level for top_level in sfgfp_doc if type(top_level)==sbol3.Component][0] + sfgfp_ce_seq_str = sfgfp_ce.sequences[0].lookup().elements + b0015_doc = convert_from_genbank('b0015.gb', 'https://github.com/Gonza10V') + b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] + b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements + j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq = part_in_backbone_from_sbol('j23100_b0034_ac_in_bb', j23100_b0034_ac, [476,545], [sbol3.SO_PROMOTER, sbol3.SO_RBS], 4, False, name='j23100_b0034_ac_in_bb') doc.add([j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq]) - - sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq = part_in_backbone2('sfgfp_ce_in_bb', sfgfp_ce_seq_str, [130,854], [sbol3.SO_CDS], 4, False, name='sfgfp_ce_in_bb') + sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq = part_in_backbone_from_sbol('sfgfp_ce_in_bb', sfgfp_ce, [130,854], [sbol3.SO_CDS], 4, False, name='sfgfp_ce_in_bb') doc.add([sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq]) - - b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone2('b0015_ef_in_bb', b0015_ef_seq_str, [514,650], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') + b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq]) - #Assembly plan test_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( name='constitutive_gfp_tu', @@ -572,9 +463,7 @@ def test_sep055(self): acceptor_backbone=podd_backbone, restriction_enzyme=bsai, document=doc) - test_assembly_plan.run() - #Check assembly plan expected_assembled_j23100_b0034_ac_seq_str = j23100_b0034_ac_seq_str[475:545] assembled_j23100_b0034_ac_seq_str = test_assembly_plan.extracted_parts[0].sequences[0].lookup().elements diff --git a/test/test_files/b0015.gb b/test/test_files/b0015.gb new file mode 100644 index 00000000..079d75ae --- /dev/null +++ b/test/test_files/b0015.gb @@ -0,0 +1,87 @@ +LOCUS Copy_of_B0015_EF:_pSB1C 2190 bp ds-DNA circular 28-JAN-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and B0015_EF +FEATURES Location/Qualifiers + misc_feature 385..404 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(461..505) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 507..512 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + terminator 517..646 + /label="BBa-B0015 Terminator" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + terminator 526..597 + /label="rrnB T1 terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + terminator 613..640 + /label="T7Te terminator" + /ApEinfo_revcolor="#75c6a9" + /ApEinfo_fwdcolor="#75c6a9" + misc_feature 664..735 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(799..818) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(903..1517) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1684..1789) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + CDS complement(1802..271) + /label="Cam Resistance" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" +ORIGIN + 1 aagggtgaac actatcccat atcaccagct caccgtcttt cattgccata cgaaattccg + 61 gatgagcatt catcaggcgg gcaagaatgt gaataaaggc cggataaaac ttgtgcttat + 121 ttttctttac ggtctttaaa aaggccgtaa tatccagctg aacggtctgg ttataggtac + 181 attgagcaac tgactgaaat gcctcaaaat gttctttacg atgccattgg gatatatcaa + 241 cggtggtata tccagtgatt tttttctcca ttttagcttc cttagctcct gaaaatctcg + 301 ataactcaaa aaatacgccc ggtagtgatc ttatttcatt atggtgaaag ttggaacctc + 361 ttacgtgccc gatcaactcg agtgccacct gacgtctaag aaaccattat tatcatgaca + 421 ttaacctata aaaataggcg tatcacgagg cagaatttca gataaaaaaa atccttagct + 481 ttcgctaagg atgatttctg gaattcggtc tcggcttcca ggcatcaaat aaaacgaaag + 541 gctcagtcga aagactgggc ctttcgtttt atctgttgtt tgtcggtgaa cgctctctac + 601 tagagtcaca ctggctcacc ttcgggtggg cctttctgcg tttatacgct CGAGaccctg + 661 cagtccggca aaaaagggca aggtgtcacc accctgccct ttttctttaa aaccgaaaag + 721 attacttcgc gttatgcagg cttcctcgct cactgactcg ctgcgctcgg tcgttcggct + 781 gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag aatcagggga + 841 taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc gtaaaaaggc + 901 cgcgttgctg gcgtttttcc acaggctccg cccccctgac gagcatcaca aaaatcgacg + 961 ctcaagtcag aggtggcgaa acccgacagg actataaaga taccaggcgt ttccccctgg + 1021 aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc tgtccgcctt + 1081 tctcccttcg ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc tcagttcggt + 1141 gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc ccgaccgctg + 1201 cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact tatcgccact + 1261 ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg ctacagagtt + 1321 cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta tctgcgctct + 1381 gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca aacaaaccac + 1441 cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa aaaaaggatc + 1501 tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg aaaactcacg + 1561 ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc ttttaaatta + 1621 aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg acagctcgag + 1681 gcttggattc tcaccaataa aaaacgcccg gcggcaaccg agcgttctga acaaatccag + 1741 atggagttct gaggtcatta ctggatctat caacaggagt ccaagcgagc tcgatatcaa + 1801 attacgcccc gccctgccac tcatcgcagt actgttgtaa ttcattaagc attctgccga + 1861 catggaagcc atcacaaacg gcatgatgaa cctgaatcgc cagcggcatc agcaccttgt + 1921 cgccttgcgt ataatatttg cccatggtga aaacgggggc gaagaagttg tccatattgg + 1981 ccacgtttaa atcaaaactg gtgaaactca cccagggatt ggctgagacg aaaaacatat + 2041 tctcaataaa ccctttaggg aaataggcca ggttttcacc gtaacacgcc acatcttgcg + 2101 aatatatgtg tagaaactgc cggaaatcgt cgtggtattc actccagagc gatgaaaacg + 2161 tttcagtttg ctcatggaaa acggtgtaac +// \ No newline at end of file diff --git a/test/test_files/j23100_b0034.gb b/test/test_files/j23100_b0034.gb new file mode 100644 index 00000000..e3c10b03 --- /dev/null +++ b/test/test_files/j23100_b0034.gb @@ -0,0 +1,82 @@ +LOCUS Copy_of_J23100_B0034_AC 2123 bp ds-DNA circular 27-JAN-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 347..366 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(423..467) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 469..474 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + Promoter 483..517 + /label="BBa_J23100" + /ApEinfo_revcolor="#b4abac" + /ApEinfo_fwdcolor="#b4abac" + RBS 524..535 + /label="B0034" + /ApEinfo_revcolor="#b4abac" + /ApEinfo_fwdcolor="#b4abac" + misc_feature 559..630 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(694..713) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(798..1412) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1579..1684) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + CDS complement(1697..233) + /label="Cam Resistance" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" +ORIGIN + 1 ttcattgcca tacgaaattc cggatgagca ttcatcaggc gggcaagaat gtgaataaag + 61 gccggataaa acttgtgctt atttttcttt acggtcttta aaaaggccgt aatatccagc + 121 tgaacggtct ggttataggt acattgagca actgactgaa atgcctcaaa atgttcttta + 181 cgatgccatt gggatatatc aacggtggta tatccagtga tttttttctc cattttagct + 241 tccttagctc ctgaaaatct cgataactca aaaaatacgc ccggtagtga tcttatttca + 301 ttatggtgaa agttggaacc tcttacgtgc ccgatcaact cgagtgccac ctgacgtcta + 361 agaaaccatt attatcatga cattaaccta taaaaatagg cgtatcacga ggcagaattt + 421 cagataaaaa aaatccttag ctttcgctaa ggatgatttc tggaattcgg tctcgggagt + 481 ctTTGACGGC TAGCTCAGTC CTAGGTACAG TGCTAGCCTA GAGAAAGAGG AGAAATACTA + 541 GaatgCGAGa ccctgcagtc cggcaaaaaa gggcaaggtg tcaccaccct gccctttttc + 601 tttaaaaccg aaaagattac ttcgcgttat gcaggcttcc tcgctcactg actcgctgcg + 661 ctcggtcgtt cggctgcggc gagcggtatc agctcactca aaggcggtaa tacggttatc + 721 cacagaatca ggggataacg caggaaagaa catgtgagca aaaggccagc aaaaggccag + 781 gaaccgtaaa aaggccgcgt tgctggcgtt tttccacagg ctccgccccc ctgacgagca + 841 tcacaaaaat cgacgctcaa gtcagaggtg gcgaaacccg acaggactat aaagatacca + 901 ggcgtttccc cctggaagct ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg + 961 atacctgtcc gcctttctcc cttcgggaag cgtggcgctt tctcatagct cacgctgtag + 1021 gtatctcagt tcggtgtagg tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt + 1081 tcagcccgac cgctgcgcct tatccggtaa ctatcgtctt gagtccaacc cggtaagaca + 1141 cgacttatcg ccactggcag cagccactgg taacaggatt agcagagcga ggtatgtagg + 1201 cggtgctaca gagttcttga agtggtggcc taactacggc tacactagaa gaacagtatt + 1261 tggtatctgc gctctgctga agccagttac cttcggaaaa agagttggta gctcttgatc + 1321 cggcaaacaa accaccgctg gtagcggtgg tttttttgtt tgcaagcagc agattacgcg + 1381 cagaaaaaaa ggatctcaag aagatccttt gatcttttct acggggtctg acgctcagtg + 1441 gaacgaaaac tcacgttaag ggattttggt catgagatta tcaaaaagga tcttcaccta + 1501 gatcctttta aattaaaaat gaagttttaa atcaatctaa agtatatatg agtaaacttg + 1561 gtctgacagc tcgaggcttg gattctcacc aataaaaaac gcccggcggc aaccgagcgt + 1621 tctgaacaaa tccagatgga gttctgaggt cattactgga tctatcaaca ggagtccaag + 1681 cgagctcgat atcaaattac gccccgccct gccactcatc gcagtactgt tgtaattcat + 1741 taagcattct gccgacatgg aagccatcac aaacggcatg atgaacctga atcgccagcg + 1801 gcatcagcac cttgtcgcct tgcgtataat atttgcccat ggtgaaaacg ggggcgaaga + 1861 agttgtccat attggccacg tttaaatcaa aactggtgaa actcacccag ggattggctg + 1921 agacgaaaaa catattctca ataaaccctt tagggaaata ggccaggttt tcaccgtaac + 1981 acgccacatc ttgcgaatat atgtgtagaa actgccggaa atcgtcgtgg tattcactcc + 2041 agagcgatga aaacgtttca gtttgctcat ggaaaacggt gtaacaaggg tgaacactat + 2101 cccatatcac cagctcaccg tct +// \ No newline at end of file diff --git a/test/test_files/sfgfp.gb b/test/test_files/sfgfp.gb new file mode 100644 index 00000000..b667e882 --- /dev/null +++ b/test/test_files/sfgfp.gb @@ -0,0 +1,102 @@ +LOCUS Copy_of_sfGFP_CE:_pSB1C 2778 bp ds-DNA circular 15-NOV-2022 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and sfGFP_CE Sequencing + data suggests primer needs to be re-designed - appears to have part + of B0015 at the end +FEATURES Location/Qualifiers + misc_feature 1..20 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(77..121) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 123..128 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + CDS 131..850 + /label="sfGFP (BBa_I746916)" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + CDS 131..850 + /label="Translation 131-850" + /translation="MRKGEELFTGVVPILVELDGDVNGHKFSVRGEGEGDATNGKLTLKFICTTGKLPVPWPTLVTTLTYGVQCFARYPDHMKQHDFFKSAMPEGYVQERTISFKDDGTYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNFNSHNVYITADKQKNGIKANFKIRHNVEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSVLSKDPNEKRDHMVLLEFVTAAGITHGMDELYK**" + misc_feature 851..854 + /label="Fusion Site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 856..861 + /label="BsaI Site" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc_feature 868..939 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1003..1022) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1107..1721) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1888..1993) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + CDS complement(2006..2665) + /label="Cam Resistance" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" +ORIGIN + 1 ccacctgacg tctaagaaac cattattatc atgacattaa cctataaaaa taggcgtatc + 61 acgaggcaga atttcagata aaaaaaatcc ttagctttcg ctaaggatga tttctggaat + 121 tcggtctcga atgcgtaaag gcgaggaact gttcactggt gtcgtcccta ttctggtgga + 181 actggatggt gatgtcaacg gtcataagtt ttccgtgcgt ggcgagggtg aaggtgacgc + 241 aactaatggt aaactgacgc tgaagttcat ctgtactact ggtaaactgc cggtaccttg + 301 gccgactctg gtaacgacgc tgacttatgg tgttcagtgc tttgctcgtt atccggacca + 361 tatgaagcag catgacttct tcaagtccgc catgccggaa ggctatgtgc aggaacgcac + 421 gatttccttt aaggatgacg gcacgtacaa aacgcgtgcg gaagtgaaat ttgaaggcga + 481 taccctggta aaccgcattg agctgaaagg cattgacttt aaagaagacg gcaatatcct + 541 gggccataag ctggaataca attttaacag ccacaatgtt tacatcaccg ccgataaaca + 601 aaaaaatggc attaaagcga attttaaaat tcgccacaac gtggaggatg gcagcgtgca + 661 gctggctgat cactaccagc aaaacactcc aatcggtgat ggtcctgttc tgctgccaga + 721 caatcactat ctgagcacgc aaagcgttct gtctaaagat ccgaacgaga aacgcgatca + 781 tatggttctg ctggagttcg taaccgcagc gggcatcacg catggtatgg atgaactgta + 841 caaatgatga gcttCGAGac cctgcagtcc ggcaaaaaag ggcaaggtgt caccaccctg + 901 ccctttttct ttaaaaccga aaagattact tcgcgttatg caggcttcct cgctcactga + 961 ctcgctgcgc tcggtcgttc ggctgcggcg agcggtatca gctcactcaa aggcggtaat + 1021 acggttatcc acagaatcag gggataacgc aggaaagaac atgtgagcaa aaggccagca + 1081 aaaggccagg aaccgtaaaa aggccgcgtt gctggcgttt ttccacaggc tccgcccccc + 1141 tgacgagcat cacaaaaatc gacgctcaag tcagaggtgg cgaaacccga caggactata + 1201 aagataccag gcgtttcccc ctggaagctc cctcgtgcgc tctcctgttc cgaccctgcc + 1261 gcttaccgga tacctgtccg cctttctccc ttcgggaagc gtggcgcttt ctcatagctc + 1321 acgctgtagg tatctcagtt cggtgtaggt cgttcgctcc aagctgggct gtgtgcacga + 1381 accccccgtt cagcccgacc gctgcgcctt atccggtaac tatcgtcttg agtccaaccc + 1441 ggtaagacac gacttatcgc cactggcagc agccactggt aacaggatta gcagagcgag + 1501 gtatgtaggc ggtgctacag agttcttgaa gtggtggcct aactacggct acactagaag + 1561 aacagtattt ggtatctgcg ctctgctgaa gccagttacc ttcggaaaaa gagttggtag + 1621 ctcttgatcc ggcaaacaaa ccaccgctgg tagcggtggt ttttttgttt gcaagcagca + 1681 gattacgcgc agaaaaaaag gatctcaaga agatcctttg atcttttcta cggggtctga + 1741 cgctcagtgg aacgaaaact cacgttaagg gattttggtc atgagattat caaaaaggat + 1801 cttcacctag atccttttaa attaaaaatg aagttttaaa tcaatctaaa gtatatatga + 1861 gtaaacttgg tctgacagct cgaggcttgg attctcacca ataaaaaacg cccggcggca + 1921 accgagcgtt ctgaacaaat ccagatggag ttctgaggtc attactggat ctatcaacag + 1981 gagtccaagc gagctcgata tcaaattacg ccccgccctg ccactcatcg cagtactgtt + 2041 gtaattcatt aagcattctg ccgacatgga agccatcaca aacggcatga tgaacctgaa + 2101 tcgccagcgg catcagcacc ttgtcgcctt gcgtataata tttgcccatg gtgaaaacgg + 2161 gggcgaagaa gttgtccata ttggccacgt ttaaatcaaa actggtgaaa ctcacccagg + 2221 gattggctga gacgaaaaac atattctcaa taaacccttt agggaaatag gccaggtttt + 2281 caccgtaaca cgccacatct tgcgaatata tgtgtagaaa ctgccggaaa tcgtcgtggt + 2341 attcactcca gagcgatgaa aacgtttcag tttgctcatg gaaaacggtg taacaagggt + 2401 gaacactatc ccatatcacc agctcaccgt ctttcattgc catacgaaat tccggatgag + 2461 cattcatcag gcgggcaaga atgtgaataa aggccggata aaacttgtgc ttatttttct + 2521 ttacggtctt taaaaaggcc gtaatatcca gctgaacggt ctggttatag gtacattgag + 2581 caactgactg aaatgcctca aaatgttctt tacgatgcca ttgggatata tcaacggtgg + 2641 tatatccagt gatttttttc tccattttag cttccttagc tcctgaaaat ctcgataact + 2701 caaaaaatac gcccggtagt gatcttattt cattatggtg aaagttggaa cctcttacgt + 2761 gcccgatcaa ctcgagtg +// \ No newline at end of file From 9f07fff32fb96f37a3642d2f23fd56080942137e Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 22 Mar 2023 13:20:57 +0000 Subject: [PATCH 16/42] test in separated routines --- test/test_component.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test/test_component.py b/test/test_component.py index 7c5beced..4ec80c57 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -293,7 +293,7 @@ def test_high_level_constructors(self): doc.add(media_comp) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: {media_identity}' - def test_sep055(self): + def test_restriction_enzyme_bp011(self): """Test construction of components and features using helper functions: for each, build manually and compare.""" hlc_doc = sbol3.Document() doc = sbol3.Document() @@ -303,6 +303,11 @@ def test_sep055(self): restriction_enzyme_definition = 'http://rebase.neb.com/rebase/enz/BsaI.html' # TODO: replace with getting the URI from Enzyme when REBASE identifiers become available in biopython 1.80 bsai = ed_restriction_enzyme(restriction_enzyme_name) assert bsai.definition == restriction_enzyme_definition, 'Constructor Error: ed_restriction_enzyme' + + def test_backbone_bp011(self): + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') # Backbone backbone_identity = 'backbone' backbone_sequence = 'aaGGGGttttCCCCaa' @@ -359,6 +364,11 @@ def test_sep055(self): linear_backbone_component.constraints.append(backbone_dropout_meets) doc.add([linear_backbone_component, linear_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {backbone_identity}' + + def test_part_in_backbone_bp011(self): + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') # Part in backbone hlc_doc = sbol3.Document() doc = sbol3.Document() @@ -419,6 +429,11 @@ def test_sep055(self): part_in_backbone_component_linear.types.append(sbol3.SO_LINEAR) doc.add([part_in_backbone_component_linear, part_in_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' + + def test_part_in_backbone_from_sbol_bp011(self): + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') # Part in backbone from SBOL target_b0015_unitary_part_sequence = 'ccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' b0015_doc = convert_from_genbank('b0015.gb', 'https://github.com/Gonza10V') @@ -430,11 +445,11 @@ def test_sep055(self): b0015_unitary_part_sequence = feature.locations[0].sequence.lookup().elements[feature.locations[0].start-1:feature.locations[0].end] assert target_b0015_unitary_part_sequence == b0015_unitary_part_sequence - # Assembly plan setup + def test_assembly_plan_bp011(self): hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') - + # Assembly plan setup bsai = ed_restriction_enzyme('BsaI') #lvl1 acceptor lvl1_pOdd_acceptor_seq = 'gctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgggagtgagacccaatacgcaaaccgcctctccccgcgcgttggccgattcattaatgcagctggcacgacaggtttcccgactggaaagcgggcagtgagcgcaacgcaattaatgtgagttagctcactcattaggcaccccaggctttacactttatgcttccggctcgtatgttgtgtggaattgtgagcggataacaatttcacacatactagagaaagaggagaaatactagatggcttcctccgaagacgttatcaaagagttcatgcgtttcaaagttcgtatggaaggttccgttaacggtcacgagttcgaaatcgaaggtgaaggtgaaggtcgtccgtacgaaggtacccagaccgctaaactgaaagttaccaaaggtggtccgctgccgttcgcttgggacatcctgtccccgcagttccagtacggttccaaagcttacgttaaacacccggctgacatcccggactacctgaaactgtccttcccggaaggtttcaaatgggaacgtgttatgaacttcgaagacggtggtgttgttaccgttacccaggactcctccctgcaagacggtgagttcatctacaaagttaaactgcgtggtaccaacttcccgtccgacggtccggttatgcagaaaaaaaccatgggttgggaagcttccaccgaacgtatgtacccggaagacggtgctctgaaaggtgaaatcaaaatgcgtctgaaactgaaagacggtggtcactacgacgctgaagttaaaaccacctacatggctaaaaaaccggttcagctgccgggtgcttacaaaaccgacatcaaactggacatcacctcccacaacgaagactacaccatcgttgaacagtacgaacgtgctgaaggtcgtcactccaccggtgcttaataacgctgatagtgctagtgtagatcgctactagagccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttataggtctcaGCTTgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgaca' From a212530e1c4c69c418d0e5a1d0bda7f0f55efe93 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 22 Mar 2023 13:35:13 +0000 Subject: [PATCH 17/42] provide a correct file direction for convert_from_genbank --- test/test_component.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_component.py b/test/test_component.py index 4ec80c57..6846b926 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -456,13 +456,13 @@ def test_assembly_plan_bp011(self): podd_backbone, podd_backbone_seq = backbone('pOdd_bb', lvl1_pOdd_acceptor_seq, [1169,2259], 4, False, name='pOdd_bb') doc.add([podd_backbone,podd_backbone_seq]) #parts in backbone - j23100_b0034_doc = convert_from_genbank('j23100_b0034.gb', 'https://github.com/Gonza10V') + j23100_b0034_doc = convert_from_genbank('test_files/j23100_b0034.gb', 'https://github.com/Gonza10V') j23100_b0034_ac = [top_level for top_level in j23100_b0034_doc if type(top_level)==sbol3.Component][0] j23100_b0034_ac_seq_str = j23100_b0034_ac.sequences[0].lookup().elements - sfgfp_doc = convert_from_genbank('sfgfp.gb', 'https://github.com/Gonza10V') + sfgfp_doc = convert_from_genbank('test_files/sfgfp.gb', 'https://github.com/Gonza10V') sfgfp_ce = [top_level for top_level in sfgfp_doc if type(top_level)==sbol3.Component][0] sfgfp_ce_seq_str = sfgfp_ce.sequences[0].lookup().elements - b0015_doc = convert_from_genbank('b0015.gb', 'https://github.com/Gonza10V') + b0015_doc = convert_from_genbank('test_files/b0015.gb', 'https://github.com/Gonza10V') b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq = part_in_backbone_from_sbol('j23100_b0034_ac_in_bb', j23100_b0034_ac, [476,545], [sbol3.SO_PROMOTER, sbol3.SO_RBS], 4, False, name='j23100_b0034_ac_in_bb') From 599be7e5dda31a9afd974e78d61992de280dbe56 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Mon, 27 Mar 2023 11:09:45 +0100 Subject: [PATCH 18/42] using paterns in sbol-utilities for directories --- test/test_component.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test_component.py b/test/test_component.py index 6846b926..531ab269 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -449,6 +449,7 @@ def test_assembly_plan_bp011(self): hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') + test_dir = os.path.dirname(os.path.realpath(__file__)) # Assembly plan setup bsai = ed_restriction_enzyme('BsaI') #lvl1 acceptor @@ -456,13 +457,16 @@ def test_assembly_plan_bp011(self): podd_backbone, podd_backbone_seq = backbone('pOdd_bb', lvl1_pOdd_acceptor_seq, [1169,2259], 4, False, name='pOdd_bb') doc.add([podd_backbone,podd_backbone_seq]) #parts in backbone - j23100_b0034_doc = convert_from_genbank('test_files/j23100_b0034.gb', 'https://github.com/Gonza10V') + j23100_b0034_dir = os.path.join(test_dir, 'test_files', 'j23100_b0034.gb') + sfgfp_dir = os.path.join(test_dir, 'test_files', 'sfgfp.gb') + b0015_dir = os.path.join(test_dir, 'test_files', 'b0015.gb') + j23100_b0034_doc = convert_from_genbank(j23100_b0034_dir, 'https://github.com/Gonza10V') j23100_b0034_ac = [top_level for top_level in j23100_b0034_doc if type(top_level)==sbol3.Component][0] j23100_b0034_ac_seq_str = j23100_b0034_ac.sequences[0].lookup().elements - sfgfp_doc = convert_from_genbank('test_files/sfgfp.gb', 'https://github.com/Gonza10V') + sfgfp_doc = convert_from_genbank(sfgfp_dir, 'https://github.com/Gonza10V') sfgfp_ce = [top_level for top_level in sfgfp_doc if type(top_level)==sbol3.Component][0] sfgfp_ce_seq_str = sfgfp_ce.sequences[0].lookup().elements - b0015_doc = convert_from_genbank('test_files/b0015.gb', 'https://github.com/Gonza10V') + b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq = part_in_backbone_from_sbol('j23100_b0034_ac_in_bb', j23100_b0034_ac, [476,545], [sbol3.SO_PROMOTER, sbol3.SO_RBS], 4, False, name='j23100_b0034_ac_in_bb') From 260b4d1954b58d234ef6171073a625e1a9eaabac Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Mon, 27 Mar 2023 13:59:41 +0100 Subject: [PATCH 19/42] add dir from codebase on test_part_in_backbone_from_sbol_bp011 --- test/test_component.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_component.py b/test/test_component.py index 531ab269..3551a046 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -434,9 +434,12 @@ def test_part_in_backbone_from_sbol_bp011(self): hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') + test_dir = os.path.dirname(os.path.realpath(__file__)) + b0015_dir = os.path.join(test_dir, 'test_files', 'b0015.gb') + # Part in backbone from SBOL target_b0015_unitary_part_sequence = 'ccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' - b0015_doc = convert_from_genbank('b0015.gb', 'https://github.com/Gonza10V') + b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') From 8cb7404fbdaf9b54a515fb9559a24264bef786c9 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Mon, 27 Mar 2023 16:16:24 +0100 Subject: [PATCH 20/42] adding objects to the document --- test/test_component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_component.py b/test/test_component.py index 3551a046..f74bff6b 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -441,8 +441,8 @@ def test_part_in_backbone_from_sbol_bp011(self): target_b0015_unitary_part_sequence = 'ccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] - b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') + doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq]) for feature in b0015_ef_in_bb.features: if feature.roles == [sbol3.SO_TERMINATOR, tyto.SO.engineered_insert]: b0015_unitary_part_sequence = feature.locations[0].sequence.lookup().elements[feature.locations[0].start-1:feature.locations[0].end] From 02e71f44f857d9c9d78e25f82640be96a3c54b62 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Mon, 27 Mar 2023 17:18:12 +0100 Subject: [PATCH 21/42] Update sbol_utilities/component.py fixes typo a in length Co-authored-by: Jacob Beal --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 947eb94d..edf97de6 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -631,7 +631,7 @@ def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part :param sbol3_comp: The SBOL3 Component that will be used to create the part in backbone. :param part_location: List of 2 integers that indicates the start and the end of the unitary part sequence. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. :param part_roles: List of strings that indicates the roles of the part. - :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) + :param fusion_site_length: Integer of the length of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. :param overwrite: Boolean that indicates if the input Component will be overwritten. By default it is seted to True. :param kwargs: Keyword arguments of any other Component attribute. From 3d68860dcbda12a8f92019180f5aa29c1bbcf21a Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Mon, 27 Mar 2023 17:18:58 +0100 Subject: [PATCH 22/42] Update sbol_utilities/component.py improves doctring that explain overwrite param Co-authored-by: Jacob Beal --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index edf97de6..c9763e80 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -633,7 +633,7 @@ def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part :param part_roles: List of strings that indicates the roles of the part. :param fusion_site_length: Integer of the length of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. - :param overwrite: Boolean that indicates if the input Component will be overwritten. By default it is seted to True. + :param overwrite: if true, modify the Component provided; otherwise, copy it to make a new component. :param kwargs: Keyword arguments of any other Component attribute. :return: A tuple of Component and Sequence. """ From a44b8b2c7f41513830f9479c53def71984e96c56 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Thu, 30 Mar 2023 13:20:13 +0100 Subject: [PATCH 23/42] update --- sbol_utilities/component.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 947eb94d..61ae8ac1 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -902,7 +902,6 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L class Assembly_plan_composite_in_backbone_single_enzyme(): """Creates a Assembly Plan. - #classes uses param here? :param parts_in_backbone: Parts in backbone to be assembled. :param acceptor_backbone: Backbone in which parts are inserted on the assembly. :param restriction_enzymes: Restriction enzyme with correct name from Bio.Restriction as Externally Defined. From 2caea26266818e347610b531611d8c0458941860 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Thu, 30 Mar 2023 14:48:36 +0100 Subject: [PATCH 24/42] changes in response to code review 27/03/23 --- sbol_utilities/component.py | 50 ++++++++++++------------------------- test/test_component.py | 17 +++++-------- 2 files changed, 22 insertions(+), 45 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index b209cd5d..f92541a2 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -624,16 +624,19 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.types.append(topology_type) return part_in_backbone_component, part_in_backbone_seq -def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, overwrite:bool=True, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: +def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: """Creates a Part in Backbone Component and its Sequence following BP011 from an unformatted SBOL3 Component. + It overwrites the SBOL3 Component provided. + A part inserted into a backbone is represented by a Component that includes both the part insert + as a feature that is a SubComponent and the backbone as another SubComponent. + For more information about BP011 visit https://github.com/SynBioDex/SBOL-examples/tree/main/SBOL/best-practices/BP011 :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. - :param sbol3_comp: The SBOL3 Component that will be used to create the part in backbone. - :param part_location: List of 2 integers that indicates the start and the end of the unitary part sequence. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param sbol3_comp: The SBOL3 Component that will be used to create the part in backbone Component and Sequence. + :param part_location: List of 2 integers that indicates the start and the end of the unitary part. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. :param part_roles: List of strings that indicates the roles of the part. :param fusion_site_length: Integer of the length of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) - :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. - :param overwrite: if true, modify the Component provided; otherwise, copy it to make a new component. + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. :param kwargs: Keyword arguments of any other Component attribute. :return: A tuple of Component and Sequence. """ @@ -642,15 +645,13 @@ def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part if len(sbol3_comp.sequences)!=1: raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol3_comp.sequences)} sequences') sequence = sbol3_comp.sequences[0].lookup().elements - if overwrite: - part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) - else: - part_in_backbone_component = sbol3_comp - part_in_backbone_seq = sbol3_comp.sequences[0].lookup() + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) for part_role in part_roles: part_in_backbone_component.roles.append(part_role) + # creating part feature part_location_comp = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[1]) + #TODO: add the option of fusion sites to be of different lenghts insertion_site_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[0], end=part_location[0]+fusion_site_length, order=1) insertion_site_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=part_location[1], order=3) part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles) @@ -659,12 +660,14 @@ def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part if linear: part_in_backbone_component.types.append(sbol3.SO_LINEAR) part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) + # creating backbone feature open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=1) open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=3) open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) else: part_in_backbone_component.types.append(sbol3.SO_CIRCULAR) part_in_backbone_component.roles.append(tyto.SO.plasmid_vector) + # creating backbone feature open_backbone_location1 = sbol3.Range(sequence=part_in_backbone_seq, start=1, end=part_location[0]+fusion_site_length-1, order=2) open_backbone_location2 = sbol3.Range(sequence=part_in_backbone_seq, start=part_location[1]-fusion_site_length, end=len(sequence), order=1) open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) @@ -673,6 +676,7 @@ def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part part_in_backbone_component.features.append(open_backbone_feature) backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) part_in_backbone_component.constraints.append(backbone_dropout_meets) + #TODO: Add a branch to create a component without overwriting the WHOLE input component return part_in_backbone_component, part_in_backbone_seq def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component, **kwargs)-> Tuple[sbol3.Component, sbol3.Sequence]: @@ -692,7 +696,6 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall for re in restriction_enzymes: enzyme = Restriction.__dict__[re.name] restriction_enzymes_pydna.append(enzyme) - #assembly_plan.features.append(re) modifier_participation = sbol3.Participation(roles=[sbol3.SBO_MODIFIER], participant=re) participations.append(modifier_participation) @@ -807,9 +810,7 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L if working_alignment_5_prime_fusion_site in used_fusion_sites: raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") else: used_fusion_sites.add(working_alignment_5_prime_fusion_site) - # if repeated elements pass - #if(all(x in working_alignment for x in alignment)): - # raise ValueError(f"Repeated elements in alignment {alignment}") + working_alignment = alignment + working_alignment @@ -832,9 +833,6 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L # if in used_fusion_sites, raise error if working_alignment_3_prime_fusion_site in used_fusion_sites: raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") - # if repeated elements, raise error - #if(all(x in working_alignment for x in alignment)): - # raise ValueError(f"Repeated elements in alignment {alignment}") working_alignment = working_alignment + alignment @@ -856,7 +854,6 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L break # TODO: feed working alignment to alignments - #alignments.insert(0, working_alignment) # use final products to build assembly product somponent fusion_site_length = 4 @@ -874,29 +871,15 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L # create participations part_extract_subcomponent = sbol3.SubComponent(part_extract) # LocalSubComponent?? part_extract_subcomponents.append(part_extract_subcomponent) - # if not in assembl plan? - #assembly_plan.features.append(part_extract_subcomponent) # should be saved at composite level - #part_subcomponents.append(part_subcomponent) - #part_participation = sbol3.Participation(roles=[sbol3.SBO_REACTANT], participant=part_subcomponent) - #participations.append(part_participation) composite_name = composite_name + part_extract.name # create dna componente and sequence composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}_{composite_name}', composite_sequence_str) # **kwarads use in future? composite_component.roles.append(sbol3.SO_ENGINEERED_REGION) composite_component.features = part_extract_subcomponents # TODO fix order of features - #composite_component.constraints.append(sbol3.Constraint(sbol3.SBOL_MEETS, composite_component.features[composite_number-1], composite_component.features[composite_number])) - # add product participation - #composite_subcomponent = sbol3.SubComponent(composite_component) - #participations.append(sbol3.Participation(roles=[sbol3.SBO_PRODUCT], participant=composite_subcomponent)) - # create interactions - #assembly_plan.interactions.append(sbol3.Interaction(types=[tyto.SBO.conversion], participations=participations)) products_list.append([composite_component, composite_seq]) composite_number += 1 - #create preceed constrain - #create composite part or part in backbone - #add interactions to assembly_plan - #add participations to assembly_plan + # TODO: modify to work outside of an assembly plan as well return products_list @@ -926,7 +909,6 @@ def __init__(self, name: str, parts_in_backbone: List[sbol3.Component], acceptor def run(self): self.assembly_plan_component.features.append(self.restriction_enzyme) - #store reactant c #extract parts part_number = 1 for part_in_backbone in self.parts_in_backbone: diff --git a/test/test_component.py b/test/test_component.py index f74bff6b..a52784a4 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -294,21 +294,19 @@ def test_high_level_constructors(self): assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: {media_identity}' def test_restriction_enzyme_bp011(self): - """Test construction of components and features using helper functions: for each, build manually and compare.""" - hlc_doc = sbol3.Document() - doc = sbol3.Document() + """Test the restriction_enzyme function""" sbol3.set_namespace('http://sbolstandard.org/testfiles') - # Restriction enzyme restriction_enzyme_name = 'BsaI' restriction_enzyme_definition = 'http://rebase.neb.com/rebase/enz/BsaI.html' # TODO: replace with getting the URI from Enzyme when REBASE identifiers become available in biopython 1.80 bsai = ed_restriction_enzyme(restriction_enzyme_name) assert bsai.definition == restriction_enzyme_definition, 'Constructor Error: ed_restriction_enzyme' def test_backbone_bp011(self): + """Test the backbone function""" hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') - # Backbone + backbone_identity = 'backbone' backbone_sequence = 'aaGGGGttttCCCCaa' dropout_location = [3,15] @@ -366,10 +364,7 @@ def test_backbone_bp011(self): assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {backbone_identity}' def test_part_in_backbone_bp011(self): - hlc_doc = sbol3.Document() - doc = sbol3.Document() - sbol3.set_namespace('http://sbolstandard.org/testfiles') - # Part in backbone + """Test the part_in_backbone function""" hlc_doc = sbol3.Document() doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') @@ -431,7 +426,7 @@ def test_part_in_backbone_bp011(self): assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' def test_part_in_backbone_from_sbol_bp011(self): - hlc_doc = sbol3.Document() + """Test part_in_backbone function with a part from SBOL""" doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') test_dir = os.path.dirname(os.path.realpath(__file__)) @@ -449,7 +444,7 @@ def test_part_in_backbone_from_sbol_bp011(self): assert target_b0015_unitary_part_sequence == b0015_unitary_part_sequence def test_assembly_plan_bp011(self): - hlc_doc = sbol3.Document() + """Test assembly plan class""" doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') test_dir = os.path.dirname(os.path.realpath(__file__)) From ccf6aba863c1f839034dc7f94b7ec5451310832d Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Fri, 21 Apr 2023 15:26:48 +0100 Subject: [PATCH 25/42] if identity is not none overwrite the SBOL component --- sbol_utilities/component.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index f92541a2..ca6de42c 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -624,7 +624,7 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.types.append(topology_type) return part_in_backbone_component, part_in_backbone_seq -def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: +def part_in_backbone_from_sbol(identity: Union[str, None], sbol3_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: """Creates a Part in Backbone Component and its Sequence following BP011 from an unformatted SBOL3 Component. It overwrites the SBOL3 Component provided. A part inserted into a backbone is represented by a Component that includes both the part insert @@ -645,7 +645,11 @@ def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part if len(sbol3_comp.sequences)!=1: raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol3_comp.sequences)} sequences') sequence = sbol3_comp.sequences[0].lookup().elements - part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + if identity == None: + part_in_backbone_component = sbol3_comp + part_in_backbone_seq = sbol3_comp.sequences[0] + else: + part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) for part_role in part_roles: part_in_backbone_component.roles.append(part_role) @@ -677,6 +681,9 @@ def part_in_backbone_from_sbol(identity: str, sbol3_comp: sbol3.Component, part backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=part_sequence_feature, object=open_backbone_feature) part_in_backbone_component.constraints.append(backbone_dropout_meets) #TODO: Add a branch to create a component without overwriting the WHOLE input component + #removing repeated types and roles + part_in_backbone_component.types = set(part_in_backbone_component.types) + part_in_backbone_component.roles = set(part_in_backbone_component.roles) return part_in_backbone_component, part_in_backbone_seq def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.ExternallyDefined], assembly_plan:sbol3.Component, **kwargs)-> Tuple[sbol3.Component, sbol3.Sequence]: From 63d34144593d37c811e7c52b6dcbfdafd906f15c Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 26 Apr 2023 17:57:52 +0100 Subject: [PATCH 26/42] improved test_part_in_backbone_from_sbol_bp011 --- sbol_utilities/component.py | 4 +- test/test_component.py | 20 ++++-- .../test_part_in_backbone_from_sbol_bp011.nt | 67 +++++++++++++++++++ 3 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 test/test_files/test_part_in_backbone_from_sbol_bp011.nt diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index ca6de42c..e77ac5d7 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -631,10 +631,10 @@ def part_in_backbone_from_sbol(identity: Union[str, None], sbol3_comp: sbol3.Co as a feature that is a SubComponent and the backbone as another SubComponent. For more information about BP011 visit https://github.com/SynBioDex/SBOL-examples/tree/main/SBOL/best-practices/BP011 - :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. + :param identity: The identity of the Component, is its a String it build a new SBOL Component, if None it adds on top of the input. The identity of Sequence is also identity with the suffix '_seq'. :param sbol3_comp: The SBOL3 Component that will be used to create the part in backbone Component and Sequence. :param part_location: List of 2 integers that indicates the start and the end of the unitary part. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. - :param part_roles: List of strings that indicates the roles of the part. + :param part_roles: List of strings that indicates the roles to add on the part. :param fusion_site_length: Integer of the length of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. :param kwargs: Keyword arguments of any other Component attribute. diff --git a/test/test_component.py b/test/test_component.py index a52784a4..b71ad8ae 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -426,22 +426,28 @@ def test_part_in_backbone_bp011(self): assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {identity_pib}' def test_part_in_backbone_from_sbol_bp011(self): - """Test part_in_backbone function with a part from SBOL""" - doc = sbol3.Document() + hlc_doc = sbol3.Document() sbol3.set_namespace('http://sbolstandard.org/testfiles') test_dir = os.path.dirname(os.path.realpath(__file__)) b0015_dir = os.path.join(test_dir, 'test_files', 'b0015.gb') - # Part in backbone from SBOL target_b0015_unitary_part_sequence = 'ccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] - b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') - doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq]) - for feature in b0015_ef_in_bb.features: + hlc_b0015_ef_in_bb, hlc_b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') + hlc_doc.add([hlc_b0015_ef_in_bb, hlc_b0015_ef_in_bb_seq]) + for feature in hlc_b0015_ef_in_bb.features: if feature.roles == [sbol3.SO_TERMINATOR, tyto.SO.engineered_insert]: b0015_unitary_part_sequence = feature.locations[0].sequence.lookup().elements[feature.locations[0].start-1:feature.locations[0].end] - assert target_b0015_unitary_part_sequence == b0015_unitary_part_sequence + assert target_b0015_unitary_part_sequence == b0015_unitary_part_sequence , "Unitary part sequence does not match target sequence" + assert len(hlc_b0015_ef_in_bb.features) == 3, f"Incorrect number of features, number of features expeted is 3, got {len(hlc_b0015_ef_in_bb.features)}" + assert hlc_b0015_ef_in_bb.types == [sbol3.SBO_DNA, sbol3.SO_CIRCULAR] , f"Incorrect types, types expected are [sbol3.SBO_DNA, sbol3.SO_CIRCULAR], got {hlc_b0015_ef_in_bb.types}" + assert hlc_b0015_ef_in_bb.roles == [sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector], f"Incorrect roles, roles expected are [sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector], got {hlc_b0015_ef_in_bb.roles}" + features_roles = set() + for ft in hlc_b0015_ef_in_bb.features: + for role in ft.roles: + features_roles.add(role) + assert features_roles == set([tyto.SO.insertion_site, sbol3.SO_TERMINATOR, tyto.SO.engineered_insert]), f"Incorrect feature roles, roles expected are [tyto.SO.insertion_site, sbol3.SO_TERMINATOR, tyto.SO.engineered_insert], got {features_roles}" def test_assembly_plan_bp011(self): """Test assembly plan class""" diff --git a/test/test_files/test_part_in_backbone_from_sbol_bp011.nt b/test/test_files/test_part_in_backbone_from_sbol_bp011.nt new file mode 100644 index 00000000..0e244d22 --- /dev/null +++ b/test/test_files/test_part_in_backbone_from_sbol_bp011.nt @@ -0,0 +1,67 @@ + "Constraint1" . + . + . + . + . + "Range1" . + "646"^^ . + . + "518"^^ . + . + "SequenceFeature1" . + . + . + . + . + "Range1" . + "522"^^ . + . + "1"^^ . + "518"^^ . + . + "Range2" . + "646"^^ . + . + "3"^^ . + "642"^^ . + . + "SequenceFeature2" . + . + . + . + . + "Range1" . + "521"^^ . + . + "2"^^ . + "1"^^ . + . + "Range2" . + "2190"^^ . + . + "1"^^ . + "642"^^ . + . + "SequenceFeature3" . + . + . + . + "b0015_ef_in_bb" . + . + . + . + . + . + . + "b0015_ef_in_bb" . + . + . + . + . + . + . + "b0015_ef_in_bb_seq" . + "aagggtgaacactatcccatatcaccagctcaccgtctttcattgccatacgaaattccggatgagcattcatcaggcgggcaagaatgtgaataaaggccggataaaacttgtgcttatttttctttacggtctttaaaaaggccgtaatatccagctgaacggtctggttataggtacattgagcaactgactgaaatgcctcaaaatgttctttacgatgccattgggatatatcaacggtggtatatccagtgatttttttctccattttagcttccttagctcctgaaaatctcgataactcaaaaaatacgcccggtagtgatcttatttcattatggtgaaagttggaacctcttacgtgcccgatcaactcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcggtctcggcttccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttatacgctCGAGaccctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgaggcttggattctcaccaataaaaaacgcccggcggcaaccgagcgttctgaacaaatccagatggagttctgaggtcattactggatctatcaacaggagtccaagcgagctcgatatcaaattacgccccgccctgccactcatcgcagtactgttgtaattcattaagcattctgccgacatggaagccatcacaaacggcatgatgaacctgaatcgccagcggcatcagcaccttgtcgccttgcgtataatatttgcccatggtgaaaacgggggcgaagaagttgtccatattggccacgtttaaatcaaaactggtgaaactcacccagggattggctgagacgaaaaacatattctcaataaaccctttagggaaataggccaggttttcaccgtaacacgccacatcttgcgaatatatgtgtagaaactgccggaaatcgtcgtggtattcactccagagcgatgaaaacgtttcagtttgctcatggaaaacggtgtaac" . + . + . + . From a7c359288b9e7a181c2e5a6a58123e8a9887a910 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Wed, 26 Apr 2023 18:20:00 +0100 Subject: [PATCH 27/42] asserts with sets --- test/test_component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_component.py b/test/test_component.py index b71ad8ae..ada2287d 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -441,8 +441,8 @@ def test_part_in_backbone_from_sbol_bp011(self): b0015_unitary_part_sequence = feature.locations[0].sequence.lookup().elements[feature.locations[0].start-1:feature.locations[0].end] assert target_b0015_unitary_part_sequence == b0015_unitary_part_sequence , "Unitary part sequence does not match target sequence" assert len(hlc_b0015_ef_in_bb.features) == 3, f"Incorrect number of features, number of features expeted is 3, got {len(hlc_b0015_ef_in_bb.features)}" - assert hlc_b0015_ef_in_bb.types == [sbol3.SBO_DNA, sbol3.SO_CIRCULAR] , f"Incorrect types, types expected are [sbol3.SBO_DNA, sbol3.SO_CIRCULAR], got {hlc_b0015_ef_in_bb.types}" - assert hlc_b0015_ef_in_bb.roles == [sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector], f"Incorrect roles, roles expected are [sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector], got {hlc_b0015_ef_in_bb.roles}" + assert set(hlc_b0015_ef_in_bb.types) == set([sbol3.SBO_DNA, sbol3.SO_CIRCULAR]) , f"Incorrect types, types expected are [sbol3.SBO_DNA, sbol3.SO_CIRCULAR], got {hlc_b0015_ef_in_bb.types}" + assert set(hlc_b0015_ef_in_bb.roles) == set([sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector]), f"Incorrect roles, roles expected are [sbol3.SO_DOUBLE_STRANDED, sbol3.SO_TERMINATOR, tyto.SO.plasmid_vector], got {hlc_b0015_ef_in_bb.roles}" features_roles = set() for ft in hlc_b0015_ef_in_bb.features: for role in ft.roles: From 0717c4611ebca4caccb462bfe745de20c0cd4546 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Fri, 28 Apr 2023 11:07:59 +0100 Subject: [PATCH 28/42] Update sbol_utilities/component.py Co-authored-by: Jacob Beal --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index e77ac5d7..5d5bc285 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -625,7 +625,7 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo return part_in_backbone_component, part_in_backbone_seq def part_in_backbone_from_sbol(identity: Union[str, None], sbol3_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: - """Creates a Part in Backbone Component and its Sequence following BP011 from an unformatted SBOL3 Component. + """Restructures a non-hierarchical plasmid Component to follow the part-in-backbone pattern following BP011. It overwrites the SBOL3 Component provided. A part inserted into a backbone is represented by a Component that includes both the part insert as a feature that is a SubComponent and the backbone as another SubComponent. From d33c3e5375540a2c26707ffd15655d35bc4d32d0 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Fri, 28 Apr 2023 12:39:24 +0100 Subject: [PATCH 29/42] adds TODOs --- sbol_utilities/component.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 5d5bc285..6e2db7ac 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -624,7 +624,7 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.types.append(topology_type) return part_in_backbone_component, part_in_backbone_seq -def part_in_backbone_from_sbol(identity: Union[str, None], sbol3_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: +def part_in_backbone_from_sbol(identity: Union[str, None], sbol_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: """Restructures a non-hierarchical plasmid Component to follow the part-in-backbone pattern following BP011. It overwrites the SBOL3 Component provided. A part inserted into a backbone is represented by a Component that includes both the part insert @@ -632,7 +632,7 @@ def part_in_backbone_from_sbol(identity: Union[str, None], sbol3_comp: sbol3.Co For more information about BP011 visit https://github.com/SynBioDex/SBOL-examples/tree/main/SBOL/best-practices/BP011 :param identity: The identity of the Component, is its a String it build a new SBOL Component, if None it adds on top of the input. The identity of Sequence is also identity with the suffix '_seq'. - :param sbol3_comp: The SBOL3 Component that will be used to create the part in backbone Component and Sequence. + :param sbol_comp: The SBOL3 Component that will be used to create the part in backbone Component and Sequence. :param part_location: List of 2 integers that indicates the start and the end of the unitary part. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. :param part_roles: List of strings that indicates the roles to add on the part. :param fusion_site_length: Integer of the length of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) @@ -642,12 +642,12 @@ def part_in_backbone_from_sbol(identity: Union[str, None], sbol3_comp: sbol3.Co """ if len(part_location) != 2: raise ValueError('The part_location only accepts 2 int values in a list.') - if len(sbol3_comp.sequences)!=1: + if len(sbol_comp.sequences)!=1: raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol3_comp.sequences)} sequences') - sequence = sbol3_comp.sequences[0].lookup().elements + sequence = sbol_comp.sequences[0].lookup().elements if identity == None: - part_in_backbone_component = sbol3_comp - part_in_backbone_seq = sbol3_comp.sequences[0] + part_in_backbone_component = sbol_comp + part_in_backbone_seq = sbol_comp.sequences[0] else: part_in_backbone_component, part_in_backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) part_in_backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) @@ -661,6 +661,7 @@ def part_in_backbone_from_sbol(identity: Union[str, None], sbol3_comp: sbol3.Co part_sequence_feature = sbol3.SequenceFeature(locations=[part_location_comp], roles=part_roles) part_sequence_feature.roles.append(tyto.SO.engineered_insert) insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + #TODO: infer topology from the input if linear: part_in_backbone_component.types.append(sbol3.SO_LINEAR) part_in_backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) From 5d86cf5e2e93f5c547345afccfca987526415c33 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Tue, 2 May 2023 20:30:40 +0100 Subject: [PATCH 30/42] minor changes on comments --- test/test_component.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_component.py b/test/test_component.py index ada2287d..ce21aed2 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -461,6 +461,7 @@ def test_assembly_plan_bp011(self): podd_backbone, podd_backbone_seq = backbone('pOdd_bb', lvl1_pOdd_acceptor_seq, [1169,2259], 4, False, name='pOdd_bb') doc.add([podd_backbone,podd_backbone_seq]) #parts in backbone + ##get parts from genbank j23100_b0034_dir = os.path.join(test_dir, 'test_files', 'j23100_b0034.gb') sfgfp_dir = os.path.join(test_dir, 'test_files', 'sfgfp.gb') b0015_dir = os.path.join(test_dir, 'test_files', 'b0015.gb') @@ -473,6 +474,7 @@ def test_assembly_plan_bp011(self): b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements + ##SBOL parts in backbone j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq = part_in_backbone_from_sbol('j23100_b0034_ac_in_bb', j23100_b0034_ac, [476,545], [sbol3.SO_PROMOTER, sbol3.SO_RBS], 4, False, name='j23100_b0034_ac_in_bb') doc.add([j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq]) sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq = part_in_backbone_from_sbol('sfgfp_ce_in_bb', sfgfp_ce, [130,854], [sbol3.SO_CDS], 4, False, name='sfgfp_ce_in_bb') From 468d94a8c1b0bcbd89bdf5273238fadeaf5d12fb Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Fri, 27 Oct 2023 14:37:42 +0100 Subject: [PATCH 31/42] combinatorial assembly and backbone from sbol --- sbol_utilities/component.py | 258 +++++++++++++++++++------------- test/test_component.py | 110 +++++++++----- test/test_files/ab_j23100.gb | 80 ++++++++++ test/test_files/ab_j23101.gb | 81 ++++++++++ test/test_files/bc_b0034.gb | 81 ++++++++++ test/test_files/ce_ecfp.gb | 93 ++++++++++++ test/test_files/ce_gfp.gb | 93 ++++++++++++ test/test_files/ce_mrfp1.gb | 92 ++++++++++++ test/test_files/ef_b0015.gb | 95 ++++++++++++ test/test_files/j23100_b0034.gb | 82 ---------- test/test_files/podd1.gb | 221 +++++++++++++++++++++++++++ 11 files changed, 1059 insertions(+), 227 deletions(-) create mode 100644 test/test_files/ab_j23100.gb create mode 100644 test/test_files/ab_j23101.gb create mode 100644 test/test_files/bc_b0034.gb create mode 100644 test/test_files/ce_ecfp.gb create mode 100644 test/test_files/ce_gfp.gb create mode 100644 test/test_files/ce_mrfp1.gb create mode 100644 test/test_files/ef_b0015.gb delete mode 100644 test/test_files/j23100_b0034.gb create mode 100644 test/test_files/podd1.gb diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 6e2db7ac..e3cc6b4c 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -10,7 +10,7 @@ from Bio import Restriction from pydna.dseqrecord import Dseqrecord - +from itertools import product # TODO: consider allowing return of LocalSubComponent and ExternallyDefined def contained_components(roots: Union[sbol3.TopLevel, Iterable[sbol3.TopLevel]]) -> set[sbol3.Component]: @@ -541,7 +541,7 @@ def backbone(identity: str, sequence: str, dropout_location: List[int], fusion_s :param sequence: The DNA sequence of the Component encoded in IUPAC. :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) - :param linear: Boolean than indicates if the backbone is linear, by default it is seted to Flase which means that it has a circular topology. + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to False which means that it has a circular topology. :param kwargs: Keyword arguments of any other Component attribute. :return: A tuple of Component and Sequence. """ @@ -573,6 +573,52 @@ def backbone(identity: str, sequence: str, dropout_location: List[int], fusion_s backbone_component.constraints.append(backbone_dropout_meets) return backbone_component, backbone_seq +def backbone_from_sbol(identity: Union[str,None], sbol_comp: sbol3.Component, dropout_location: List[int], fusion_site_length:int, linear:bool, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: + """Creates a Backbone Component and its Sequence. + + :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. + :param sequence: The DNA sequence of the Component encoded in IUPAC. + :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. + :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) + :param linear: Boolean than indicates if the backbone is linear, by default it is seted to False which means that it has a circular topology. + :param kwargs: Keyword arguments of any other Component attribute. + :return: A tuple of Component and Sequence. + """ + if len(dropout_location) != 2: + raise ValueError('The dropout_location only accepts 2 int values in a list.') + if len(sbol_comp.sequences)!=1: + raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol_comp.sequences)} sequences') + sequence = str(sbol_comp.sequences[0].lookup().elements) + if identity == None: + backbone_component = sbol_comp + backbone_seq = sbol_comp.sequences[0] + else: + backbone_component, backbone_seq = dna_component_with_sequence(identity, sequence, **kwargs) + backbone_component.roles.append(sbol3.SO_DOUBLE_STRANDED) + dropout_location_comp = sbol3.Range(sequence=backbone_seq, start=dropout_location[0], end=dropout_location[1]) + insertion_site_location1 = sbol3.Range(sequence=backbone_seq, start=dropout_location[0], end=dropout_location[0]+fusion_site_length, order=1) + insertion_site_location2 = sbol3.Range(sequence=backbone_seq, start=dropout_location[1]-fusion_site_length, end=dropout_location[1], order=3) + dropout_sequence_feature = sbol3.SequenceFeature(locations=[dropout_location_comp], roles=[tyto.SO.deletion]) + insertion_sites_feature = sbol3.SequenceFeature(locations=[insertion_site_location1, insertion_site_location2], roles=[tyto.SO.insertion_site]) + if linear: + backbone_component.types.append(sbol3.SO_LINEAR) + backbone_component.roles.append(sbol3.SO_ENGINEERED_REGION) + open_backbone_location1 = sbol3.Range(sequence=backbone_seq, start=1, end=dropout_location[0]+fusion_site_length-1, order=1) + open_backbone_location2 = sbol3.Range(sequence=backbone_seq, start=dropout_location[1]-fusion_site_length, end=len(sequence), order=3) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + else: + backbone_component.types.append(sbol3.SO_CIRCULAR) + backbone_component.roles.append(tyto.SO.plasmid_vector) + open_backbone_location1 = sbol3.Range(sequence=backbone_seq, start=1, end=dropout_location[0]+fusion_site_length-1, order=2) + open_backbone_location2 = sbol3.Range(sequence=backbone_seq, start=dropout_location[1]-fusion_site_length, end=len(sequence), order=1) + open_backbone_feature = sbol3.SequenceFeature(locations=[open_backbone_location1, open_backbone_location2]) + backbone_component.features.append(dropout_sequence_feature) + backbone_component.features.append(insertion_sites_feature) + backbone_component.features.append(open_backbone_feature) + backbone_dropout_meets = sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=dropout_sequence_feature, object=open_backbone_feature) + backbone_component.constraints.append(backbone_dropout_meets) + return backbone_component, backbone_seq + def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Component, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: """Creates a Part in Backbone Component and its Sequence. @@ -601,6 +647,10 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo open_backbone_sequence_from_location2=backbone_sequence[backbone.features[-1].locations[1].start -1 : backbone.features[-1].locations[1].end] # extract part sequence part_sequence = part.sequences[0].lookup().elements + covered_fusion_site_lengths = [3,4] # review can be user input + for l in covered_fusion_site_lengths: + if open_backbone_sequence_from_location1[-l:]==part_sequence[:l] and part_sequence[-l:]==open_backbone_sequence_from_location2[:l]: + part_sequence = part_sequence[l:-l] # make new component sequence if linear: part_in_backbone_seq_str = open_backbone_sequence_from_location1 + part_sequence + open_backbone_sequence_from_location2 @@ -622,6 +672,8 @@ def part_in_backbone(identity: str, part: sbol3.Component, backbone: sbol3.Compo part_in_backbone_component.features.append(backbone_subcomponent) # adding topology part_in_backbone_component.types.append(topology_type) + #if len(part_in_backbone_component.name)==0: # TODO: review + # part_in_backbone_component.name = identity return part_in_backbone_component, part_in_backbone_seq def part_in_backbone_from_sbol(identity: Union[str, None], sbol_comp: sbol3.Component, part_location: List[int], part_roles:List[str], fusion_site_length:int, linear:bool=False, **kwargs) -> Tuple[sbol3.Component, sbol3.Sequence]: @@ -643,7 +695,7 @@ def part_in_backbone_from_sbol(identity: Union[str, None], sbol_comp: sbol3.Com if len(part_location) != 2: raise ValueError('The part_location only accepts 2 int values in a list.') if len(sbol_comp.sequences)!=1: - raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol3_comp.sequences)} sequences') + raise ValueError(f'The reactant needs to have precisely one sequence. The input reactant has {len(sbol_comp.sequences)} sequences') sequence = sbol_comp.sequences[0].lookup().elements if identity == None: part_in_backbone_component = sbol_comp @@ -691,7 +743,7 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall """Digests a Component using the provided restriction enzymes and creates a product Component and a digestion Interaction. The product Component is assumed to be the insert for parts in backbone and the backbone for backbones. - :param reactant: DNA to be digested as SBOL Component. + :param reactant: DNA to be digested as SBOL Component, usually a part_in_backbone. :param restriction_enzymes: Restriction enzymes used Externally Defined. :return: A tuple of Component and Interaction. """ @@ -744,8 +796,9 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_part_extract', sequence=product_sequence, **kwargs) #str(product_sequence)) # add sticky ends features five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1) - three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end), end=len(product_sequence), order=3) + three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence), order=3) fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site]) + prod_comp.roles.append(tyto.SO.engineered_insert) prod_comp.features.append(fusion_sites_feature) # if backbone @@ -757,8 +810,9 @@ def digestion(reactant:sbol3.Component, restriction_enzymes:List[sbol3.Externall prod_comp, prod_seq = dna_component_with_sequence(identity=f'{reactant.name}_backbone', sequence=product_sequence, **kwargs) #str(product_sequence)) # add sticky ends features five_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=1, end=len(product_5_prime_ss_end), order=1) - three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end), end=len(product_sequence), order=3) + three_prime_fusion_site_location = sbol3.Range(sequence=product_sequence, start=len(product_sequence)-len(product_3_prime_ss_end)+1, end=len(product_sequence), order=3) fusion_sites_feature = sbol3.SequenceFeature(locations=[five_prime_fusion_site_location, three_prime_fusion_site_location], roles=[tyto.SO.insertion_site]) + prod_comp.roles.append(tyto.SO.plasmid_vector) prod_comp.features.append(fusion_sites_feature) else: raise NotImplementedError('The reactant has no valid roles') @@ -789,113 +843,102 @@ def ligation(reactants:List[sbol3.Component], assembly_plan:sbol3.Component)-> L :param reactant: DNA to be ligated as SBOL Component. :return: A tuple of Component and Interaction. """ - # get all fusion sites - five_prime_fusion_sites = set() - three_prime_fusion_sites = set() - for r in reactants: - five_prime_fusion_sites.add(r.sequences[0].lookup().elements[:r.features[0].locations[0].end]) - three_prime_fusion_sites.add(r.sequences[0].lookup().elements[r.features[0].locations[1].start:]) - - alignments = [[r] for r in reactants] # like [[A],[B1],[B2],[C]]] and [[A,B1,C],[B1],[B2],[C]] - used_fusion_sites = set() - final_products = [] # [[A,B1,C]] - while alignments: - closed = False - five_prime_end = False - three_prime_end = False - # get the first item and remove it from the list - working_alignment = alignments[0] - alignments.pop(0) - # compare to all other alignments - for alignment in alignments: - working_alignment_5_prime_fusion_site = working_alignment[0].sequences[0].lookup().elements[:working_alignment[0].features[0].locations[0].end] - working_alignment_3_prime_fusion_site = working_alignment[-1].sequences[0].lookup().elements[working_alignment[-1].features[0].locations[1].start:] - alignment_5_prime_fusion_site = alignment[0].sequences[0].lookup().elements[:alignment[0].features[0].locations[0].end] - alignment_3_prime_fusion_site = alignment[-1].sequences[0].lookup().elements[alignment[-1].features[0].locations[1].start:] - # if working alignment 5' end matches a alignment 3' end - if working_alignment_5_prime_fusion_site == alignment_3_prime_fusion_site: - # if in used_fusion_sites, skip - if working_alignment_5_prime_fusion_site in used_fusion_sites: - raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") - else: used_fusion_sites.add(working_alignment_5_prime_fusion_site) - - - working_alignment = alignment + working_alignment - - working_alignment_5_prime_fusion_site = working_alignment[0].sequences[0].lookup().elements[:working_alignment[0].features[0].locations[0].end] - working_alignment_3_prime_fusion_site = working_alignment[-1].sequences[0].lookup().elements[working_alignment[-1].features[0].locations[1].start:] - - # if working alignment 5' end does not matches any 3' fusion site - if working_alignment_5_prime_fusion_site not in three_prime_fusion_sites: - five_prime_end = True - - # if working_alignment is closed, add to final_products - if working_alignment_5_prime_fusion_site == working_alignment_3_prime_fusion_site: - final_products.append(working_alignment) - closed = True - break - - ################################################ - # if working alignment 3' end matches a alignment 5' end - if working_alignment_3_prime_fusion_site == alignment_5_prime_fusion_site: - # if in used_fusion_sites, raise error - if working_alignment_3_prime_fusion_site in used_fusion_sites: - raise ValueError(f"Fusion site {working_alignment[0].sequences[0].lookup().elements[:fusion_site_length-1]} already used") - - working_alignment = working_alignment + alignment - - working_alignment_5_prime_fusion_site = working_alignment[0].sequences[0].lookup().elements[:working_alignment[0].features[0].locations[0].end] - working_alignment_3_prime_fusion_site = working_alignment[-1].sequences[0].lookup().elements[working_alignment[-1].features[0].locations[1].start:] - - # if working alignment 5' end does not matches any 3' fusion site - if working_alignment_3_prime_fusion_site not in five_prime_fusion_sites: - three_prime_end = True - - # if working_alignment is closed, add to final_products - if working_alignment_5_prime_fusion_site == working_alignment_3_prime_fusion_site: - final_products.append(working_alignment) - closed = True - break - # if no match, add to final products - if five_prime_end and three_prime_end: - final_products.append(working_alignment) - break + # Create a dictionary that maps each first and last 4 letters to a list of strings that have those letters. + reactant_parts = [] + fusion_sites_set = set() + for reactant in reactants: + fusion_site_3prime_length = reactant.features[0].locations[0].end - reactant.features[0].locations[0].start + fusion_site_5prime_length = reactant.features[0].locations[1].end - reactant.features[0].locations[1].start + if fusion_site_3prime_length == fusion_site_5prime_length: + fusion_site_length = fusion_site_3prime_length + 1 # if the fusion site is 4 bp long, the start will be 1 and end 4, 4-1 = 3, so we add 1 to get 4. + fusion_sites_set.add(fusion_site_length) + if len(fusion_sites_set) > 1: + raise ValueError(f'Fusion sites of different length within different parts. Check {reactant.identity} ') + else: + raise ValueError(f'Fusion sites of different length within the same part. Check {reactant.identity}') + if tyto.SO.plasmid_vector in reactant.roles: + reactant_parts.append(reactant) + elif tyto.SO.engineered_insert in reactant.roles: + reactant_parts.append(reactant) + else: + raise ValueError(f'Part {reactant.identity} does not have a valid role') + # remove the backbones if any from the reactants, to create the composite + groups = {} + for reactant in reactant_parts: + first_four_letters = reactant.sequences[0].lookup().elements[:fusion_site_length].lower() + last_four_letters = reactant.sequences[0].lookup().elements[-fusion_site_length:].lower() + part_syntax = f'{first_four_letters}_{last_four_letters}' + if part_syntax not in groups: + groups[part_syntax] = [] + groups[part_syntax].append(reactant) + else: groups[part_syntax].append(reactant) + # groups is a dictionary of lists of parts that have the same first and last 4 letters + # list_of_combinations_per_assembly is a list of tuples of parts that can be ligated together + list_of_parts_per_combination = list(product(*groups.values())) #cartesian product + # create list_of_composites_per_assembly from list_of_combinations_per_assembly + list_of_composites_per_assembly = [] + for combination in list_of_parts_per_combination: + list_of_parts_per_composite = [combination[0]] + insert_sequence = combination[0].sequences[0].lookup().elements + remaining_parts = list(combination[1:]) + it = 1 + while remaining_parts: + remaining_parts_before = len(remaining_parts) + for part in remaining_parts: + # match insert sequence 5' to part 3' + if part.sequences[0].lookup().elements[:fusion_site_length].lower() == insert_sequence[-fusion_site_length:].lower(): + insert_sequence = insert_sequence[:-fusion_site_length] + part.sequences[0].lookup().elements + list_of_parts_per_composite.append(part) + remaining_parts.remove(part) + # match insert sequence 3' to part 5' + elif part.sequences[0].lookup().elements[-fusion_site_length:].lower() == insert_sequence[:fusion_site_length].lower(): + insert_sequence = part.sequences[0].lookup().elements + insert_sequence[fusion_site_length:] + list_of_parts_per_composite.insert(0, part) + remaining_parts.remove(part) + remaining_parts_after = len(remaining_parts) - # TODO: feed working alignment to alignments - - # use final products to build assembly product somponent - fusion_site_length = 4 - products_list = [] - participations = [] - for composite in final_products: # a composite of the form [A,B,C] - composite_number = 0 - # calculate sequence - composite_sequence_str = "" - composite_name = "" - #part_subcomponents = [] - part_extract_subcomponents = [] - for part_extract in composite: - composite_sequence_str = composite_sequence_str + part_extract.sequences[0].lookup().elements[:-fusion_site_length] #needs a version for linear - # create participations - part_extract_subcomponent = sbol3.SubComponent(part_extract) # LocalSubComponent?? - part_extract_subcomponents.append(part_extract_subcomponent) - composite_name = composite_name + part_extract.name - # create dna componente and sequence - composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}_{composite_name}', composite_sequence_str) # **kwarads use in future? - composite_component.roles.append(sbol3.SO_ENGINEERED_REGION) - composite_component.features = part_extract_subcomponents - # TODO fix order of features - products_list.append([composite_component, composite_seq]) - composite_number += 1 - # TODO: modify to work outside of an assembly plan as well - + if remaining_parts_before == remaining_parts_after: + it += 1 + if it > 5: #5 was chosen arbitrarily to avoid infinite loops + print(groups) + raise ValueError('No match found, check the parts and their fusion sites') + list_of_composites_per_assembly.append(list_of_parts_per_composite) + + # transform list_of_parts_per_assembly into list of composites + products_list = [] + participations = [] + composite_number = 1 + for composite in list_of_composites_per_assembly: # a composite of the form [A,B,C] + # calculate sequence + composite_sequence_str = "" + composite_name = "" + #part_subcomponents = [] + part_extract_subcomponents = [] + for part_extract in composite: + composite_sequence_str = composite_sequence_str + part_extract.sequences[0].lookup().elements[:-fusion_site_length] #needs a version for linear + # create participations + part_extract_subcomponent = sbol3.SubComponent(part_extract) # LocalSubComponent?? + part_extract_subcomponents.append(part_extract_subcomponent) + composite_name = composite_name +'_'+ part_extract.name + # create dna componente and sequence + composite_component, composite_seq = dna_component_with_sequence(f'composite_{composite_number}{composite_name}', composite_sequence_str) # **kwarads use in future? + composite_component.name = f'composite_{composite_number}{composite_name}' + composite_component.roles.append(sbol3.SO_ENGINEERED_REGION) + composite_component.features = part_extract_subcomponents + for i in range(len(composite_component.features )-1): + composite_component.constraints = [sbol3.Constraint(restriction='http://sbols.org/v3#meets', subject=composite_component.features[i], object=composite_component.features[i+1])] + products_list.append([composite_component, composite_seq]) + composite_number += 1 return products_list + class Assembly_plan_composite_in_backbone_single_enzyme(): """Creates a Assembly Plan. + :param name: Name of the assembly plan Component. :param parts_in_backbone: Parts in backbone to be assembled. :param acceptor_backbone: Backbone in which parts are inserted on the assembly. :param restriction_enzymes: Restriction enzyme with correct name from Bio.Restriction as Externally Defined. + :param document: SBOL Document where the assembly plan will be created. :param linear: Boolean to inform if the reactant is linear. :param circular: Boolean to inform if the reactant is circular. :param **kwargs: Keyword arguments of any other Component attribute for the assembled part. @@ -920,11 +963,10 @@ def run(self): #extract parts part_number = 1 for part_in_backbone in self.parts_in_backbone: - part_comp, part_seq = digestion(reactant=part_in_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}') + part_comp, part_seq = digestion(reactant=part_in_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}_{part_in_backbone.display_id}') self.document.add([part_comp, part_seq]) self.extracted_parts.append(part_comp) part_number += 1 - #extract backbone (should be the same?) backbone_comp, backbone_seq = digestion(reactant=self.acceptor_backbone,restriction_enzymes=[self.restriction_enzyme], assembly_plan=self.assembly_plan_component, name=f'part_{part_number}') self.document.add([backbone_comp, backbone_seq]) @@ -936,4 +978,4 @@ def run(self): composite[0].generated_by.append(self.assembly_plan_component) # self.composites.append(composite) self.products.append(composite) - self.document.add(composite) \ No newline at end of file + self.document.add(composite) diff --git a/test/test_component.py b/test/test_component.py index ce21aed2..5cc28ef2 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -15,7 +15,7 @@ protein_stability_element, gene, operator, engineered_region, mrna, transcription_factor, \ strain, ed_simple_chemical, ed_protein -from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone, part_in_backbone_from_sbol, \ +from sbol_utilities.component import ed_restriction_enzyme, backbone, backbone_from_sbol, part_in_backbone, part_in_backbone_from_sbol, \ digestion, ligation, Assembly_plan_composite_in_backbone_single_enzyme from sbol_utilities.helper_functions import find_top_level, toplevel_named, TopLevelNotFound, outgoing_links from sbol_utilities.sbol_diff import doc_diff @@ -362,6 +362,21 @@ def test_backbone_bp011(self): linear_backbone_component.constraints.append(backbone_dropout_meets) doc.add([linear_backbone_component, linear_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {backbone_identity}' + + #Test backbone from SBOL + hlc_doc = sbol3.Document() + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + # build using backbone from SBOL + doc.add([circular_backbone_component, circular_backbone_seq]) + hl_circular_backbone_component, hl_circular_backbone_seq = backbone_from_sbol(identity=backbone_identity, sbol_comp=circular_backbone_component, dropout_location=dropout_location, fusion_site_length=fusion_site_length, linear=False, description=test_description) + hlc_doc.add([hl_circular_backbone_component, hl_circular_backbone_seq]) + assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Circular {backbone_identity} from SBOL' + + doc.add([linear_backbone_component, linear_backbone_seq]) + hl_linear_backbone_component, hl_linear_backbone_seq = backbone_from_sbol(identity=backbone_identity, sbol_comp=linear_backbone_component, dropout_location=dropout_location, fusion_site_length=fusion_site_length, linear=True, description=test_description) + hlc_doc.add([hl_linear_backbone_component, hl_linear_backbone_seq]) + assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Linear {backbone_identity} from SBOL' def test_part_in_backbone_bp011(self): """Test the part_in_backbone function""" @@ -462,53 +477,74 @@ def test_assembly_plan_bp011(self): doc.add([podd_backbone,podd_backbone_seq]) #parts in backbone ##get parts from genbank + podd1_dir = os.path.join(test_dir, 'test_files', 'podd1.gb') j23100_b0034_dir = os.path.join(test_dir, 'test_files', 'j23100_b0034.gb') sfgfp_dir = os.path.join(test_dir, 'test_files', 'sfgfp.gb') + rhlr_dir = os.path.join(test_dir, 'test_files', 'rhlr.gb') b0015_dir = os.path.join(test_dir, 'test_files', 'b0015.gb') - j23100_b0034_doc = convert_from_genbank(j23100_b0034_dir, 'https://github.com/Gonza10V') - j23100_b0034_ac = [top_level for top_level in j23100_b0034_doc if type(top_level)==sbol3.Component][0] - j23100_b0034_ac_seq_str = j23100_b0034_ac.sequences[0].lookup().elements - sfgfp_doc = convert_from_genbank(sfgfp_dir, 'https://github.com/Gonza10V') - sfgfp_ce = [top_level for top_level in sfgfp_doc if type(top_level)==sbol3.Component][0] - sfgfp_ce_seq_str = sfgfp_ce.sequences[0].lookup().elements + + """Test assembly plan class""" + doc = sbol3.Document() + sbol3.set_namespace('http://sbolstandard.org/testfiles') + # Assembly plan setup + bsai = ed_restriction_enzyme('BsaI') + #lvl1 acceptor + podd1_dir = os.path.join(test_dir, 'test_files', 'podd1.gb') + podd_doc = convert_from_genbank(podd1_dir, 'https://github.com/Gonza10V') + podd_af = [top_level for top_level in podd_doc if type(top_level)==sbol3.Component][0] + podd_backbone, podd_backbone_seq = backbone_from_sbol('pOdd_bb', podd_af, [680,1770], 4, False, name='pOdd_bb') + doc.add([podd_backbone,podd_backbone_seq]) + #parts in backbone + ##get parts from genbank + j23100_dir = os.path.join(test_dir, 'test_files', 'ab_j23100.gb') + j23101_dir = os.path.join(test_dir, 'test_files', 'ab_j23101.gb') + b0034_dir = os.path.join(test_dir, 'test_files', 'bc_b0034.gb') + gfp_dir = os.path.join(test_dir, 'test_files', 'ce_gfp.gb') + rfp_dir = os.path.join(test_dir, 'test_files', 'ce_mrfp1.gb') + cfp_dir = os.path.join(test_dir, 'test_files', 'ce_ecfp.gb') + b0015_dir = os.path.join(test_dir, 'test_files', 'ef_b0015.gb') + j23100_doc = convert_from_genbank(j23100_dir, 'https://github.com/Gonza10V') + j23100_ab = [top_level for top_level in j23100_doc if type(top_level)==sbol3.Component][0] + j23101_doc = convert_from_genbank(j23101_dir, 'https://github.com/Gonza10V') + j23101_ab = [top_level for top_level in j23101_doc if type(top_level)==sbol3.Component][0] + b0034_doc = convert_from_genbank(b0034_dir, 'https://github.com/Gonza10V') + b0034_bc = [top_level for top_level in b0034_doc if type(top_level)==sbol3.Component][0] + gfp_doc = convert_from_genbank(gfp_dir, 'https://github.com/Gonza10V') + gfp_ce = [top_level for top_level in gfp_doc if type(top_level)==sbol3.Component][0] + rfp_doc = convert_from_genbank(rfp_dir, 'https://github.com/Gonza10V') + rfp_ce = [top_level for top_level in rfp_doc if type(top_level)==sbol3.Component][0] + cfp_doc = convert_from_genbank(cfp_dir, 'https://github.com/Gonza10V') + cfp_ce = [top_level for top_level in cfp_doc if type(top_level)==sbol3.Component][0] b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V') b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0] - b0015_ef_seq_str = b0015_ef.sequences[0].lookup().elements ##SBOL parts in backbone - j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq = part_in_backbone_from_sbol('j23100_b0034_ac_in_bb', j23100_b0034_ac, [476,545], [sbol3.SO_PROMOTER, sbol3.SO_RBS], 4, False, name='j23100_b0034_ac_in_bb') - doc.add([j23100_b0034_ac_in_bb, j23100_b0034_ac_in_bb_seq]) - sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq = part_in_backbone_from_sbol('sfgfp_ce_in_bb', sfgfp_ce, [130,854], [sbol3.SO_CDS], 4, False, name='sfgfp_ce_in_bb') - doc.add([sfgfp_ce_in_bb, sfgfp_ce_in_bb_seq]) + j23100_ab_in_bb, j23100_ab_in_bb_seq = part_in_backbone_from_sbol('j23100_ab_in_bb', j23100_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23100_ab_in_bb') + doc.add([j23100_ab_in_bb, j23100_ab_in_bb_seq]) + j23101_ab_in_bb, j23101_ab_in_bb_seq = part_in_backbone_from_sbol('j23101_ab_in_bb', j23101_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23101_ab_in_bb') + doc.add([j23101_ab_in_bb, j23101_ab_in_bb_seq]) + b0034_bc_in_bb, b0034_bc_in_bb_seq = part_in_backbone_from_sbol('b0034_bc_in_bb', b0034_bc, [479,499], [sbol3.SO_RBS], 4, False, name='b0034_bc_in_bb') + doc.add([b0034_bc_in_bb, b0034_bc_in_bb_seq]) + gfp_ce_in_bb, gfp_ce_in_bb_seq = part_in_backbone_from_sbol('gfp_ce_in_bb', gfp_ce, [479,1195], [sbol3.SO_CDS], 4, False, name='gfp_ce_in_bb') + doc.add([gfp_ce_in_bb, gfp_ce_in_bb_seq]) + rfp_ce_in_bb, rfp_ce_in_bb_seq = part_in_backbone_from_sbol('rfp_ce_in_bb', rfp_ce, [479,1156], [sbol3.SO_CDS], 4, False, name='rfp_ce_in_bb') + doc.add([rfp_ce_in_bb, rfp_ce_in_bb_seq]) + cfp_ce_in_bb, cfp_ce_in_bb_seq = part_in_backbone_from_sbol('cfp_ce_in_bb', cfp_ce, [479,1198], [sbol3.SO_CDS], 4, False, name='cfp_ce_in_bb') + doc.add([cfp_ce_in_bb, cfp_ce_in_bb_seq]) b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb') doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq]) + + #Assembly plan - test_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( - name='constitutive_gfp_tu', - parts_in_backbone=[j23100_b0034_ac_in_bb, sfgfp_ce_in_bb, b0015_ef_in_bb], + combinatorial_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( + name='combinatorial_rgb_transcriptional_units', + parts_in_backbone=[j23100_ab_in_bb, j23101_ab_in_bb, b0034_bc_in_bb, gfp_ce_in_bb, rfp_ce_in_bb, cfp_ce_in_bb, b0015_ef_in_bb], acceptor_backbone=podd_backbone, restriction_enzyme=bsai, document=doc) - test_assembly_plan.run() - #Check assembly plan - expected_assembled_j23100_b0034_ac_seq_str = j23100_b0034_ac_seq_str[475:545] - assembled_j23100_b0034_ac_seq_str = test_assembly_plan.extracted_parts[0].sequences[0].lookup().elements - assert expected_assembled_j23100_b0034_ac_seq_str==assembled_j23100_b0034_ac_seq_str, 'Constructor Error: First extracted part sequence does not match expected sequence' - - expected_assembled_sfgfp_ce_seq_str = sfgfp_ce_seq_str[129:854] - assembled_sfgfp_ce_seq_str = test_assembly_plan.extracted_parts[1].sequences[0].lookup().elements - assert expected_assembled_sfgfp_ce_seq_str==assembled_sfgfp_ce_seq_str, 'Constructor Error: Second extracted part sequence does not match expected sequence' - - expected_assembled_b0015_ef_seq_str = b0015_ef_seq_str[513:650] - assembled_b0015_ef_seq_str = test_assembly_plan.extracted_parts[2].sequences[0].lookup().elements - assert expected_assembled_b0015_ef_seq_str==assembled_b0015_ef_seq_str, 'Constructor Error: Third extracted part sequence does not match expected sequence' - - expected_assembled_open_backbone_seq_str = lvl1_pOdd_acceptor_seq[2255:] + lvl1_pOdd_acceptor_seq[:1172] - assembled_open_backbone_seq_str = test_assembly_plan.extracted_parts[-1].sequences[0].lookup().elements - assert expected_assembled_open_backbone_seq_str==assembled_open_backbone_seq_str, 'Constructor Error: Last extracted part (open backbone) sequence does not match expected sequence' - - expected_composite_seq_str = expected_assembled_open_backbone_seq_str[:-4] + expected_assembled_j23100_b0034_ac_seq_str[:-4] + expected_assembled_sfgfp_ce_seq_str[:-4] + expected_assembled_b0015_ef_seq_str[:-4] - assembled_composite_seq_str = test_assembly_plan.composites[0][0].sequences[0].lookup().elements - assert expected_composite_seq_str==assembled_composite_seq_str, 'Constructor Error: Composite sequence does not match expected sequence' - + combinatorial_assembly_plan.run() + for obj in combinatorial_assembly_plan.document.objects: + if obj.identity =='http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb': + obtained_sequence = obj.sequences[0].lookup().elements + target_sequence = 'cgctgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgGGAGttgacggctagctcagtcctaggtacagtgctagcTACTagagaaagaggagaaatactaaatggtgagcaagggcgaggagctgttcaccggggtggtgcccatcctggtcgagctggacggcgacgtgaacggccacaagttcagcgtgtccggcgagggcgagggcgatgccacctacggcaagctgaccctgaagttcatctgcaccaccggcaagctgcccgtgccctggcccaccctcgtgaccaccctgacctggggcgtgcagtgcttcagccgctaccccgaccacatgaagcagcacgacttcttcaagtccgccatgcccgaaggctacgtccaggagcgcaccatcttcttcaaggacgacggcaactacaagacccgcgccgaggtgaagttcgagggcgacaccctggtgaaccgcatcgagctgaagggcatcgacttcaaggaggacggcaacatcctggggcacaagctggagtacaactacatcagccacaacgtctatatcaccgccgacaagcagaagaacggcatcaaggccaacttcaagatccgccacaacatcgaggacggcagcgtgcagctcgccgaccactaccagcagaacacccccatcggcgacggccccgtgctgctgcccgacaaccactacctgagcacccagtccgccctgagcaaagaccccaacgagaagcgcgatcacatggtcctgctggagttcgtgaccgccgccgggatcactctcggcatggacgagctgtacaagtaataaGCTTccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata' if __name__ == '__main__': unittest.main() diff --git a/test/test_files/ab_j23100.gb b/test/test_files/ab_j23100.gb new file mode 100644 index 00000000..de329983 --- /dev/null +++ b/test/test_files/ab_j23100.gb @@ -0,0 +1,80 @@ +LOCUS AB_J23100 2095 bp ds-DNA linear 25-OCT-2023 +DEFINITION . +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="A" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc_feature 479..513 + /label="J23100" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 514..517 + /label="B" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + misc_feature 531..602 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(666..685) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(770..1384) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1551..1656) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgGGAGtt + 481 gacggctagc tcagtcctag gtacagtgct agcTACTCGA Gaccctgcag tccggcaaaa + 541 aagggcaagg tgtcaccacc ctgccctttt tctttaaaac cgaaaagatt acttcgcgtt + 601 atgcaggctt cctcgctcac tgactcgctg cgctcggtcg ttcggctgcg gcgagcggta + 661 tcagctcact caaaggcggt aatacggtta tccacagaat caggggataa cgcaggaaag + 721 aacatgtgag caaaaggcca gcaaaaggcc aggaaccgta aaaaggccgc gttgctggcg + 781 tttttccaca ggctccgccc ccctgacgag catcacaaaa atcgacgctc aagtcagagg + 841 tggcgaaacc cgacaggact ataaagatac caggcgtttc cccctggaag ctccctcgtg + 901 cgctctcctg ttccgaccct gccgcttacc ggatacctgt ccgcctttct cccttcggga + 961 agcgtggcgc tttctcatag ctcacgctgt aggtatctca gttcggtgta ggtcgttcgc + 1021 tccaagctgg gctgtgtgca cgaacccccc gttcagcccg accgctgcgc cttatccggt + 1081 aactatcgtc ttgagtccaa cccggtaaga cacgacttat cgccactggc agcagccact + 1141 ggtaacagga ttagcagagc gaggtatgta ggcggtgcta cagagttctt gaagtggtgg + 1201 cctaactacg gctacactag aagaacagta tttggtatct gcgctctgct gaagccagtt + 1261 accttcggaa aaagagttgg tagctcttga tccggcaaac aaaccaccgc tggtagcggt + 1321 ggtttttttg tttgcaagca gcagattacg cgcagaaaaa aaggatctca agaagatcct + 1381 ttgatctttt ctacggggtc tgacgctcag tggaacgaaa actcacgtta agggattttg + 1441 gtcatgagat tatcaaaaag gatcttcacc tagatccttt taaattaaaa atgaagtttt + 1501 aaatcaatct aaagtatata tgagtaaact tggtctgaca gctcgaggct tggattctca + 1561 ccaataaaaa acgcccggcg gcaaccgagc gttctgaaca aatccagatg gagttctgag + 1621 gtcattactg gatctatcaa caggagtcca agcgagctcg atatcaaatt acgccccgcc + 1681 ctgccactca tcgcagtact gttgtaattc attaagcatt ctgccgacat ggaagccatc + 1741 acaaacggca tgatgaacct gaatcgccag cggcatcagc accttgtcgc cttgcgtata + 1801 atatttgccc atggtgaaaa cgggggcgaa gaagttgtcc atattggcca cgtttaaatc + 1861 aaaactggtg aaactcaccc agggattggc tgagacgaaa aacatattct caataaaccc + 1921 tttagggaaa taggccaggt tttcaccgta acacgccaca tcttgcgaat atatgtgtag + 1981 aaactgccgg aaatcgtcgt ggtattcact ccagagcgat gaaaacgttt cagtttgctc + 2041 atggaaaacg gtgtaacaag ggtgaacact atcccatatc accagctcac cgtct +// \ No newline at end of file diff --git a/test/test_files/ab_j23101.gb b/test/test_files/ab_j23101.gb new file mode 100644 index 00000000..6f534a76 --- /dev/null +++ b/test/test_files/ab_j23101.gb @@ -0,0 +1,81 @@ +LOCUS AB_J23101 2095 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="A" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc_feature 479..513 + /label="J23101" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 514..517 + /label="B" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + misc_feature 531..602 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(666..685) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(770..1384) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1551..1656) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgGGAGtt + 481 tacagctagc tcagtcctag gtattatgct agcTACTCGA Gaccctgcag tccggcaaaa + 541 aagggcaagg tgtcaccacc ctgccctttt tctttaaaac cgaaaagatt acttcgcgtt + 601 atgcaggctt cctcgctcac tgactcgctg cgctcggtcg ttcggctgcg gcgagcggta + 661 tcagctcact caaaggcggt aatacggtta tccacagaat caggggataa cgcaggaaag + 721 aacatgtgag caaaaggcca gcaaaaggcc aggaaccgta aaaaggccgc gttgctggcg + 781 tttttccaca ggctccgccc ccctgacgag catcacaaaa atcgacgctc aagtcagagg + 841 tggcgaaacc cgacaggact ataaagatac caggcgtttc cccctggaag ctccctcgtg + 901 cgctctcctg ttccgaccct gccgcttacc ggatacctgt ccgcctttct cccttcggga + 961 agcgtggcgc tttctcatag ctcacgctgt aggtatctca gttcggtgta ggtcgttcgc + 1021 tccaagctgg gctgtgtgca cgaacccccc gttcagcccg accgctgcgc cttatccggt + 1081 aactatcgtc ttgagtccaa cccggtaaga cacgacttat cgccactggc agcagccact + 1141 ggtaacagga ttagcagagc gaggtatgta ggcggtgcta cagagttctt gaagtggtgg + 1201 cctaactacg gctacactag aagaacagta tttggtatct gcgctctgct gaagccagtt + 1261 accttcggaa aaagagttgg tagctcttga tccggcaaac aaaccaccgc tggtagcggt + 1321 ggtttttttg tttgcaagca gcagattacg cgcagaaaaa aaggatctca agaagatcct + 1381 ttgatctttt ctacggggtc tgacgctcag tggaacgaaa actcacgtta agggattttg + 1441 gtcatgagat tatcaaaaag gatcttcacc tagatccttt taaattaaaa atgaagtttt + 1501 aaatcaatct aaagtatata tgagtaaact tggtctgaca gctcgaggct tggattctca + 1561 ccaataaaaa acgcccggcg gcaaccgagc gttctgaaca aatccagatg gagttctgag + 1621 gtcattactg gatctatcaa caggagtcca agcgagctcg atatcaaatt acgccccgcc + 1681 ctgccactca tcgcagtact gttgtaattc attaagcatt ctgccgacat ggaagccatc + 1741 acaaacggca tgatgaacct gaatcgccag cggcatcagc accttgtcgc cttgcgtata + 1801 atatttgccc atggtgaaaa cgggggcgaa gaagttgtcc atattggcca cgtttaaatc + 1861 aaaactggtg aaactcaccc agggattggc tgagacgaaa aacatattct caataaaccc + 1921 tttagggaaa taggccaggt tttcaccgta acacgccaca tcttgcgaat atatgtgtag + 1981 aaactgccgg aaatcgtcgt ggtattcact ccagagcgat gaaaacgttt cagtttgctc + 2041 atggaaaacg gtgtaacaag ggtgaacact atcccatatc accagctcac cgtct +// \ No newline at end of file diff --git a/test/test_files/bc_b0034.gb b/test/test_files/bc_b0034.gb new file mode 100644 index 00000000..16872bdd --- /dev/null +++ b/test/test_files/bc_b0034.gb @@ -0,0 +1,81 @@ +LOCUS BC_B0034 2081 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="B" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + misc_feature 479..499 + /label="B0034" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 500..503 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 517..588 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(652..671) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(756..1370) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1537..1642) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgTACTag + 481 agaaagagga gaaatactaa atgCGAGacc ctgcagtccg gcaaaaaagg gcaaggtgtc + 541 accaccctgc cctttttctt taaaaccgaa aagattactt cgcgttatgc aggcttcctc + 601 gctcactgac tcgctgcgct cggtcgttcg gctgcggcga gcggtatcag ctcactcaaa + 661 ggcggtaata cggttatcca cagaatcagg ggataacgca ggaaagaaca tgtgagcaaa + 721 aggccagcaa aaggccagga accgtaaaaa ggccgcgttg ctggcgtttt tccacaggct + 781 ccgcccccct gacgagcatc acaaaaatcg acgctcaagt cagaggtggc gaaacccgac + 841 aggactataa agataccagg cgtttccccc tggaagctcc ctcgtgcgct ctcctgttcc + 901 gaccctgccg cttaccggat acctgtccgc ctttctccct tcgggaagcg tggcgctttc + 961 tcatagctca cgctgtaggt atctcagttc ggtgtaggtc gttcgctcca agctgggctg + 1021 tgtgcacgaa ccccccgttc agcccgaccg ctgcgcctta tccggtaact atcgtcttga + 1081 gtccaacccg gtaagacacg acttatcgcc actggcagca gccactggta acaggattag + 1141 cagagcgagg tatgtaggcg gtgctacaga gttcttgaag tggtggccta actacggcta + 1201 cactagaaga acagtatttg gtatctgcgc tctgctgaag ccagttacct tcggaaaaag + 1261 agttggtagc tcttgatccg gcaaacaaac caccgctggt agcggtggtt tttttgtttg + 1321 caagcagcag attacgcgca gaaaaaaagg atctcaagaa gatcctttga tcttttctac + 1381 ggggtctgac gctcagtgga acgaaaactc acgttaaggg attttggtca tgagattatc + 1441 aaaaaggatc ttcacctaga tccttttaaa ttaaaaatga agttttaaat caatctaaag + 1501 tatatatgag taaacttggt ctgacagctc gaggcttgga ttctcaccaa taaaaaacgc + 1561 ccggcggcaa ccgagcgttc tgaacaaatc cagatggagt tctgaggtca ttactggatc + 1621 tatcaacagg agtccaagcg agctcgatat caaattacgc cccgccctgc cactcatcgc + 1681 agtactgttg taattcatta agcattctgc cgacatggaa gccatcacaa acggcatgat + 1741 gaacctgaat cgccagcggc atcagcacct tgtcgccttg cgtataatat ttgcccatgg + 1801 tgaaaacggg ggcgaagaag ttgtccatat tggccacgtt taaatcaaaa ctggtgaaac + 1861 tcacccaggg attggctgag acgaaaaaca tattctcaat aaacccttta gggaaatagg + 1921 ccaggttttc accgtaacac gccacatctt gcgaatatat gtgtagaaac tgccggaaat + 1981 cgtcgtggta ttcactccag agcgatgaaa acgtttcagt ttgctcatgg aaaacggtgt + 2041 aacaagggtg aacactatcc catatcacca gctcaccgtc t +// \ No newline at end of file diff --git a/test/test_files/ce_ecfp.gb b/test/test_files/ce_ecfp.gb new file mode 100644 index 00000000..24b64599 --- /dev/null +++ b/test/test_files/ce_ecfp.gb @@ -0,0 +1,93 @@ +LOCUS CE_eCFP 2780 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 479..1198 + /label="eCFP" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + misc_feature 1199..1202 + /label="E" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 1216..1287 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1351..1370) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1455..2069) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(2236..2341) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgaatggt + 481 gagcaagggc gaggagctgt tcaccggggt ggtgcccatc ctggtcgagc tggacggcga + 541 cgtgaacggc cacaagttca gcgtgtccgg cgagggcgag ggcgatgcca cctacggcaa + 601 gctgaccctg aagttcatct gcaccaccgg caagctgccc gtgccctggc ccaccctcgt + 661 gaccaccctg acctggggcg tgcagtgctt cagccgctac cccgaccaca tgaagcagca + 721 cgacttcttc aagtccgcca tgcccgaagg ctacgtccag gagcgcacca tcttcttcaa + 781 ggacgacggc aactacaaga cccgcgccga ggtgaagttc gagggcgaca ccctggtgaa + 841 ccgcatcgag ctgaagggca tcgacttcaa ggaggacggc aacatcctgg ggcacaagct + 901 ggagtacaac tacatcagcc acaacgtcta tatcaccgcc gacaagcaga agaacggcat + 961 caaggccaac ttcaagatcc gccacaacat cgaggacggc agcgtgcagc tcgccgacca + 1021 ctaccagcag aacaccccca tcggcgacgg ccccgtgctg ctgcccgaca accactacct + 1081 gagcacccag tccgccctga gcaaagaccc caacgagaag cgcgatcaca tggtcctgct + 1141 ggagttcgtg accgccgccg ggatcactct cggcatggac gagctgtaca agtaataagc + 1201 ttCGAGaccc tgcagtccgg caaaaaaggg caaggtgtca ccaccctgcc ctttttcttt + 1261 aaaaccgaaa agattacttc gcgttatgca ggcttcctcg ctcactgact cgctgcgctc + 1321 ggtcgttcgg ctgcggcgag cggtatcagc tcactcaaag gcggtaatac ggttatccac + 1381 agaatcaggg gataacgcag gaaagaacat gtgagcaaaa ggccagcaaa aggccaggaa + 1441 ccgtaaaaag gccgcgttgc tggcgttttt ccacaggctc cgcccccctg acgagcatca + 1501 caaaaatcga cgctcaagtc agaggtggcg aaacccgaca ggactataaa gataccaggc + 1561 gtttccccct ggaagctccc tcgtgcgctc tcctgttccg accctgccgc ttaccggata + 1621 cctgtccgcc tttctccctt cgggaagcgt ggcgctttct catagctcac gctgtaggta + 1681 tctcagttcg gtgtaggtcg ttcgctccaa gctgggctgt gtgcacgaac cccccgttca + 1741 gcccgaccgc tgcgccttat ccggtaacta tcgtcttgag tccaacccgg taagacacga + 1801 cttatcgcca ctggcagcag ccactggtaa caggattagc agagcgaggt atgtaggcgg + 1861 tgctacagag ttcttgaagt ggtggcctaa ctacggctac actagaagaa cagtatttgg + 1921 tatctgcgct ctgctgaagc cagttacctt cggaaaaaga gttggtagct cttgatccgg + 1981 caaacaaacc accgctggta gcggtggttt ttttgtttgc aagcagcaga ttacgcgcag + 2041 aaaaaaagga tctcaagaag atcctttgat cttttctacg gggtctgacg ctcagtggaa + 2101 cgaaaactca cgttaaggga ttttggtcat gagattatca aaaaggatct tcacctagat + 2161 ccttttaaat taaaaatgaa gttttaaatc aatctaaagt atatatgagt aaacttggtc + 2221 tgacagctcg aggcttggat tctcaccaat aaaaaacgcc cggcggcaac cgagcgttct + 2281 gaacaaatcc agatggagtt ctgaggtcat tactggatct atcaacagga gtccaagcga + 2341 gctcgatatc aaattacgcc ccgccctgcc actcatcgca gtactgttgt aattcattaa + 2401 gcattctgcc gacatggaag ccatcacaaa cggcatgatg aacctgaatc gccagcggca + 2461 tcagcacctt gtcgccttgc gtataatatt tgcccatggt gaaaacgggg gcgaagaagt + 2521 tgtccatatt ggccacgttt aaatcaaaac tggtgaaact cacccaggga ttggctgaga + 2581 cgaaaaacat attctcaata aaccctttag ggaaataggc caggttttca ccgtaacacg + 2641 ccacatcttg cgaatatatg tgtagaaact gccggaaatc gtcgtggtat tcactccaga + 2701 gcgatgaaaa cgtttcagtt tgctcatgga aaacggtgta acaagggtga acactatccc + 2761 atatcaccag ctcaccgtct +// \ No newline at end of file diff --git a/test/test_files/ce_gfp.gb b/test/test_files/ce_gfp.gb new file mode 100644 index 00000000..e4bf9a15 --- /dev/null +++ b/test/test_files/ce_gfp.gb @@ -0,0 +1,93 @@ +LOCUS CE_GFP 2777 bp ds-DNA circular 26-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 479..1195 + /label="GFP" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature 1196..1199 + /label="E" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 1213..1284 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1348..1367) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1452..2066) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(2233..2338) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgaatggt + 481 gagcaagggc gaggagctgt tcaccggggt ggtgcccatc ctggtcgagc tggacggcga + 541 cgtaaacggc cacaagttca gcgtgtccgg cgagggcgag ggcgatgcca cctacggcaa + 601 gctgaccctg aagttcatct gcaccaccgg caagctgccc gtgccctggc ccaccctcgt + 661 gaccaccttc agctacggcg tgcagtgctt cagccgctac cccgaccaca tgaagcagca + 721 cgacttcttc aagtccgcca tgcccgaagg ctacgtccag gagcgcacca tcttcttcaa + 781 ggacgacggc aactacaaga cccgcgccga ggtgaagttc gagggcgaca ccctggtgaa + 841 ccgcatcgag ctgaagggca tcgacttcaa ggaggacggc aacatcctgg ggcacaagct + 901 ggagtacaac tacaacagcc acaacgtcta tatcatggcc gacaagcaga agaacggcat + 961 caaggtgaac ttcaagatcc gccacaacat cgaggacggc agcgtgcagc tcgccgacca + 1021 ctaccagcag aacaccccca tcggcgacgg ccccgtgctg ctgcccgaca accactacct + 1081 gagcacccag tccgccctga gcaaagaccc caacgagaag cgcgatcaca tggtcctgct + 1141 ggagttcgtg accgccgccg ggatcactca cggcatggac gagctgtaca agtaagcttC + 1201 GAGaccctgc agtccggcaa aaaagggcaa ggtgtcacca ccctgccctt tttctttaaa + 1261 accgaaaaga ttacttcgcg ttatgcaggc ttcctcgctc actgactcgc tgcgctcggt + 1321 cgttcggctg cggcgagcgg tatcagctca ctcaaaggcg gtaatacggt tatccacaga + 1381 atcaggggat aacgcaggaa agaacatgtg agcaaaaggc cagcaaaagg ccaggaaccg + 1441 taaaaaggcc gcgttgctgg cgtttttcca caggctccgc ccccctgacg agcatcacaa + 1501 aaatcgacgc tcaagtcaga ggtggcgaaa cccgacagga ctataaagat accaggcgtt + 1561 tccccctgga agctccctcg tgcgctctcc tgttccgacc ctgccgctta ccggatacct + 1621 gtccgccttt ctcccttcgg gaagcgtggc gctttctcat agctcacgct gtaggtatct + 1681 cagttcggtg taggtcgttc gctccaagct gggctgtgtg cacgaacccc ccgttcagcc + 1741 cgaccgctgc gccttatccg gtaactatcg tcttgagtcc aacccggtaa gacacgactt + 1801 atcgccactg gcagcagcca ctggtaacag gattagcaga gcgaggtatg taggcggtgc + 1861 tacagagttc ttgaagtggt ggcctaacta cggctacact agaagaacag tatttggtat + 1921 ctgcgctctg ctgaagccag ttaccttcgg aaaaagagtt ggtagctctt gatccggcaa + 1981 acaaaccacc gctggtagcg gtggtttttt tgtttgcaag cagcagatta cgcgcagaaa + 2041 aaaaggatct caagaagatc ctttgatctt ttctacgggg tctgacgctc agtggaacga + 2101 aaactcacgt taagggattt tggtcatgag attatcaaaa aggatcttca cctagatcct + 2161 tttaaattaa aaatgaagtt ttaaatcaat ctaaagtata tatgagtaaa cttggtctga + 2221 cagctcgagg cttggattct caccaataaa aaacgcccgg cggcaaccga gcgttctgaa + 2281 caaatccaga tggagttctg aggtcattac tggatctatc aacaggagtc caagcgagct + 2341 cgatatcaaa ttacgccccg ccctgccact catcgcagta ctgttgtaat tcattaagca + 2401 ttctgccgac atggaagcca tcacaaacgg catgatgaac ctgaatcgcc agcggcatca + 2461 gcaccttgtc gccttgcgta taatatttgc ccatggtgaa aacgggggcg aagaagttgt + 2521 ccatattggc cacgtttaaa tcaaaactgg tgaaactcac ccagggattg gctgagacga + 2581 aaaacatatt ctcaataaac cctttaggga aataggccag gttttcaccg taacacgcca + 2641 catcttgcga atatatgtgt agaaactgcc ggaaatcgtc gtggtattca ctccagagcg + 2701 atgaaaacgt ttcagtttgc tcatggaaaa cggtgtaaca agggtgaaca ctatcccata + 2761 tcaccagctc accgtct +// \ No newline at end of file diff --git a/test/test_files/ce_mrfp1.gb b/test/test_files/ce_mrfp1.gb new file mode 100644 index 00000000..5ce34456 --- /dev/null +++ b/test/test_files/ce_mrfp1.gb @@ -0,0 +1,92 @@ +LOCUS CE_mRFP1 2738 bp ds-DNA circular 26-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC +FEATURES Location/Qualifiers + misc_feature 346..365 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(422..466) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 468..473 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 475..478 + /label="C" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature 479..1156 + /label="mRFP1" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 1157..1160 + /label="E" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 1174..1245 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(1309..1328) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(1413..2027) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(2194..2299) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" +ORIGIN + 1 tcattgccat acgaaattcc ggatgagcat tcatcaggcg ggcaagaatg tgaataaagg + 61 ccggataaaa cttgtgctta tttttcttta cggtctttaa aaaggccgta atatccagct + 121 gaacggtctg gttataggta cattgagcaa ctgactgaaa tgcctcaaaa tgttctttac + 181 gatgccattg ggatatatca acggtggtat atccagtgat ttttttctcc attttagctt + 241 ccttagctcc tgaaaatctc gataactcaa aaaatacgcc cggtagtgat cttatttcat + 301 tatggtgaaa gttggaacct cttacgtgcc cgatcaactc gagtgccacc tgacgtctaa + 361 gaaaccatta ttatcatgac attaacctat aaaaataggc gtatcacgag gcagaatttc + 421 agataaaaaa aatccttagc tttcgctaag gatgatttct ggaattcggt ctcgaatggc + 481 ttcctccgaa gatgttatca aagagttcat gcgtttcaaa gttcgtatgg aaggttccgt + 541 taacggtcac gagttcgaaa tcgaaggtga aggtgaaggt cgtccgtacg aaggtaccca + 601 gaccgctaaa ctgaaagtta ccaaaggtgg tccgctgccg ttcgcttggg acatcctgtc + 661 cccgcagttc cagtacggtt ccaaagctta cgttaaacac ccggctgaca tcccggacta + 721 cctgaaactg tccttcccgg aaggtttcaa atgggaacgt gttatgaact tcgaggacgg + 781 tggtgttgtt accgttaccc aggactcctc cctgcaagac ggtgagttca tctacaaagt + 841 taaactgcgt ggtaccaact tcccgtccga cggtccggtt atgcagaaaa aaaccatggg + 901 ttgggaagct tccaccgaac gtatgtaccc ggaagatggt gctctgaaag gtgaaatcaa + 961 aatgcgtctg aaactgaaag acggtggtca ctacgacgct gaagttaaaa ccacctacat + 1021 ggctaaaaaa ccggttcagc tgccgggtgc ttacaaaacc gacatcaaac tggacatcac + 1081 ctcccacaac gaggactaca ccatcgttga acagtacgaa cgtgctgaag gtcgtcactc + 1141 caccggtgct taatgagctt CGAGaccctg cagtccggca aaaaagggca aggtgtcacc + 1201 accctgccct ttttctttaa aaccgaaaag attacttcgc gttatgcagg cttcctcgct + 1261 cactgactcg ctgcgctcgg tcgttcggct gcggcgagcg gtatcagctc actcaaaggc + 1321 ggtaatacgg ttatccacag aatcagggga taacgcagga aagaacatgt gagcaaaagg + 1381 ccagcaaaag gccaggaacc gtaaaaaggc cgcgttgctg gcgtttttcc acaggctccg + 1441 cccccctgac gagcatcaca aaaatcgacg ctcaagtcag aggtggcgaa acccgacagg + 1501 actataaaga taccaggcgt ttccccctgg aagctccctc gtgcgctctc ctgttccgac + 1561 cctgccgctt accggatacc tgtccgcctt tctcccttcg ggaagcgtgg cgctttctca + 1621 tagctcacgc tgtaggtatc tcagttcggt gtaggtcgtt cgctccaagc tgggctgtgt + 1681 gcacgaaccc cccgttcagc ccgaccgctg cgccttatcc ggtaactatc gtcttgagtc + 1741 caacccggta agacacgact tatcgccact ggcagcagcc actggtaaca ggattagcag + 1801 agcgaggtat gtaggcggtg ctacagagtt cttgaagtgg tggcctaact acggctacac + 1861 tagaagaaca gtatttggta tctgcgctct gctgaagcca gttaccttcg gaaaaagagt + 1921 tggtagctct tgatccggca aacaaaccac cgctggtagc ggtggttttt ttgtttgcaa + 1981 gcagcagatt acgcgcagaa aaaaaggatc tcaagaagat cctttgatct tttctacggg + 2041 gtctgacgct cagtggaacg aaaactcacg ttaagggatt ttggtcatga gattatcaaa + 2101 aaggatcttc acctagatcc ttttaaatta aaaatgaagt tttaaatcaa tctaaagtat + 2161 atatgagtaa acttggtctg acagctcgag gcttggattc tcaccaataa aaaacgcccg + 2221 gcggcaaccg agcgttctga acaaatccag atggagttct gaggtcatta ctggatctat + 2281 caacaggagt ccaagcgagc tcgatatcaa attacgcccc gccctgccac tcatcgcagt + 2341 actgttgtaa ttcattaagc attctgccga catggaagcc atcacaaacg gcatgatgaa + 2401 cctgaatcgc cagcggcatc agcaccttgt cgccttgcgt ataatatttg cccatggtga + 2461 aaacgggggc gaagaagttg tccatattgg ccacgtttaa atcaaaactg gtgaaactca + 2521 cccagggatt ggctgagacg aaaaacatat tctcaataaa ccctttaggg aaataggcca + 2581 ggttttcacc gtaacacgcc acatcttgcg aatatatgtg tagaaactgc cggaaatcgt + 2641 cgtggtattc actccagagc gatgaaaacg tttcagtttg ctcatggaaa acggtgtaac + 2701 aagggtgaac actatcccat atcaccagct caccgtct +// \ No newline at end of file diff --git a/test/test_files/ef_b0015.gb b/test/test_files/ef_b0015.gb new file mode 100644 index 00000000..0c38ecfa --- /dev/null +++ b/test/test_files/ef_b0015.gb @@ -0,0 +1,95 @@ +LOCUS EF_B0015 2190 bp ds-DNA circular 25-OCT-2023 +DEFINITION . +COMMENT From pSB1C00 - Loop universal L0 acceptor and B0015_EF +FEATURES Location/Qualifiers + misc_feature 385..404 + /label="VF Primer binding site" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature complement(461..505) + /label="Terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + BioBrick 507..512 + /label="BsaI Site" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 514..517 + /label="E" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + terminator 518..646 + /label="BBa-B0015 Terminator" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + terminator 526..597 + /label="rrnB T1 terminator" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + terminator 613..640 + /label="T7Te terminator" + /ApEinfo_revcolor="#75c6a9" + /ApEinfo_fwdcolor="#75c6a9" + misc_feature 647..650 + /label="F" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + misc_feature 664..735 + /label="His Terminator" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + misc_feature complement(799..818) + /label="VR Primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + misc_feature complement(903..1517) + /label="rep (pMB1)" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + misc_feature complement(1684..1789) + /label="Terminator T0" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + CDS complement(1802..271) + /label="Cam Resistance" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" +ORIGIN + 1 aagggtgaac actatcccat atcaccagct caccgtcttt cattgccata cgaaattccg + 61 gatgagcatt catcaggcgg gcaagaatgt gaataaaggc cggataaaac ttgtgcttat + 121 ttttctttac ggtctttaaa aaggccgtaa tatccagctg aacggtctgg ttataggtac + 181 attgagcaac tgactgaaat gcctcaaaat gttctttacg atgccattgg gatatatcaa + 241 cggtggtata tccagtgatt tttttctcca ttttagcttc cttagctcct gaaaatctcg + 301 ataactcaaa aaatacgccc ggtagtgatc ttatttcatt atggtgaaag ttggaacctc + 361 ttacgtgccc gatcaactcg agtgccacct gacgtctaag aaaccattat tatcatgaca + 421 ttaacctata aaaataggcg tatcacgagg cagaatttca gataaaaaaa atccttagct + 481 ttcgctaagg atgatttctg gaattcggtc tcgGCTTcca ggcatcaaat aaaacgaaag + 541 gctcagtcga aagactgggc ctttcgtttt atctgttgtt tgtcggtgaa cgctctctac + 601 tagagtcaca ctggctcacc ttcgggtggg cctttctgcg tttatacgct CGAGaccctg + 661 cagtccggca aaaaagggca aggtgtcacc accctgccct ttttctttaa aaccgaaaag + 721 attacttcgc gttatgcagg cttcctcgct cactgactcg ctgcgctcgg tcgttcggct + 781 gcggcgagcg gtatcagctc actcaaaggc ggtaatacgg ttatccacag aatcagggga + 841 taacgcagga aagaacatgt gagcaaaagg ccagcaaaag gccaggaacc gtaaaaaggc + 901 cgcgttgctg gcgtttttcc acaggctccg cccccctgac gagcatcaca aaaatcgacg + 961 ctcaagtcag aggtggcgaa acccgacagg actataaaga taccaggcgt ttccccctgg + 1021 aagctccctc gtgcgctctc ctgttccgac cctgccgctt accggatacc tgtccgcctt + 1081 tctcccttcg ggaagcgtgg cgctttctca tagctcacgc tgtaggtatc tcagttcggt + 1141 gtaggtcgtt cgctccaagc tgggctgtgt gcacgaaccc cccgttcagc ccgaccgctg + 1201 cgccttatcc ggtaactatc gtcttgagtc caacccggta agacacgact tatcgccact + 1261 ggcagcagcc actggtaaca ggattagcag agcgaggtat gtaggcggtg ctacagagtt + 1321 cttgaagtgg tggcctaact acggctacac tagaagaaca gtatttggta tctgcgctct + 1381 gctgaagcca gttaccttcg gaaaaagagt tggtagctct tgatccggca aacaaaccac + 1441 cgctggtagc ggtggttttt ttgtttgcaa gcagcagatt acgcgcagaa aaaaaggatc + 1501 tcaagaagat cctttgatct tttctacggg gtctgacgct cagtggaacg aaaactcacg + 1561 ttaagggatt ttggtcatga gattatcaaa aaggatcttc acctagatcc ttttaaatta + 1621 aaaatgaagt tttaaatcaa tctaaagtat atatgagtaa acttggtctg acagctcgag + 1681 gcttggattc tcaccaataa aaaacgcccg gcggcaaccg agcgttctga acaaatccag + 1741 atggagttct gaggtcatta ctggatctat caacaggagt ccaagcgagc tcgatatcaa + 1801 attacgcccc gccctgccac tcatcgcagt actgttgtaa ttcattaagc attctgccga + 1861 catggaagcc atcacaaacg gcatgatgaa cctgaatcgc cagcggcatc agcaccttgt + 1921 cgccttgcgt ataatatttg cccatggtga aaacgggggc gaagaagttg tccatattgg + 1981 ccacgtttaa atcaaaactg gtgaaactca cccagggatt ggctgagacg aaaaacatat + 2041 tctcaataaa ccctttaggg aaataggcca ggttttcacc gtaacacgcc acatcttgcg + 2101 aatatatgtg tagaaactgc cggaaatcgt cgtggtattc actccagagc gatgaaaacg + 2161 tttcagtttg ctcatggaaa acggtgtaac +// \ No newline at end of file diff --git a/test/test_files/j23100_b0034.gb b/test/test_files/j23100_b0034.gb deleted file mode 100644 index e3c10b03..00000000 --- a/test/test_files/j23100_b0034.gb +++ /dev/null @@ -1,82 +0,0 @@ -LOCUS Copy_of_J23100_B0034_AC 2123 bp ds-DNA circular 27-JAN-2023 -DEFINITION . -COMMENT From pSB1C00 - Loop universal L0 acceptor and J23100_B0034_AC -FEATURES Location/Qualifiers - misc_feature 347..366 - /label="VF Primer binding site" - /ApEinfo_revcolor="#b7e6d7" - /ApEinfo_fwdcolor="#b7e6d7" - misc_feature complement(423..467) - /label="Terminator" - /ApEinfo_revcolor="#c7b0e3" - /ApEinfo_fwdcolor="#c7b0e3" - BioBrick 469..474 - /label="BsaI Site" - /ApEinfo_revcolor="#f58a5e" - /ApEinfo_fwdcolor="#f58a5e" - Promoter 483..517 - /label="BBa_J23100" - /ApEinfo_revcolor="#b4abac" - /ApEinfo_fwdcolor="#b4abac" - RBS 524..535 - /label="B0034" - /ApEinfo_revcolor="#b4abac" - /ApEinfo_fwdcolor="#b4abac" - misc_feature 559..630 - /label="His Terminator" - /ApEinfo_revcolor="#ffef86" - /ApEinfo_fwdcolor="#ffef86" - misc_feature complement(694..713) - /label="VR Primer binding site" - /ApEinfo_revcolor="#b1ff67" - /ApEinfo_fwdcolor="#b1ff67" - misc_feature complement(798..1412) - /label="rep (pMB1)" - /ApEinfo_revcolor="#85dae9" - /ApEinfo_fwdcolor="#85dae9" - misc_feature complement(1579..1684) - /label="Terminator T0" - /ApEinfo_revcolor="#c6c9d1" - /ApEinfo_fwdcolor="#c6c9d1" - CDS complement(1697..233) - /label="Cam Resistance" - /ApEinfo_revcolor="#ff9ccd" - /ApEinfo_fwdcolor="#ff9ccd" -ORIGIN - 1 ttcattgcca tacgaaattc cggatgagca ttcatcaggc gggcaagaat gtgaataaag - 61 gccggataaa acttgtgctt atttttcttt acggtcttta aaaaggccgt aatatccagc - 121 tgaacggtct ggttataggt acattgagca actgactgaa atgcctcaaa atgttcttta - 181 cgatgccatt gggatatatc aacggtggta tatccagtga tttttttctc cattttagct - 241 tccttagctc ctgaaaatct cgataactca aaaaatacgc ccggtagtga tcttatttca - 301 ttatggtgaa agttggaacc tcttacgtgc ccgatcaact cgagtgccac ctgacgtcta - 361 agaaaccatt attatcatga cattaaccta taaaaatagg cgtatcacga ggcagaattt - 421 cagataaaaa aaatccttag ctttcgctaa ggatgatttc tggaattcgg tctcgggagt - 481 ctTTGACGGC TAGCTCAGTC CTAGGTACAG TGCTAGCCTA GAGAAAGAGG AGAAATACTA - 541 GaatgCGAGa ccctgcagtc cggcaaaaaa gggcaaggtg tcaccaccct gccctttttc - 601 tttaaaaccg aaaagattac ttcgcgttat gcaggcttcc tcgctcactg actcgctgcg - 661 ctcggtcgtt cggctgcggc gagcggtatc agctcactca aaggcggtaa tacggttatc - 721 cacagaatca ggggataacg caggaaagaa catgtgagca aaaggccagc aaaaggccag - 781 gaaccgtaaa aaggccgcgt tgctggcgtt tttccacagg ctccgccccc ctgacgagca - 841 tcacaaaaat cgacgctcaa gtcagaggtg gcgaaacccg acaggactat aaagatacca - 901 ggcgtttccc cctggaagct ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg - 961 atacctgtcc gcctttctcc cttcgggaag cgtggcgctt tctcatagct cacgctgtag - 1021 gtatctcagt tcggtgtagg tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt - 1081 tcagcccgac cgctgcgcct tatccggtaa ctatcgtctt gagtccaacc cggtaagaca - 1141 cgacttatcg ccactggcag cagccactgg taacaggatt agcagagcga ggtatgtagg - 1201 cggtgctaca gagttcttga agtggtggcc taactacggc tacactagaa gaacagtatt - 1261 tggtatctgc gctctgctga agccagttac cttcggaaaa agagttggta gctcttgatc - 1321 cggcaaacaa accaccgctg gtagcggtgg tttttttgtt tgcaagcagc agattacgcg - 1381 cagaaaaaaa ggatctcaag aagatccttt gatcttttct acggggtctg acgctcagtg - 1441 gaacgaaaac tcacgttaag ggattttggt catgagatta tcaaaaagga tcttcaccta - 1501 gatcctttta aattaaaaat gaagttttaa atcaatctaa agtatatatg agtaaacttg - 1561 gtctgacagc tcgaggcttg gattctcacc aataaaaaac gcccggcggc aaccgagcgt - 1621 tctgaacaaa tccagatgga gttctgaggt cattactgga tctatcaaca ggagtccaag - 1681 cgagctcgat atcaaattac gccccgccct gccactcatc gcagtactgt tgtaattcat - 1741 taagcattct gccgacatgg aagccatcac aaacggcatg atgaacctga atcgccagcg - 1801 gcatcagcac cttgtcgcct tgcgtataat atttgcccat ggtgaaaacg ggggcgaaga - 1861 agttgtccat attggccacg tttaaatcaa aactggtgaa actcacccag ggattggctg - 1921 agacgaaaaa catattctca ataaaccctt tagggaaata ggccaggttt tcaccgtaac - 1981 acgccacatc ttgcgaatat atgtgtagaa actgccggaa atcgtcgtgg tattcactcc - 2041 agagcgatga aaacgtttca gtttgctcat ggaaaacggt gtaacaaggg tgaacactat - 2101 cccatatcac cagctcaccg tct -// \ No newline at end of file diff --git a/test/test_files/podd1.gb b/test/test_files/podd1.gb new file mode 100644 index 00000000..66377409 --- /dev/null +++ b/test/test_files/podd1.gb @@ -0,0 +1,221 @@ +LOCUS Copy_of_pSB1K01_-_Loop_ 3286 bp ds-DNA circular 09-NOV-2022 +DEFINITION . +KEYWORDS "accession:pSB1K01" +COMMENT Imported from database: Registry of Standard Biological Parts + Entry: pSB1K01 Description: pOdd1 Loop Vector based on pSB1K3 +FEATURES Location/Qualifiers + primer 386..399 + /label="pOdd1_To_pOdd4_Fw" + /note="sequence: ctggaattcgctcttcacagggagtgagacccaatacgcaaaccgcctct" + /ApEinfo_revcolor="#9eafd2" + /ApEinfo_fwdcolor="#9eafd2" + primer_binding 547..566 + /label="VF primer binding site" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + primer 547..566 + /label="VF2" + /note="sequence: ccacctgacgtctaagaaac" + /ApEinfo_revcolor="#b1ff67" + /ApEinfo_fwdcolor="#b1ff67" + primer complement(651..669) + /label="pOdd_Suff_Rev" + /note="sequence: CGAATTCCAGAAATCATCC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + primer 656..681 + /label="pOdd1_Insert_Fw" + /note="sequence: atttctggaattcgctcttcaatggg" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" + primer 656..684 + /label="pOdd2_BF_Insert_Fw" + /note="sequence: atttctggaattcgctcttcagcaTACTt" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + primer 656..684 + /label="pOdd1_BF_Insert_Fw" + /note="sequence: atttctggaattcgctcttcaatgTACTt" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + primer 660..709 + /label="pOdd1_To_pOdd4_Fw" + /note="sequence: ctggaattcgctcttcacagggagtgagacccaatacgcaaaccgcctct" + /ApEinfo_revcolor="#9eafd2" + /ApEinfo_fwdcolor="#9eafd2" + dna 663..668 + /label="EcoRI" + /ApEinfo_revcolor="#d6b295" + /ApEinfo_fwdcolor="#d6b295" + dna 669..675 + /label="SapI" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" + misc 677..679 + /label="5' Fusion Site ATG" + /ApEinfo_revcolor="#c7b0e3" + /ApEinfo_fwdcolor="#c7b0e3" + misc 680..683 + /label="Fusion Site GGAG" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + primer 683..696 + /label="pOdd1_CF_Fw" + /note="sequence: CTCTTCAATGaatgTGAGACCCAATAC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + primer 684..702 + /label="pOdd1_BF_OR_BE_Fw" + /note="sequence: CTCTTCAATGtactTGAGACCCAATACGCAAAC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + dna complement(685..690) + /label="BsaI" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" + misc_feature 691..1759 + /label="BBa_J04454" + /ApEinfo_revcolor="#b7e6d7" + /ApEinfo_fwdcolor="#b7e6d7" + misc_feature 917..1594 + /label="RFP" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + primer complement(1739..1756) + /label="pOdd_Pre_Rev" + /note="sequence: AAACGCAGAAAGGCCCAC" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + primer complement(1741..1790) + /label="pOdd1_To_pOdd4_Rev" + /note="sequence: ggactgcaggctcttcaaccagcgtgagacctataaacgcagaaaggccc" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + dna 1760..1765 + /label="BsaI" + /ApEinfo_revcolor="#84b0dc" + /ApEinfo_fwdcolor="#84b0dc" + primer complement(1761..1790) + /label="pOdd1_AE_Insert_Rev" + /note="sequence: ggactgcaggctcttcatgcAAGCtgagac" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + misc 1767..1770 + /label="Fusion Site CGCT" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + primer complement(1769..1790) + /label="pOdd1_Insert_Rev" + /note="sequence: ggactgcaggctcttcatgcag" + /ApEinfo_revcolor="#85dae9" + /ApEinfo_fwdcolor="#85dae9" + primer complement(1769..1790) + /label="pOdd2_Insert_Rev" + /note="sequence: ggactgcaggctcttcagtaag" + /ApEinfo_revcolor="#f58a5e" + /ApEinfo_fwdcolor="#f58a5e" + primer 1770..1787 + /label="pOdd1_AE_Fw" + /note="sequence: ATAGGTCTCAgcttGCATGAAGAGCCTGCAG" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + misc 1771..1773 + /label="3' Fusion Site GCA" + /ApEinfo_revcolor="#d59687" + /ApEinfo_fwdcolor="#d59687" + primer 1771..1787 + /label="pOdd1_AC_Fw" + /note="sequence: ATAGGTCTCAaatgGCATGAAGAGCCTGCAG" + /ApEinfo_revcolor="#f7977a" + /ApEinfo_fwdcolor="#f7977a" + dna complement(1775..1781) + /label="SapI" + /ApEinfo_revcolor="#d6b295" + /ApEinfo_fwdcolor="#d6b295" + dna complement(1782..1787) + /label="PstI" + /ApEinfo_revcolor="#ffef86" + /ApEinfo_fwdcolor="#ffef86" + stem_loop 1788..1859 + /label="E. coli his operon terminator" + /ApEinfo_revcolor="#b4abac" + /ApEinfo_fwdcolor="#b4abac" + primer_binding complement(1923..1942) + /label="VR primer binding site" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + primer complement(1923..1942) + /label="VR" + /note="sequence: gtattaccgcctttgagtga" + /ApEinfo_revcolor="#c6c9d1" + /ApEinfo_fwdcolor="#c6c9d1" + misc 2027..2641 + /label="rep (pMB1)" + /ApEinfo_revcolor="#f8d3a9" + /ApEinfo_fwdcolor="#f8d3a9" + primer 2356..2370 + /label="pOdd1_To_pOdd4_Rev" + /note="sequence: ggactgcaggctcttcaaccagcgtgagacctataaacgcagaaaggccc" + /ApEinfo_revcolor="#ff9ccd" + /ApEinfo_fwdcolor="#ff9ccd" + misc 2863..392 + /label="Kanamycin resistance marker" + /ApEinfo_revcolor="#faac61" + /ApEinfo_fwdcolor="#faac61" +ORIGIN + 1 ccggcgcagg aacactgcca gcgcatcaac aatattttca cctgaatcag gatattcttc + 61 taatacctgg aatgctgttt tcccggggat cgcagtggtg agtaaccatg catcatcagg + 121 agtacggata aaatgcttga tggtcggaag aggcataaat tccgtcagcc agtttagtct + 181 gaccatctca tctgtaacat cattggcaac gctacctttg ccatgtttca gaaacaactc + 241 tggcgcatcg ggcttcccat acaatcgata gattgtcgca cctgattgcc cgacattatc + 301 gcgagcccat ttatacccat ataaatcagc atccatgttg gaatttaatc gcggcctgga + 361 gcaagacgtt tcccgttgaa tatggctcat aacacccctt gtattactgt ttatgtaagc + 421 agacagtttt attgttcatg atgatatatt tttatcttgt gcaatgtaac atcagagatt + 481 ttgagacaca acgtggcttt gttgaataaa tcgaactttt gctgagttga aggatcagct + 541 cgagtgccac ctgacgtcta agaaaccatt attatcatga cattaaccta taaaaatagg + 601 cgtatcacga ggcagaattt cagataaaaa aaatccttag ctttcgctaa ggatgatttc + 661 tggaattcgc tcttcaatgg gagtgagacc caatacgcaa accgcctctc cccgcgcgtt + 721 ggccgattca ttaatgcagc tggcacgaca ggtttcccga ctggaaagcg ggcagtgagc + 781 gcaacgcaat taatgtgagt tagctcactc attaggcacc ccaggcttta cactttatgc + 841 ttccggctcg tatgttgtgt ggaattgtga gcggataaca atttcacaca tactagagaa + 901 agaggagaaa tactagatgg cttcctccga agacgttatc aaagagttca tgcgtttcaa + 961 agttcgtatg gaaggttccg ttaacggtca cgagttcgaa atcgaaggtg aaggtgaagg + 1021 tcgtccgtac gaaggtaccc agaccgctaa actgaaagtt accaaaggtg gtccgctgcc + 1081 gttcgcttgg gacatcctgt ccccgcagtt ccagtacggt tccaaagctt acgttaaaca + 1141 cccggctgac atcccggact acctgaaact gtccttcccg gaaggtttca aatgggaacg + 1201 tgttatgaac ttcgaagacg gtggtgttgt taccgttacc caggactcct ccctgcaaga + 1261 cggtgagttc atctacaaag ttaaactgcg tggtaccaac ttcccgtccg acggtccggt + 1321 tatgcagaaa aaaaccatgg gttgggaagc ttccaccgaa cgtatgtacc cggaagacgg + 1381 tgctctgaaa ggtgaaatca aaatgcgtct gaaactgaaa gacggtggtc actacgacgc + 1441 tgaagttaaa accacctaca tggctaaaaa accggttcag ctgccgggtg cttacaaaac + 1501 cgacatcaaa ctggacatca cctcccacaa cgaagactac accatcgttg aacagtacga + 1561 acgtgctgaa ggtcgtcact ccaccggtgc ttaataacgc tgatagtgct agtgtagatc + 1621 gctactagag ccaggcatca aataaaacga aaggctcagt cgaaagactg ggcctttcgt + 1681 tttatctgtt gtttgtcggt gaacgctctc tactagagtc acactggctc accttcgggt + 1741 gggcctttct gcgtttatag gtctcacgct gcatgaagag cctgcagtcc ggcaaaaaag + 1801 ggcaaggtgt caccaccctg ccctttttct ttaaaaccga aaagattact tcgcgttatg + 1861 caggcttcct cgctcactga ctcgctgcgc tcggtcgttc ggctgcggcg agcggtatca + 1921 gctcactcaa aggcggtaat acggttatcc acagaatcag gggataacgc aggaaagaac + 1981 atgtgagcaa aaggccagca aaaggccagg aaccgtaaaa aggccgcgtt gctggcgttt + 2041 ttccacaggc tccgcccccc tgacgagcat cacaaaaatc gacgctcaag tcagaggtgg + 2101 cgaaacccga caggactata aagataccag gcgtttcccc ctggaagctc cctcgtgcgc + 2161 tctcctgttc cgaccctgcc gcttaccgga tacctgtccg cctttctccc ttcgggaagc + 2221 gtggcgcttt ctcatagctc acgctgtagg tatctcagtt cggtgtaggt cgttcgctcc + 2281 aagctgggct gtgtgcacga accccccgtt cagcccgacc gctgcgcctt atccggtaac + 2341 tatcgtcttg agtccaaccc ggtaagacac gacttatcgc cactggcagc agccactggt + 2401 aacaggatta gcagagcgag gtatgtaggc ggtgctacag agttcttgaa gtggtggcct + 2461 aactacggct acactagaag aacagtattt ggtatctgcg ctctgctgaa gccagttacc + 2521 ttcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg tagcggtggt + 2581 ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga agatcctttg + 2641 atcttttcta cggggtctga cgctcagtgg aacgaaaact cacgttaagg gattttggtc + 2701 atgagattat caaaaaggat cttcacctag atccttttaa attaaaaatg aagttttaaa + 2761 tcaatctaaa gtatatatga gtaaacttgg tctgacagct cgagtcccgt caagtcagcg + 2821 taatgctctg ccagtgttac aaccaattaa ccaattctga ttagaaaaac tcatcgagca + 2881 tcaaatgaaa ctgcaattta ttcatatcag gattatcaat accatatttt tgaaaaagcc + 2941 gtttctgtaa tgaaggagaa aactcaccga ggcagttcca taggatggca agatcctggt + 3001 atcggtctgc gattccgact cgtccaacat caatacaacc tattaatttc ccctcgtcaa + 3061 aaataaggtt atcaagtgag aaatcaccat gagtgacgac tgaatccggt gagaatggca + 3121 aaagcttatg catttctttc cagacttgtt caacaggcca gccattacgc tcgtcatcaa + 3181 aatcactcgc atcaaccaaa ccgttattca ttcgtgattg cgcctgagcg agacgaaata + 3241 cgcgatcgct gttaaaagga caattacaaa caggaatcga atgcaa +// \ No newline at end of file From 3d6598fbc2643c6a3b21514cf9bed7bd1182ff35 Mon Sep 17 00:00:00 2001 From: Gonza10V Date: Tue, 31 Oct 2023 12:14:17 +0000 Subject: [PATCH 32/42] adding notebook --- sbol_utilities/component.py | 2 +- test/test_files/sbol_gg.ipynb | 422 ++++++++++++++++++++++++++++++++++ 2 files changed, 423 insertions(+), 1 deletion(-) create mode 100644 test/test_files/sbol_gg.ipynb diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index e3cc6b4c..5dfa84be 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -577,7 +577,7 @@ def backbone_from_sbol(identity: Union[str,None], sbol_comp: sbol3.Component, dr """Creates a Backbone Component and its Sequence. :param identity: The identity of the Component. The identity of Sequence is also identity with the suffix '_seq'. - :param sequence: The DNA sequence of the Component encoded in IUPAC. + :param sbol_comp: The SBOL Component containing the DNA sequence to use. :param dropout_location: List of 2 integers that indicates the start and the end of the dropout sequence including overhangs. Note that the index of the first location is 1, as is typical practice in biology, rather than 0, as is typical practice in computer science. :param fusion_site_length: Integer of the lenght of the fusion sites (eg. BsaI fusion site lenght is 4, SapI fusion site lenght is 3) :param linear: Boolean than indicates if the backbone is linear, by default it is seted to False which means that it has a circular topology. diff --git a/test/test_files/sbol_gg.ipynb b/test/test_files/sbol_gg.ipynb new file mode 100644 index 00000000..f8b2bf77 --- /dev/null +++ b/test/test_files/sbol_gg.ipynb @@ -0,0 +1,422 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sbol3\n", + "from sbol_utilities.component import ed_restriction_enzyme, backbone, part_in_backbone, part_in_backbone_from_sbol, \\\n", + " digestion, ligation, Assembly_plan_composite_in_backbone_single_enzyme, backbone_from_sbol\n", + "from sbol_utilities.conversion import convert_from_genbank\n", + "from itertools import product\n", + "from sbol_utilities.component import contained_components" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple assembly" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Test assembly plan class\"\"\"\n", + "doc = sbol3.Document()\n", + "sbol3.set_namespace('http://sbolstandard.org/testfiles')\n", + "# Assembly plan setup\n", + "bsai = ed_restriction_enzyme('BsaI')\n", + "#lvl1 acceptor\n", + "podd1_dir = os.path.join('podd1.gb')\n", + "podd_doc = convert_from_genbank(podd1_dir, 'https://github.com/Gonza10V')\n", + "podd_af = [top_level for top_level in podd_doc if type(top_level)==sbol3.Component][0]\n", + "podd_backbone, podd_backbone_seq = backbone_from_sbol('pOdd_bb', podd_af, [680,1770], 4, False, name='pOdd_bb')\n", + "doc.add([podd_backbone,podd_backbone_seq])\n", + "#parts in backbone\n", + "##get parts from genbank\n", + "j23100_dir = os.path.join('ab_j23100.gb')\n", + "b0034_dir = os.path.join('bc_b0034.gb')\n", + "gfp_dir = os.path.join('ce_gfp.gb')\n", + "b0015_dir = os.path.join('ef_b0015.gb')\n", + "j23100_doc = convert_from_genbank(j23100_dir, 'https://github.com/Gonza10V')\n", + "j23100_ab = [top_level for top_level in j23100_doc if type(top_level)==sbol3.Component][0]\n", + "b0034_doc = convert_from_genbank(b0034_dir, 'https://github.com/Gonza10V')\n", + "b0034_bc = [top_level for top_level in b0034_doc if type(top_level)==sbol3.Component][0]\n", + "gfp_doc = convert_from_genbank(gfp_dir, 'https://github.com/Gonza10V')\n", + "gfp_ce = [top_level for top_level in gfp_doc if type(top_level)==sbol3.Component][0]\n", + "b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V')\n", + "b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0]\n", + "##SBOL parts in backbone\n", + "j23100_ab_in_bb, j23100_ab_in_bb_seq = part_in_backbone_from_sbol('j23100_ab_in_bb', j23100_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23100_ab_in_bb')\n", + "doc.add([j23100_ab_in_bb, j23100_ab_in_bb_seq])\n", + "b0034_bc_in_bb, b0034_bc_in_bb_seq = part_in_backbone_from_sbol('b0034_bc_in_bb', b0034_bc, [479,499], [sbol3.SO_RBS], 4, False, name='b0034_bc_in_bb')\n", + "doc.add([b0034_bc_in_bb, b0034_bc_in_bb_seq])\n", + "gfp_ce_in_bb, gfp_ce_in_bb_seq = part_in_backbone_from_sbol('gfp_ce_in_bb', gfp_ce, [479,1195], [sbol3.SO_CDS], 4, False, name='gfp_ce_in_bb')\n", + "doc.add([gfp_ce_in_bb, gfp_ce_in_bb_seq])\n", + "b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb')\n", + "doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq])\n", + "\n", + "\n", + "#Assembly plan\n", + "simple_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( \n", + " name='simple_green_transcriptional_unit',\n", + " parts_in_backbone=[j23100_ab_in_bb, b0034_bc_in_bb, gfp_ce_in_bb, b0015_ef_in_bb], \n", + " acceptor_backbone=podd_backbone,\n", + " restriction_enzyme=bsai,\n", + " document=doc)\n", + "simple_assembly_plan.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Should have 1 product" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + }, + { + "data": { + "text/plain": [ + "[[,\n", + " ]]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(len(simple_assembly_plan.products))\n", + "simple_assembly_plan.products" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print doc components" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://sbolstandard.org/testfiles/pOdd_bb\n", + "http://sbolstandard.org/testfiles/pOdd_bb_seq\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/simple_green_transcriptional_unit_assembly_plan\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone_seq\n", + "http://sbolstandard.org/testfiles/composite_1_part_5_part_1_j23100_ab_in_bb_part_2_b0034_bc_in_bb_part_3_gfp_ce_in_bb_part_4_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_1_part_5_part_1_j23100_ab_in_bb_part_2_b0034_bc_in_bb_part_3_gfp_ce_in_bb_part_4_b0015_ef_in_bb_seq\n" + ] + } + ], + "source": [ + "for obj in simple_assembly_plan.document.objects:\n", + " print(obj.identity)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "get component sequence" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for obj in simple_assembly_plan.document.objects:\n", + " if obj.identity =='http://sbolstandard.org/testfiles/composite_0_part_5_part_1_j23100_ab_in_bb_part_2_b0034_bc_in_bb_part_3_gfp_ce_in_bb_part_4_b0015_ef_in_bb':\n", + " print(obj.sequences[0].lookup().elements)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Combinatorial assembly" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"Test assembly plan class\"\"\"\n", + "doc = sbol3.Document()\n", + "sbol3.set_namespace('http://sbolstandard.org/testfiles')\n", + "# Assembly plan setup\n", + "bsai = ed_restriction_enzyme('BsaI')\n", + "#lvl1 acceptor\n", + "podd1_dir = os.path.join('podd1.gb')\n", + "podd_doc = convert_from_genbank(podd1_dir, 'https://github.com/Gonza10V')\n", + "podd_af = [top_level for top_level in podd_doc if type(top_level)==sbol3.Component][0]\n", + "podd_backbone, podd_backbone_seq = backbone_from_sbol('pOdd_bb', podd_af, [680,1770], 4, False, name='pOdd_bb')\n", + "doc.add([podd_backbone,podd_backbone_seq])\n", + "#parts in backbone\n", + "##get parts from genbank\n", + "j23100_dir = os.path.join('ab_j23100.gb')\n", + "j23101_dir = os.path.join('ab_j23101.gb')\n", + "b0034_dir = os.path.join('bc_b0034.gb')\n", + "gfp_dir = os.path.join('ce_gfp.gb')\n", + "rfp_dir = os.path.join('ce_mrfp1.gb')\n", + "cfp_dir = os.path.join('ce_ecfp.gb')\n", + "b0015_dir = os.path.join('ef_b0015.gb')\n", + "j23100_doc = convert_from_genbank(j23100_dir, 'https://github.com/Gonza10V')\n", + "j23100_ab = [top_level for top_level in j23100_doc if type(top_level)==sbol3.Component][0]\n", + "j23101_doc = convert_from_genbank(j23101_dir, 'https://github.com/Gonza10V')\n", + "j23101_ab = [top_level for top_level in j23101_doc if type(top_level)==sbol3.Component][0]\n", + "b0034_doc = convert_from_genbank(b0034_dir, 'https://github.com/Gonza10V')\n", + "b0034_bc = [top_level for top_level in b0034_doc if type(top_level)==sbol3.Component][0]\n", + "gfp_doc = convert_from_genbank(gfp_dir, 'https://github.com/Gonza10V')\n", + "gfp_ce = [top_level for top_level in gfp_doc if type(top_level)==sbol3.Component][0]\n", + "rfp_doc = convert_from_genbank(rfp_dir, 'https://github.com/Gonza10V')\n", + "rfp_ce = [top_level for top_level in rfp_doc if type(top_level)==sbol3.Component][0]\n", + "cfp_doc = convert_from_genbank(cfp_dir, 'https://github.com/Gonza10V')\n", + "cfp_ce = [top_level for top_level in cfp_doc if type(top_level)==sbol3.Component][0]\n", + "b0015_doc = convert_from_genbank(b0015_dir, 'https://github.com/Gonza10V')\n", + "b0015_ef = [top_level for top_level in b0015_doc if type(top_level)==sbol3.Component][0]\n", + "##SBOL parts in backbone\n", + "j23100_ab_in_bb, j23100_ab_in_bb_seq = part_in_backbone_from_sbol('j23100_ab_in_bb', j23100_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23100_ab_in_bb')\n", + "doc.add([j23100_ab_in_bb, j23100_ab_in_bb_seq])\n", + "j23101_ab_in_bb, j23101_ab_in_bb_seq = part_in_backbone_from_sbol('j23101_ab_in_bb', j23101_ab, [479,513], [sbol3.SO_PROMOTER], 4, False, name='j23101_ab_in_bb')\n", + "doc.add([j23101_ab_in_bb, j23101_ab_in_bb_seq])\n", + "b0034_bc_in_bb, b0034_bc_in_bb_seq = part_in_backbone_from_sbol('b0034_bc_in_bb', b0034_bc, [479,499], [sbol3.SO_RBS], 4, False, name='b0034_bc_in_bb')\n", + "doc.add([b0034_bc_in_bb, b0034_bc_in_bb_seq])\n", + "gfp_ce_in_bb, gfp_ce_in_bb_seq = part_in_backbone_from_sbol('gfp_ce_in_bb', gfp_ce, [479,1195], [sbol3.SO_CDS], 4, False, name='gfp_ce_in_bb')\n", + "doc.add([gfp_ce_in_bb, gfp_ce_in_bb_seq])\n", + "rfp_ce_in_bb, rfp_ce_in_bb_seq = part_in_backbone_from_sbol('rfp_ce_in_bb', rfp_ce, [479,1156], [sbol3.SO_CDS], 4, False, name='rfp_ce_in_bb')\n", + "doc.add([rfp_ce_in_bb, rfp_ce_in_bb_seq])\n", + "cfp_ce_in_bb, cfp_ce_in_bb_seq = part_in_backbone_from_sbol('cfp_ce_in_bb', cfp_ce, [479,1198], [sbol3.SO_CDS], 4, False, name='cfp_ce_in_bb')\n", + "doc.add([cfp_ce_in_bb, cfp_ce_in_bb_seq])\n", + "b0015_ef_in_bb, b0015_ef_in_bb_seq = part_in_backbone_from_sbol('b0015_ef_in_bb', b0015_ef, [518,646], [sbol3.SO_TERMINATOR], 4, False, name='b0015_ef_in_bb')\n", + "doc.add([b0015_ef_in_bb, b0015_ef_in_bb_seq])\n", + "\n", + "\n", + "#Assembly plan\n", + "combinatorial_assembly_plan = Assembly_plan_composite_in_backbone_single_enzyme( \n", + " name='combinatorial_rgb_transcriptional_units',\n", + " parts_in_backbone=[j23100_ab_in_bb, j23101_ab_in_bb, b0034_bc_in_bb, gfp_ce_in_bb, rfp_ce_in_bb, cfp_ce_in_bb, b0015_ef_in_bb], \n", + " acceptor_backbone=podd_backbone,\n", + " restriction_enzyme=bsai,\n", + " document=doc)\n", + "combinatorial_assembly_plan.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "should produce 6 products" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n" + ] + }, + { + "data": { + "text/plain": [ + "[[,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ],\n", + " [,\n", + " ]]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(len(combinatorial_assembly_plan.products))\n", + "combinatorial_assembly_plan.products\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "get doc components" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://sbolstandard.org/testfiles/pOdd_bb\n", + "http://sbolstandard.org/testfiles/pOdd_bb_seq\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_seq\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/combinatorial_rgb_transcriptional_units_assembly_plan\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/j23100_ab_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/j23101_ab_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0034_bc_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/gfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/rfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/cfp_ce_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract\n", + "http://sbolstandard.org/testfiles/b0015_ef_in_bb_part_extract_seq\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone\n", + "http://sbolstandard.org/testfiles/pOdd_bb_backbone_seq\n", + "http://sbolstandard.org/testfiles/composite_1_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_1_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_2_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_2_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_4_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_4_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_4_gfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_5_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_5_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_5_rfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n", + "http://sbolstandard.org/testfiles/composite_6_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb\n", + "http://sbolstandard.org/testfiles/composite_6_part_8_part_2_j23101_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb_seq\n" + ] + } + ], + "source": [ + "for obj in combinatorial_assembly_plan.document.objects:\n", + " print(obj.identity)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cgctgcatgaagagcctgcagtccggcaaaaaagggcaaggtgtcaccaccctgccctttttctttaaaaccgaaaagattacttcgcgttatgcaggcttcctcgctcactgactcgctgcgctcggtcgttcggctgcggcgagcggtatcagctcactcaaaggcggtaatacggttatccacagaatcaggggataacgcaggaaagaacatgtgagcaaaaggccagcaaaaggccaggaaccgtaaaaaggccgcgttgctggcgtttttccacaggctccgcccccctgacgagcatcacaaaaatcgacgctcaagtcagaggtggcgaaacccgacaggactataaagataccaggcgtttccccctggaagctccctcgtgcgctctcctgttccgaccctgccgcttaccggatacctgtccgcctttctcccttcgggaagcgtggcgctttctcatagctcacgctgtaggtatctcagttcggtgtaggtcgttcgctccaagctgggctgtgtgcacgaaccccccgttcagcccgaccgctgcgccttatccggtaactatcgtcttgagtccaacccggtaagacacgacttatcgccactggcagcagccactggtaacaggattagcagagcgaggtatgtaggcggtgctacagagttcttgaagtggtggcctaactacggctacactagaagaacagtatttggtatctgcgctctgctgaagccagttaccttcggaaaaagagttggtagctcttgatccggcaaacaaaccaccgctggtagcggtggtttttttgtttgcaagcagcagattacgcgcagaaaaaaaggatctcaagaagatcctttgatcttttctacggggtctgacgctcagtggaacgaaaactcacgttaagggattttggtcatgagattatcaaaaaggatcttcacctagatccttttaaattaaaaatgaagttttaaatcaatctaaagtatatatgagtaaacttggtctgacagctcgagtcccgtcaagtcagcgtaatgctctgccagtgttacaaccaattaaccaattctgattagaaaaactcatcgagcatcaaatgaaactgcaatttattcatatcaggattatcaataccatatttttgaaaaagccgtttctgtaatgaaggagaaaactcaccgaggcagttccataggatggcaagatcctggtatcggtctgcgattccgactcgtccaacatcaatacaacctattaatttcccctcgtcaaaaataaggttatcaagtgagaaatcaccatgagtgacgactgaatccggtgagaatggcaaaagcttatgcatttctttccagacttgttcaacaggccagccattacgctcgtcatcaaaatcactcgcatcaaccaaaccgttattcattcgtgattgcgcctgagcgagacgaaatacgcgatcgctgttaaaaggacaattacaaacaggaatcgaatgcaaccggcgcaggaacactgccagcgcatcaacaatattttcacctgaatcaggatattcttctaatacctggaatgctgttttcccggggatcgcagtggtgagtaaccatgcatcatcaggagtacggataaaatgcttgatggtcggaagaggcataaattccgtcagccagtttagtctgaccatctcatctgtaacatcattggcaacgctacctttgccatgtttcagaaacaactctggcgcatcgggcttcccatacaatcgatagattgtcgcacctgattgcccgacattatcgcgagcccatttatacccatataaatcagcatccatgttggaatttaatcgcggcctggagcaagacgtttcccgttgaatatggctcataacaccccttgtattactgtttatgtaagcagacagttttattgttcatgatgatatatttttatcttgtgcaatgtaacatcagagattttgagacacaacgtggctttgttgaataaatcgaacttttgctgagttgaaggatcagctcgagtgccacctgacgtctaagaaaccattattatcatgacattaacctataaaaataggcgtatcacgaggcagaatttcagataaaaaaaatccttagctttcgctaaggatgatttctggaattcgctcttcaatgGGAGttgacggctagctcagtcctaggtacagtgctagcTACTagagaaagaggagaaatactaaatggtgagcaagggcgaggagctgttcaccggggtggtgcccatcctggtcgagctggacggcgacgtgaacggccacaagttcagcgtgtccggcgagggcgagggcgatgccacctacggcaagctgaccctgaagttcatctgcaccaccggcaagctgcccgtgccctggcccaccctcgtgaccaccctgacctggggcgtgcagtgcttcagccgctaccccgaccacatgaagcagcacgacttcttcaagtccgccatgcccgaaggctacgtccaggagcgcaccatcttcttcaaggacgacggcaactacaagacccgcgccgaggtgaagttcgagggcgacaccctggtgaaccgcatcgagctgaagggcatcgacttcaaggaggacggcaacatcctggggcacaagctggagtacaactacatcagccacaacgtctatatcaccgccgacaagcagaagaacggcatcaaggccaacttcaagatccgccacaacatcgaggacggcagcgtgcagctcgccgaccactaccagcagaacacccccatcggcgacggccccgtgctgctgcccgacaaccactacctgagcacccagtccgccctgagcaaagaccccaacgagaagcgcgatcacatggtcctgctggagttcgtgaccgccgccgggatcactctcggcatggacgagctgtacaagtaataaGCTTccaggcatcaaataaaacgaaaggctcagtcgaaagactgggcctttcgttttatctgttgtttgtcggtgaacgctctctactagagtcacactggctcaccttcgggtgggcctttctgcgtttata\n" + ] + } + ], + "source": [ + "for obj in combinatorial_assembly_plan.document.objects:\n", + " if obj.identity =='http://sbolstandard.org/testfiles/composite_3_part_8_part_1_j23100_ab_in_bb_part_3_b0034_bc_in_bb_part_6_cfp_ce_in_bb_part_7_b0015_ef_in_bb':\n", + " print(obj.sequences[0].lookup().elements)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "LOICA", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 335c1d9c79378211c79013c452ca3e8d7a2b6d36 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Sat, 16 Dec 2023 11:45:43 -0700 Subject: [PATCH 33/42] update in engineered region starting at 0 rather that at one for order of features --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 5dfa84be..e0278bf1 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -433,7 +433,7 @@ def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], L er_component.features.append(to_add) if len(er_component.features) > 1: for i in range(len(er_component.features)-1): - constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i], er_component.features[i + 1]) + constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i-1], er_component.features[i]) er_component.constraints = [constraint] else: pass From 421a076b50f444ae1f4696f91e196aeae91b3efd Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Sat, 16 Dec 2023 11:59:43 -0700 Subject: [PATCH 34/42] appending constraints to er --- sbol_utilities/component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index e0278bf1..ce678591 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -433,8 +433,8 @@ def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], L er_component.features.append(to_add) if len(er_component.features) > 1: for i in range(len(er_component.features)-1): - constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i-1], er_component.features[i]) - er_component.constraints = [constraint] + constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i], er_component.features[i+1]) + er_component.constraints.append(constraint) else: pass return er_component From 193ec3ea8ce43e520324d6156d23102ccc870e36 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Sat, 16 Dec 2023 12:36:47 -0700 Subject: [PATCH 35/42] swapping er features order on constraints --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index ce678591..b466f158 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -433,7 +433,7 @@ def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], L er_component.features.append(to_add) if len(er_component.features) > 1: for i in range(len(er_component.features)-1): - constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i], er_component.features[i+1]) + constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i+1], er_component.features[i]) er_component.constraints.append(constraint) else: pass From 5ab7e2a4c6690d0019369f8f002fad94ff6b0f5a Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Sat, 16 Dec 2023 15:31:28 -0700 Subject: [PATCH 36/42] now fixing order is optional on er --- sbol_utilities/component.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index b466f158..5864f2d0 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -416,7 +416,7 @@ def operator(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, s return operator_component, operator_seq -def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], **kwargs) \ +def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], fix_order: bool, **kwargs) \ -> sbol3.Component: """Creates an Engineered Region Component, with features assumed to be in linear order @@ -431,12 +431,13 @@ def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], L if isinstance(to_add, sbol3.Component): to_add = sbol3.SubComponent(to_add) er_component.features.append(to_add) - if len(er_component.features) > 1: - for i in range(len(er_component.features)-1): - constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i+1], er_component.features[i]) - er_component.constraints.append(constraint) - else: - pass + if fix_order == True: + if len(er_component.features) > 1: + for i in range(len(er_component.features)-1): + constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i+1], er_component.features[i]) + er_component.constraints.append(constraint) + else: + pass return er_component From 440854374a090ccfad728f3301861bc9d3942cd1 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Sat, 16 Dec 2023 15:54:31 -0700 Subject: [PATCH 37/42] fixing order is optional and uses meets intead of preceeds --- sbol_utilities/component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 5864f2d0..e5998b88 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -416,7 +416,7 @@ def operator(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, s return operator_component, operator_seq -def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], fix_order: bool, **kwargs) \ +def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], fix_order: bool=False, **kwargs) \ -> sbol3.Component: """Creates an Engineered Region Component, with features assumed to be in linear order @@ -434,7 +434,7 @@ def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], L if fix_order == True: if len(er_component.features) > 1: for i in range(len(er_component.features)-1): - constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i+1], er_component.features[i]) + constraint = sbol3.Constraint(sbol3.SBOL_MEETS, er_component.features[i], er_component.features[i+1]) er_component.constraints.append(constraint) else: pass From 408a297217aac4b3210a65164b6f1da86b73626b Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:43:20 +0000 Subject: [PATCH 38/42] adding a new document to test linear backbone adding a new document to test linear backbone from SBOL --- test/test_component.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_component.py b/test/test_component.py index 5cc28ef2..497a99a1 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -373,6 +373,8 @@ def test_backbone_bp011(self): hlc_doc.add([hl_circular_backbone_component, hl_circular_backbone_seq]) assert doc_diff(doc, hlc_doc) == 0, f'Constructor Error: Circular {backbone_identity} from SBOL' + hlc_doc = sbol3.Document() + doc = sbol3.Document() doc.add([linear_backbone_component, linear_backbone_seq]) hl_linear_backbone_component, hl_linear_backbone_seq = backbone_from_sbol(identity=backbone_identity, sbol_comp=linear_backbone_component, dropout_location=dropout_location, fusion_site_length=fusion_site_length, linear=True, description=test_description) hlc_doc.add([hl_linear_backbone_component, hl_linear_backbone_seq]) From f36b5b13d881c29d52433c3d2824f8ef1c1122bf Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:31:33 +0000 Subject: [PATCH 39/42] replacing meets for precedes in engineered region --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index e5998b88..43b9eda6 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -434,7 +434,7 @@ def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], L if fix_order == True: if len(er_component.features) > 1: for i in range(len(er_component.features)-1): - constraint = sbol3.Constraint(sbol3.SBOL_MEETS, er_component.features[i], er_component.features[i+1]) + constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, er_component.features[i], er_component.features[i+1]) er_component.constraints.append(constraint) else: pass From 5b771860001bdbe950f223f6fad38bc1e4d4cf74 Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Wed, 27 Mar 2024 10:48:39 +0000 Subject: [PATCH 40/42] order is fixed by default un enr --- sbol_utilities/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbol_utilities/component.py b/sbol_utilities/component.py index 43b9eda6..ec047ec0 100644 --- a/sbol_utilities/component.py +++ b/sbol_utilities/component.py @@ -416,7 +416,7 @@ def operator(identity: str, sequence: str, **kwargs) -> Tuple[sbol3.Component, s return operator_component, operator_seq -def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], fix_order: bool=False, **kwargs) \ +def engineered_region(identity: str, features: Union[List[sbol3.SubComponent], List[sbol3.Component]], fix_order: bool=True, **kwargs) \ -> sbol3.Component: """Creates an Engineered Region Component, with features assumed to be in linear order From 4e0e958b9605b4c980b90e70c15afafbcaee406e Mon Sep 17 00:00:00 2001 From: Gonzalo Vidal <35148159+Gonza10V@users.noreply.github.com> Date: Wed, 27 Mar 2024 10:56:16 +0000 Subject: [PATCH 41/42] testing now appending constraints --- test/test_component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_component.py b/test/test_component.py index ce21aed2..ca386aa4 100644 --- a/test/test_component.py +++ b/test/test_component.py @@ -209,7 +209,7 @@ def test_high_level_constructors(self): if len(enr_comp.features) > 1: for i in range(len(enr_comp.features)-1): constraint = sbol3.Constraint(sbol3.SBOL_PRECEDES, enr_comp.features[i], enr_comp.features[i+1]) - enr_comp.constraints = [constraint] + enr_comp.constraints.append(constraint) else: pass hlc_doc.add(hlc_enr_comp) From 978b6af85754066dd58287d6709ea77bfb1967e4 Mon Sep 17 00:00:00 2001 From: Jacob Beal Date: Sun, 7 Apr 2024 15:46:49 -0500 Subject: [PATCH 42/42] Correct missing comma from conflict resolution in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7c355ed3..f863f823 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ 'tyto>=1.4', 'openpyxl', 'requests', - 'sbol_factory>=1.1' + 'sbol_factory>=1.1', 'pydna' ], extras_require={ # requirements for development