Skip to content

Commit 5c54719

Browse files
mjohnson541 authored and nyee committed
Add a database test for unimolecular families whose groups have multiple trees. The test checks that end group labels are consistent throughout each end group tree, that the backbone has all labels present in the end groups as well as labels marking the shortest path between end groups, and that each end group subgraph in each entry of the backbone tree is the top level of the corresponding end group tree.
1 parent b5d3234 commit 5c54719

1 file changed

Lines changed: 182 additions & 3 deletions

File tree

testing/databaseTest.py

Lines changed: 182 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ def test_kinetics(self):
8080
self.compat_func_name = test_name
8181
yield test, family_name
8282

83+
if len(family.forwardTemplate.reactants)==1 and len(family.groups.top) != 1 and family_name != 'Diels_alder_addition':
84+
test = lambda x: self.kinetics_checkUnimolecularGroups(family_name)
85+
test_name = "Kinetics family {0} check that unimolecular group is formatted correctly?".format(family_name)
86+
test.description = test_name
87+
self.compat_func_name = test_name
88+
yield test, family_name
89+
8390
for depository in family.depositories:
8491

8592
test = lambda x: self.kinetics_checkAdjlistsNonidentical(depository)
@@ -95,7 +102,8 @@ def test_kinetics(self):
95102
test.description = test_name
96103
self.compat_func_name = test_name
97104
yield test, library_name
98-
105+
106+
99107
def test_thermo(self):
100108
for group_name, group in self.database.thermo.groups.iteritems():
101109
test = lambda x: self.general_checkNodesFoundInTree(group_name, group)
@@ -439,8 +447,179 @@ def kinetics_checkCdAtomType(self, family_name):
439447
The following adjList may have atoms in a different ordering than the input file:
440448
{4}
441449
""".format(family_name, entry, correctAtom, index+1, entry.item.toAdjacencyList()))
442-
443-
450+
451+
def kinetics_checkUnimolecularGroups(self, family_name):
    """
    Check the group trees of a unimolecular family that has more than one top level node.

    The family's reactant template (``forwardTemplate.reactants[0]``) is taken to be
    the backbone (it contains the whole reactant molecule); every other top level
    node of the group tree is treated as an end group.

    The following are checked:
    A) end group entries have no labels beyond those of their top level entry
    B) end group entries have every label of their top level entry
    C) backbone groups have all labels that the end groups have
    D) every atom on the shortest path between the atoms labelled "*1" and "*3"
       of a backbone group carries a label
    E) the backbone subgraph corresponding to each end group is identical to the
       top level entry of that end group

    All failing categories are collected and reported together in one assertion
    failure, so a single run shows every problem in the family.

    :param family_name: key of the family in ``self.database.kinetics.families``
    :raises Exception: when a backbone group cannot be split into an end group
        fragment, or has no path between the "*1" and "*3" atoms
    """
    def find_shortest_path(start, end, path=None):
        """
        Return the shortest list of atoms leading from `start` to `end` (both
        included), or None when no path exists.

        This is an exhaustive depth-first search over ``atom.bonds``; `path`
        holds the atoms already visited on the current branch.
        """
        path = (path or []) + [start]
        if start == end:
            return path

        shortest = None
        for node in start.bonds:
            if node not in path:
                newpath = find_shortest_path(node, end, path)
                if newpath and (not shortest or len(newpath) < len(shortest)):
                    shortest = newpath
        return shortest

    def getEndFromBackbone(backbone, endLabels):
        """
        :param backbone: :class: Entry for a backbone of molecule
        :param endLabels: Labels in the end groups
        :return: A subgraph representing the end group of the molecule
        """
        # make copy for manipulation (the True argument presumably requests a deep copy)
        copyGroup = backbone.item.copy(True)

        # Find the first atom that carries one of the end group labels
        for atom in copyGroup.atoms:
            if atom.label in endLabels:
                midAtom = atom
                break
        else:
            # previously this fell through to a NameError on midAtom
            raise Exception("Group {0} has no atom labelled with any of {1}".format(
                backbone.label, sorted(endLabels)))

        # Collect the bonds first: removing them while iterating would mutate
        # the container being iterated over
        bondsToBreak = [bond for atom2, bond in midAtom.bonds.iteritems()
                        if atom2.label not in endLabels]
        for bond in bondsToBreak:
            copyGroup.removeBond(bond)

        # split group into end and backbone fragment
        groups = copyGroup.split()

        # verify group was split correctly and identify the correct end group
        endLabels = set(endLabels)
        for group in groups:
            groupLabels = set(atom.label for atom in group.atoms)
            groupLabels.discard('')
            if endLabels == groupLabels:
                return group

        # no fragment carries exactly the end group labels: dump what we got
        print(endLabels)
        print(groupLabels)
        for group in groups:
            print(group.toAdjacencyList(label=backbone.label))
        raise Exception("Group {0} not split correctly".format(backbone.label))

    family = self.database.kinetics.families[family_name]

    backbone = family.forwardTemplate.reactants[0]

    endGroups = [entry for entry in family.groups.top
                 if entry not in family.forwardTemplate.reactants]

    # labels present on the top level entry of each end group tree
    endLabels = {}
    for endGroup in endGroups:
        endLabels[endGroup] = set(atom.label for atom in endGroup.item.atoms if atom.label)

    # one atom from each end group
    midLabels = ["*1", "*3"]

    # define types of errors
    A = []  # end groups have too many labels
    B = []  # end group lacks necessary label
    C = []  # backbone missing end group labels
    D = []  # backbone missing labels in between groups
    E = []  # backbone tries to define atoms inside end groups
    for entry in family.groups.entries.itervalues():
        if not isinstance(entry.item, Group):
            continue
        group = entry.item
        ancestors = list(family.ancestors(entry))
        # Labels of THIS entry only; computed fresh for every entry so nothing
        # leaks over from the previously inspected group
        presentLabels = set(atom.label for atom in group.atoms if atom.label)

        if backbone in ancestors:
            # Check C
            for endGroup, labels in endLabels.iteritems():
                if not labels.issubset(presentLabels):
                    C.append([endGroup, entry])
            # check D
            midAtoms = [group.getLabeledAtom(x) for x in midLabels]
            pathAtoms = find_shortest_path(midAtoms[0], midAtoms[1])
            if pathAtoms is None:
                raise Exception("Group {0} has no path between atoms {1} and {2}".format(
                    entry.label, midLabels[0], midLabels[1]))
            for atom in pathAtoms:
                if not atom.label:
                    D.append([backbone, entry])
                    break
            # check E
            for endGroup, labels in endLabels.iteritems():
                endFromBackbone = getEndFromBackbone(entry, labels)
                # separate name: must not clobber the entry's own label set
                fragmentLabels = set(endFromBackbone.getLabeledAtoms().keys())
                if labels == fragmentLabels:
                    if not endGroup.item.isIdentical(endFromBackbone):
                        E.append([endGroup, entry])
                else:
                    raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
        else:
            for endNode, labelledAtoms in endLabels.iteritems():
                if endNode in ancestors:
                    # Check A
                    if not presentLabels.issubset(labelledAtoms):
                        A.append([endNode, entry])
                    # Check B
                    if not labelledAtoms.issubset(presentLabels):
                        B.append([endNode, entry])

    # print outputs
    reports = [
        (A, "These end groups have extra labels that their top level end group do not have:"),
        (B, "These end groups are missing labels that their top level end group have:"),
        (C, "These backbone groups are missing labels that are in the end groups:"),
        (D, "These backbone groups are missing labels along the path atoms:"),
        (E, "These backbone have end subgraphs that don't match a root:"),
    ]
    messages = []
    for errors, header in reports:
        if errors:
            s = header + "\n [root group, error group]"
            for x in errors:
                s += '\n' + str(x)
            messages.append(s)
    if messages:
        nose.tools.assert_true(False, '\n\n'.join(messages))
621+
622+
444623
def general_checkNodesFoundInTree(self, group_name, group):
445624
"""
446625
This test checks whether nodes are found in the tree, with proper parents.

0 commit comments

Comments
 (0)