Skip to content

Commit 86267b6

Browse files
committed
Add argument for performing leaveOneOutTest without averaging
This is not quite the original leaveOneOut test as it was done before. The original test takes extremely long to run because it must average up the family again after removing each exact rate rule. In this method, we remove the original rate rule and see whether an average (which should incorporate the original) does a good job of estimating that rate rule. The original version of the algorithm can still be used by setting averaging=True.
1 parent 53d7907 commit 86267b6

1 file changed

Lines changed: 34 additions & 9 deletions

File tree

testing/evaluateKinetics.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def getKineticsDepository(FullDatabase, family, depositoryLabel):
7575

7676
return exactKinetics, approxKinetics
7777

78-
def getKineticsLeaveOneOut(family):
78+
def getKineticsLeaveOneOut(family, averaging=True):
7979
"""
8080
Performs the leave one out test on a family. It returns a dictionary of
8181
the original exact nodes and a dictionary of the new averaged nodes.
@@ -91,13 +91,35 @@ def getKineticsLeaveOneOut(family):
9191

9292
for entryKey in family.rules.entries.keys():
9393
template = family.retrieveTemplate(entryKey.split(';'))
94-
exactKinetics[entryKey], exactKineticsEntry=family.rules.estimateKinetics(template)
94+
exactKineticsData, exactKineticsEntry=family.rules.estimateKinetics(template)
95+
if exactKineticsData.comment:
96+
# This means it was an averaged node so skip
97+
continue
98+
99+
exactKinetics[entryKey] = exactKineticsData
100+
101+
if averaging:
102+
# In this scheme, we remove the data fully and try to pretend the database
103+
# wants this kinetic value when we know nothing about it
104+
familyCopy=copy.deepcopy(family)
105+
familyCopy.rules.entries.pop(entryKey)
106+
familyCopy.fillKineticsRulesByAveragingUp()
107+
approxKinetics[entryKey], approxKineticsEntry=familyCopy.rules.estimateKinetics(template)
108+
else:
109+
# In this scheme, do not re-average the tree, just try to see if the nearest
110+
# best node provides a good estimate for the original kinetics.
111+
# This takes significantly less time to run, but is not a true validation of the data,
112+
# it is just testing our kinetics selection algorithm
113+
114+
originalEntry = family.rules.entries(entryKey)
115+
family.rules.entries.pop(entryKey)
116+
approxKinetics[entryKey], approxKineticsEntry=family.rules.estimateKinetics(template)
117+
# Re-add the data back into the original family
118+
family.rules.entries[entryKey] = originalEntry
119+
120+
95121

96-
familyCopy=copy.deepcopy(family)
97-
familyCopy.rules.entries.pop(entryKey)
98-
familyCopy.fillKineticsRulesByAveragingUp()
99122

100-
approxKinetics[entryKey], approxKineticsEntry=familyCopy.rules.estimateKinetics(template)
101123

102124
return exactKinetics, approxKinetics
103125

@@ -289,7 +311,7 @@ def compareNIST(FullDatabase, trialDir):
289311

290312

291313

292-
def leaveOneOut(FullDatabase, trialDir):
314+
def leaveOneOut(FullDatabase, trialDir, averaging=True):
293315
"""
294316
Performs leave one out analysis on all the kinetics families.
295317
The algorithm deletes a single entry in the family, and then re-averages the tree
@@ -315,7 +337,10 @@ def leaveOneOut(FullDatabase, trialDir):
315337
if len(family.rules.entries) < 2:
316338
print ' Skipping', familyName, ': only has one rate rule...'
317339
else:
318-
exactKinetics, approxKinetics = getKineticsLeaveOneOut(family)
340+
if not averaging:
341+
# Pre-average the family if averaging is not turned on
342+
family.fillKineticsRulesByAveragingUp()
343+
exactKinetics, approxKinetics = getKineticsLeaveOneOut(family, averaging)
319344
parityData=analyzeForParity(exactKinetics, approxKinetics, cutoff=8.0)
320345

321346
if len(parityData)<2:
@@ -370,7 +395,7 @@ def leaveOneOut(FullDatabase, trialDir):
370395

371396
print '--------------------------------------------'
372397
print 'Performing the leave on out test on the kinetics families...'
373-
leaveOneOut(FullDatabase, trialDir)
398+
leaveOneOut(FullDatabase, trialDir, averaging=False)
374399

375400
print '--------------------------------------------'
376401
print 'Filling up the family rate rules by averaging... Expect larger number of rate rules in subsequent tests'

0 commit comments

Comments
 (0)