Skip to content

Commit f0db6ff

Browse files
committed
Fix leaveOneOut test and add some time stamping for the leaveOneOut test
1 parent f255c7f commit f0db6ff

1 file changed

Lines changed: 27 additions & 15 deletions

File tree

testing/evaluateKinetics.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import re
1515
import copy
1616
import csv
17+
from time import time
1718

1819
from rmgpy.thermo import *
1920
from rmgpy.kinetics import *
@@ -80,12 +81,17 @@ def getKineticsLeaveOneOut(family):
8081
Performs the leave one out test on a family. It returns a dictionary of
8182
the original exact nodes and a dictionary of the new averaged nodes.
8283
The returned dictionary entries will be of a KineticModel class
84+
It deletes a single entry in the family, and then re-averages the tree
85+
and then tries to re-estimate that original deleted entry.
86+
87+
The original family should not contain averaged nodes when starting out. The
88+
leave one out test should be performed only for original exact matches.
8389
"""
8490
exactKinetics={}
8591
approxKinetics={}
8692

8793
for entryKey in family.rules.entries.keys():
88-
template = family.retrieveTemplate(entryKey)
94+
template = family.retrieveTemplate(entryKey.split(';'))
8995
exactKinetics[entryKey], exactKineticsEntry=family.rules.estimateKinetics(template)
9096

9197
familyCopy=copy.deepcopy(family)
@@ -285,17 +291,19 @@ def compareNIST(FullDatabase, trialDir):
285291
def leaveOneOut(FullDatabase, trialDir):
286292
"""
287293
Performs leave one out analysis on all the kinetics families.
294+
The algorithm deletes a single entry in the family, and then re-averages the tree
295+
and then tries to re-estimate that original deleted entry. The difference between
296+
these values is used to create a parity plot and averaged mean squared error statistics.
297+
298+
Note: loading of the training data is not performed at the beginning of
299+
this function, and must be performed outside the function. Averaging the trees should not
300+
be performed so as to not perform the leave one out test on rate rules that were averaged.
288301
"""
289302

290-
trialDir=os.path.join(trialDir, 'LeaveOneOut')
303+
trialDir=os.path.join(trialDir, 'leaveOneOut')
291304
if not os.path.exists(trialDir):
292305
os.makedirs(trialDir)
293306

294-
for family in FullDatabase.kinetics.families.values():
295-
family.addKineticsRulesFromTrainingSet(thermoDatabase=FullDatabase.thermo)
296-
297-
# familyName='intra_substitutionCS_isomerization'
298-
# allFamilyNames=[familyName]
299307
allFamilyNames=FullDatabase.kinetics.families.keys()
300308

301309
QDict={}
@@ -306,7 +314,12 @@ def leaveOneOut(FullDatabase, trialDir):
306314
if len(family.rules.entries) < 2:
307315
print ' Skipping', familyName, ': only has one rate rule...'
308316
else:
317+
318+
start_time = time()
309319
exactKinetics, approxKinetics = getKineticsLeaveOneOut(family)
320+
end_time = time()
321+
time_taken = end_time - start_time
322+
print "Time spent: {0:.2f} minutes".format(time_taken/60.0)
310323
parityData=analyzeForParity(exactKinetics, approxKinetics, cutoff=8.0)
311324

312325
if len(parityData)<2:
@@ -345,28 +358,27 @@ def leaveOneOut(FullDatabase, trialDir):
345358
print 'Loading the RMG database...'
346359
FullDatabase=RMGDatabase()
347360
FullDatabase.load(settings['database.directory'],
348-
kineticsFamilies=['intra_H_migration'],
361+
kineticsFamilies=['Cyclic_Ether_Formation'],
349362
kineticsDepositories='all',
350363
thermoLibraries=['primaryThermoLibrary'], # Use just the primary thermo library, which contains necessary small molecular thermo
351364
reactionLibraries=[],
352365
)
353366

354-
# Prepare the database by loading training reactions
367+
# Prepare the database by loading training reactions but not averaging the rate rules
355368
for family in FullDatabase.kinetics.families.values():
356369
family.addKineticsRulesFromTrainingSet(thermoDatabase=FullDatabase.thermo)
357-
370+
358371
print 'Obtaining statistics for the families...'
359372
obtainKineticsFamilyStatistics(FullDatabase, trialDir)
360373

361-
# Fill in the rate rules by averaging when we are ready to retrieve kinetics
374+
print 'Performing the leave on out test on the kinetics families...'
375+
leaveOneOut(FullDatabase, trialDir)
376+
377+
# Fill in the rate rules by averaging when we are ready to compare real kinetics
362378
for family in FullDatabase.kinetics.families.values():
363379
family.fillKineticsRulesByAveragingUp()
364380

365381

366382
print 'Evaluating the NIST Kinetics against the RMG estimates...'
367383
compareNIST(FullDatabase, trialDir)
368-
369-
370-
print 'Performing the leave on out test on the kinetics families...'
371-
leaveOneOut(FullDatabase, trialDir)
372384

0 commit comments

Comments
 (0)