1414import re
1515import copy
1616import csv
17+ from time import time
1718
1819from rmgpy .thermo import *
1920from rmgpy .kinetics import *
@@ -80,12 +81,17 @@ def getKineticsLeaveOneOut(family):
8081 Performs the leave one out test on a family. It returns a dictionary of
8182 the original exact nodes and a dictionary of the new averaged nodes.
8283 The returned dictionary entries will be of a KineticModel class
84+ It deletes a single entry in the family, and then re-averages the tree
85+ and then tries to re-estimate that original deleted entry.
86+
87+ The original family should not contain averaged nodes when starting out. The
88+ leave one out test should be performed only for original exact matches.
8389 """
8490 exactKinetics = {}
8591 approxKinetics = {}
8692
8793 for entryKey in family .rules .entries .keys ():
88- template = family .retrieveTemplate (entryKey )
94+ template = family .retrieveTemplate (entryKey . split ( ';' ) )
8995 exactKinetics [entryKey ], exactKineticsEntry = family .rules .estimateKinetics (template )
9096
9197 familyCopy = copy .deepcopy (family )
@@ -285,17 +291,19 @@ def compareNIST(FullDatabase, trialDir):
285291def leaveOneOut (FullDatabase , trialDir ):
286292 """
287293 Performs leave one out analysis on all the kinetics families.
294+ The algorithm deletes a single entry in the family, and then re-averages the tree
295+ and then tries to re-estimate that original deleted entry. The difference between
296+ these values is used to create a parity plot and averaged mean squared error statistics.
297+
298+ Note: this function neither loads the training data nor averages the database; the
299+ training data must be added outside the function before calling it. The trees should not be
300+ averaged beforehand, so that the leave one out test is not performed on averaged rate rules.
288301 """
289302
290- trialDir = os .path .join (trialDir , 'LeaveOneOut ' )
303+ trialDir = os .path .join (trialDir , 'leaveOneOut ' )
291304 if not os .path .exists (trialDir ):
292305 os .makedirs (trialDir )
293306
294- for family in FullDatabase .kinetics .families .values ():
295- family .addKineticsRulesFromTrainingSet (thermoDatabase = FullDatabase .thermo )
296-
297- # familyName='intra_substitutionCS_isomerization'
298- # allFamilyNames=[familyName]
299307 allFamilyNames = FullDatabase .kinetics .families .keys ()
300308
301309 QDict = {}
@@ -306,7 +314,12 @@ def leaveOneOut(FullDatabase, trialDir):
306314 if len (family .rules .entries ) < 2 :
307315 print ' Skipping' , familyName , ': only has one rate rule...'
308316 else :
317+
318+ start_time = time ()
309319 exactKinetics , approxKinetics = getKineticsLeaveOneOut (family )
320+ end_time = time ()
321+ time_taken = end_time - start_time
322+ print "Time spent: {0:.2f} minutes" .format (time_taken / 60.0 )
310323 parityData = analyzeForParity (exactKinetics , approxKinetics , cutoff = 8.0 )
311324
312325 if len (parityData )< 2 :
@@ -345,28 +358,27 @@ def leaveOneOut(FullDatabase, trialDir):
345358 print 'Loading the RMG database...'
346359 FullDatabase = RMGDatabase ()
347360 FullDatabase .load (settings ['database.directory' ],
348- kineticsFamilies = ['intra_H_migration ' ],
361+ kineticsFamilies = ['Cyclic_Ether_Formation ' ],
349362 kineticsDepositories = 'all' ,
350363 thermoLibraries = ['primaryThermoLibrary' ], # Use just the primary thermo library, which contains necessary small molecular thermo
351364 reactionLibraries = [],
352365 )
353366
354- # Prepare the database by loading training reactions
367+ # Prepare the database by loading training reactions but not averaging the rate rules
355368 for family in FullDatabase .kinetics .families .values ():
356369 family .addKineticsRulesFromTrainingSet (thermoDatabase = FullDatabase .thermo )
357-
370+
358371 print 'Obtaining statistics for the families...'
359372 obtainKineticsFamilyStatistics (FullDatabase , trialDir )
360373
361- # Fill in the rate rules by averaging when we are ready to retrieve kinetics
374+ print 'Performing the leave on out test on the kinetics families...'
375+ leaveOneOut (FullDatabase , trialDir )
376+
377+ # Fill in the rate rules by averaging when we are ready to compare real kinetics
362378 for family in FullDatabase .kinetics .families .values ():
363379 family .fillKineticsRulesByAveragingUp ()
364380
365381
366382 print 'Evaluating the NIST Kinetics against the RMG estimates...'
367383 compareNIST (FullDatabase , trialDir )
368-
369-
370- print 'Performing the leave on out test on the kinetics families...'
371- leaveOneOut (FullDatabase , trialDir )
372384
0 commit comments