@@ -158,23 +158,28 @@ def createParityPlot(parityData):
158158 plt .axis ([minimum / 10 , maximum * 10 , minimum / 10 , maximum * 10 ])
159159
160160def countNodes (family ):
161+ """
162+ Count the number of groups under each tree in the Family's Groups.
163+ Returns a list containing the following information
164+ [Family Label, Number of Rules, Top Node Label 1, Number of Children, ..., Top Node Label N, Number of Children]
165+ """
161166 countList = [family .label ]
162167
163168 #get top nodes
164169 forwardTemplate = family .groups .top [:]
165170
166171 temporary = []
167- symmetricTree = False
168172 for entry in forwardTemplate :
169173 if entry not in temporary :
170174 temporary .append (entry )
171175 else :
172- # duplicate node found at top of tree
176+ # Duplicate node found at top of tree
173177 # eg. R_recombination: ['Y_rad', 'Y_rad']
174178 assert len (forwardTemplate )== 2 , 'Can currently only do symmetric trees with nothing else in them'
175- symmetricTree = True
179+
176180 forwardTemplate = temporary
177181
182+ countList .append (len (family .rules .entries ))
178183 for group in forwardTemplate :
179184 checkList = [group ]
180185 childrenList = [group ]
@@ -183,28 +188,35 @@ def countNodes(family):
183188 checkList .extend (checkList [0 ].children )
184189 del checkList [0 ]
185190
186- countList .append (len (childrenList ))
191+ countList .extend ([group .label , len (childrenList )])
192+
187193 return countList
188194
189195
190196###########################################################################################################
191197# Functions for the full Database
192198
193- def countNodesAll (FullDatabase , trialDir ):
194- for family in FullDatabase .kinetics .families .values ():
195- family .addKineticsRulesFromTrainingSet (thermoDatabase = FullDatabase .thermo )
196-
def obtainKineticsFamilyStatistics(FullDatabase, trialDir):
    """
    Write a FamilyStatistics.csv file into `trialDir` with one row per
    kinetics family in `FullDatabase.kinetics.families`.

    Each row is the list returned by countNodes() for that family:
    [Family Label, Number of Rules, Top Node Label 1, Number of Groups, ...,
    Top Node Label N, Number of Groups]. Rows are written in alphabetical
    order of family name so that the file is reproducible between runs.

    Note: does NOT average up the database or create any rate rules from
    training data. If that is desired it must be done prior to entering this
    function (averaging may not be desired as it would add non-exact rules to
    the rule count).
    """
    families = FullDatabase.kinetics.families

    # Iterate in sorted order: plain dict iteration order is arbitrary, which
    # would shuffle the rows of the output file from run to run.
    rows = []
    for familyName in sorted(families.keys()):
        family = families[familyName]
        print('Processing {0}... ({1} rules)'.format(familyName, len(family.rules.entries)))
        rows.append(countNodes(family))

    # Every row carries two leading columns followed by one (label, count)
    # pair per top node. Always emit at least the three historical pairs so
    # the header is unchanged for the common case, and add more pairs when a
    # family has more top nodes so that no column is left without a title.
    topNodeCount = 3
    for row in rows:
        topNodeCount = max(topNodeCount, (len(row) - 2) // 2)

    header = ['Family', 'Number of Rules']
    for index in range(1, topNodeCount + 1):
        header.extend(['Top Node {0}'.format(index), 'Number of Groups'])

    # NOTE(review): 'wb' is the mode the Python 2 csv module expects; this
    # must become text mode with newline='' if the script is ported to Python 3.
    with open(os.path.join(trialDir, 'FamilyStatistics.csv'), 'wb') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(header)
        csvwriter.writerows(rows)
210222
@@ -324,23 +336,36 @@ def leaveOneOut(FullDatabase, trialDir):
324336
if __name__ == '__main__':
    from rmgpy import settings

    # Create the data evaluation directory up front so that every step below
    # can write its output files into it.
    trialDir = os.path.join(settings['database.directory'], '..', 'testing', 'eval')
    if not os.path.exists(trialDir):
        os.makedirs(trialDir)

    print('Loading the RMG database...')
    FullDatabase = RMGDatabase()
    FullDatabase.load(settings['database.directory'],
                      kineticsFamilies=['intra_H_migration'],
                      kineticsDepositories='all',
                      # Use just the primary thermo library, which contains
                      # necessary small molecular thermo
                      thermoLibraries=['primaryThermoLibrary'],
                      reactionLibraries=[],
                      )

    # Prepare the database by loading training reactions
    for family in FullDatabase.kinetics.families.values():
        family.addKineticsRulesFromTrainingSet(thermoDatabase=FullDatabase.thermo)

    # The statistics are gathered BEFORE averaging up, so that the rule counts
    # only include rules that exist at this point and not averaged ones.
    print('Obtaining statistics for the families...')
    obtainKineticsFamilyStatistics(FullDatabase, trialDir)

    # Fill in the rate rules by averaging when we are ready to retrieve kinetics
    for family in FullDatabase.kinetics.families.values():
        family.fillKineticsRulesByAveragingUp()

    print('Evaluating the NIST Kinetics against the RMG estimates...')
    compareNIST(FullDatabase, trialDir)

    print('Performing the leave-one-out test on the kinetics families...')
    leaveOneOut(FullDatabase, trialDir)
0 commit comments