Skip to content

Commit 498113d

Browse files
committed
Fix the obtainKineticsFamilyStatistics function
It retrieves data about each kinetics family. It now shows the top node names and the number of rules, and the CSV file now has headers. It does not average the families or add training data; it assumes any such manipulation has been done outside the function. Also, don't load any kineticsLibraries or thermoLibraries by default except the primaryThermoLibrary, but do load all the depositories.
1 parent c47a198 commit 498113d

1 file changed

Lines changed: 43 additions & 18 deletions

File tree

testing/evaluateKinetics.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -158,23 +158,28 @@ def createParityPlot(parityData):
158158
plt.axis([minimum/10, maximum*10, minimum/10, maximum*10])
159159

160160
def countNodes(family):
161+
"""
162+
Count the number of groups under each tree in the Family's Groups.
163+
Returns a list containing the following information
164+
[Family Label, Number of Rules, Top Node Label 1, Number of Children, ..., Top Node Label N, Number of Children]
165+
"""
161166
countList=[family.label]
162167

163168
#get top nodes
164169
forwardTemplate = family.groups.top[:]
165170

166171
temporary = []
167-
symmetricTree = False
168172
for entry in forwardTemplate:
169173
if entry not in temporary:
170174
temporary.append(entry)
171175
else:
172-
# duplicate node found at top of tree
176+
# Duplicate node found at top of tree
173177
# eg. R_recombination: ['Y_rad', 'Y_rad']
174178
assert len(forwardTemplate)==2 , 'Can currently only do symmetric trees with nothing else in them'
175-
symmetricTree = True
179+
176180
forwardTemplate = temporary
177181

182+
countList.append(len(family.rules.entries))
178183
for group in forwardTemplate:
179184
checkList=[group]
180185
childrenList=[group]
@@ -183,28 +188,35 @@ def countNodes(family):
183188
checkList.extend(checkList[0].children)
184189
del checkList[0]
185190

186-
countList.append(len(childrenList))
191+
countList.extend([group.label, len(childrenList)])
192+
187193
return countList
188194

189195

190196
###########################################################################################################
191197
# Functions for the full Database
192198

193-
def countNodesAll(FullDatabase, trialDir):
194-
for family in FullDatabase.kinetics.families.values():
195-
family.addKineticsRulesFromTrainingSet(thermoDatabase=FullDatabase.thermo)
196-
199+
def obtainKineticsFamilyStatistics(FullDatabase, trialDir):
200+
"""
201+
Obtains statistics for the kinetics families by creating
202+
a FamilyStatistics.csv file that gives information about each family: the total number of
203+
rules, and the top node names and the number of groups under each.
204+
Note: does NOT average up the database or create any rate rules from training data.
205+
If that is desired it must be done prior to entering this function. (averaging may not be desired
206+
as it would add non-exact rules to the rule count)
207+
"""
197208
allFamilyNames=FullDatabase.kinetics.families.keys()
198209

199210
familyCount={}
200211

201212
for familyName in allFamilyNames:
202213
family=FullDatabase.kinetics.families[familyName]
203-
print "Processing", familyName + '...', '(' + str(len(family.rules.entries)) + ' nodes)'
214+
print "Processing", familyName + '...', '(' + str(len(family.rules.entries)) + ' rules)'
204215
familyCount[familyName]=countNodes(family)
205-
206-
with open(os.path.join(trialDir, 'NodeCount.csv'), 'wb') as csvfile:
216+
217+
with open(os.path.join(trialDir, 'FamilyStatistics.csv'), 'wb') as csvfile:
207218
csvwriter=csv.writer(csvfile)
219+
csvwriter.writerow(['Family','Number of Rules', 'Top Node 1', 'Number of Groups', 'Top Node 2', 'Number of Groups', 'Top Node 3', 'Number of Groups'])
208220
for key, value in familyCount.iteritems():
209221
csvwriter.writerow(value)
210222

@@ -324,23 +336,36 @@ def leaveOneOut(FullDatabase, trialDir):
324336

325337
if __name__ == '__main__':
326338
from rmgpy import settings
339+
340+
# Create the data evaluation directory
341+
trialDir = os.path.join(settings['database.directory'],'..','testing','eval')
342+
if not os.path.exists(trialDir):
343+
os.makedirs(trialDir)
344+
327345
print 'Loading the RMG database...'
328346
FullDatabase=RMGDatabase()
329347
FullDatabase.load(settings['database.directory'],
330-
kineticsFamilies=['Disproportionation'],
348+
kineticsFamilies=['intra_H_migration'],
331349
kineticsDepositories='all',
332-
thermoLibraries=[],
350+
thermoLibraries=['primaryThermoLibrary'], # Use just the primary thermo library, which contains necessary small molecular thermo
333351
reactionLibraries=[],
334352
)
335353

336-
trialDir = os.path.join(settings['database.directory'],'..','testing','eval')
337-
if not os.path.exists(trialDir):
338-
os.makedirs(trialDir)
354+
# Prepare the database by loading training reactions
355+
for family in FullDatabase.kinetics.families.values():
356+
family.addKineticsRulesFromTrainingSet(thermoDatabase=FullDatabase.thermo)
357+
358+
print 'Obtaining statistics for the families...'
359+
obtainKineticsFamilyStatistics(FullDatabase, trialDir)
360+
361+
# Fill in the rate rules by averaging when we are ready to retrieve kinetics
362+
for family in FullDatabase.kinetics.families.values():
363+
family.fillKineticsRulesByAveragingUp()
364+
365+
339366
print 'Evaluating the NIST Kinetics against the RMG estimates...'
340367
compareNIST(FullDatabase, trialDir)
341368

342-
print 'Counting the rate rules in the families...'
343-
countNodesAll(FullDatabase, trialDir)
344369

345370
print 'Performing the leave on out test on the kinetics families...'
346371
leaveOneOut(FullDatabase, trialDir)

0 commit comments

Comments
 (0)