Skip to content

Commit 70daf53

Browse files
committed
Add convertKineticsLibraryToTrainingReactions script
Specify the kinetics library name below and run the script. It automatically overwrites the training reactions files it needs to. Then you should commit those files. This script only trains safely. In other words, if a single match from an RMG family is found, a training reaction is created. Sometimes, there are no matches from RMG reaction families, or multiple matches. This indicates an error that requires manual fixing, and a printout is given in the script.
1 parent cec77cc commit 70daf53

1 file changed

Lines changed: 394 additions & 0 deletions

File tree

Lines changed: 394 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,394 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Convert Kinetics Library to Training Reactions Script\n",
8+
"\n",
9+
"Specify the kinetics library name below and run the script. It automatically overwrites the training reactions files it needs to. Then you should commit those files.\n",
10+
"\n",
11+
"This script only trains safely. In other words, if a single match from an RMG family is found, a training reaction is created. Sometimes, there are no matches from RMG reaction families, or multiple matches. This indicates an error that requires manual fixing, and a printout is given in the script."
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"metadata": {
18+
"collapsed": true
19+
},
20+
"outputs": [],
21+
"source": [
22+
"libraryName = 'vinylCPD_H'"
23+
]
24+
},
25+
{
26+
"cell_type": "code",
27+
"execution_count": null,
28+
"metadata": {
29+
"collapsed": false
30+
},
31+
"outputs": [],
32+
"source": [
33+
"from rmgpy.data.rmg import RMGDatabase\n",
34+
"from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary\n",
35+
"from rmgpy.rmg.model import Species\n",
36+
"from rmgpy import settings\n",
37+
"from convertKineticsLibraryToTrainingReactions import addAtomLabelsForReaction"
38+
]
39+
},
40+
{
41+
"cell_type": "markdown",
42+
"metadata": {},
43+
"source": [
44+
"\n",
45+
"## load lib_rxn"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": null,
51+
"metadata": {
52+
"collapsed": false
53+
},
54+
"outputs": [],
55+
"source": [
56+
"database = RMGDatabase()\n",
57+
"database.load(settings['database.directory'], kineticsFamilies='all', reactionLibraries = [libraryName], kineticsDepositories='all')"
58+
]
59+
},
60+
{
61+
"cell_type": "markdown",
62+
"metadata": {},
63+
"source": [
64+
"## generate fam_rxn, spec replacement and get reactionDict"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {
71+
"collapsed": false,
72+
"scrolled": false
73+
},
74+
"outputs": [],
75+
"source": [
76+
"reactionDict = {}\n",
77+
"kineticLibrary = database.kinetics.libraries[libraryName]\n",
78+
"for index, entry in kineticLibrary.entries.iteritems():\n",
79+
" lib_rxn = entry.item\n",
80+
" lib_rxn.kinetics = entry.data \n",
81+
" lib_rxn.index = entry.index\n",
82+
" lib_rxn.kinetics.comment = entry.label # Assign the entry's label to the comment\n",
83+
" # Let's make RMG try to generate this reaction from the families!\n",
84+
" fam_rxn_list = []\n",
85+
" rxt_mol_mutation_num = 1\n",
86+
" pdt_mol_mutation_num = 1\n",
87+
" for reactant in lib_rxn.reactants:\n",
88+
" rxt_mol_mutation_num *= len(reactant.molecule)\n",
89+
"\n",
90+
" for product in lib_rxn.products:\n",
91+
" pdt_mol_mutation_num *= len(product.molecule)\n",
92+
"\n",
93+
" for mutation_i in range(rxt_mol_mutation_num):\n",
94+
" rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n",
95+
" pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n",
96+
" fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n",
97+
" reactants=rxts_mol, products=pdts_mol))\n",
98+
"\n",
99+
"\n",
100+
" if len(fam_rxn_list) == 1:\n",
101+
" fam_rxn = fam_rxn_list[0]\n",
102+
"\n",
103+
" # danger: the fam_rxn may have switched the reactants with products\n",
104+
" # fam_rxn is survived from def filterReactions\n",
105+
" # so it's matched with lib_rxn only we have to \n",
106+
" # determine the direction\n",
107+
" lib_reactants = [r for r in lib_rxn.reactants] \n",
108+
" fam_reactants = [r for r in fam_rxn.reactants]\n",
109+
" for lib_reactant in lib_reactants:\n",
110+
" for fam_reactant in fam_reactants:\n",
111+
" if lib_reactant.isIsomorphic(fam_reactant):\n",
112+
" fam_reactants.remove(fam_reactant)\n",
113+
" break\n",
114+
"\n",
115+
" lib_products = [r for r in lib_rxn.products] \n",
116+
" fam_products = [r for r in fam_rxn.products]\n",
117+
" for lib_product in lib_products:\n",
118+
" for fam_product in fam_products:\n",
119+
" if lib_product.isIsomorphic(fam_product):\n",
120+
" fam_products.remove(fam_product)\n",
121+
" break\n",
122+
"\n",
123+
" forward = not (len(fam_reactants) != 0 or len(fam_products) != 0)\n",
124+
" # find the labeled atoms using family and reactants & products from fam_rxn \n",
125+
" addAtomLabelsForReaction(fam_rxn, database)\n",
126+
" # species replacement so that labeledAtoms is retored\n",
127+
" if forward:\n",
128+
" lib_rxn.reactants = fam_rxn.reactants\n",
129+
" lib_rxn.products = fam_rxn.products\n",
130+
" else:\n",
131+
" lib_rxn.reactants = fam_rxn.products\n",
132+
" lib_rxn.products = fam_rxn.reactants\n",
133+
" if fam_rxn.family in reactionDict:\n",
134+
" reactionDict[fam_rxn.family].append(lib_rxn)\n",
135+
" else:\n",
136+
" reactionDict[fam_rxn.family] = [lib_rxn]\n",
137+
"\n",
138+
" elif len(fam_rxn_list) == 0:\n",
139+
" print \"Sad :( There are no matches. This is a magic reaction or has chemistry that should be made into a new reaction family\"\n",
140+
" print ''\n",
141+
" print lib_rxn\n",
142+
" print ''\n",
143+
" print 'Reactant SMILES:'\n",
144+
" for reactant in lib_rxn.reactants:\n",
145+
" print reactant.molecule[0].toSMILES()\n",
146+
" print 'Product SMILES:'\n",
147+
" for product in lib_rxn.products:\n",
148+
" print product.molecule[0].toSMILES()\n",
149+
" print '==============='\n",
150+
" else:\n",
151+
" print \"There are multiple RMG matches for this reaction. You have to manually create this training reaction\"\n",
152+
" print ''\n",
153+
" print lib_rxn\n",
154+
" print ''\n",
155+
" print 'Reactant SMILES:'\n",
156+
" for reactant in lib_rxn.reactants:\n",
157+
" print reactant.molecule[0].toSMILES()\n",
158+
" print 'Product SMILES'\n",
159+
" for product in lib_rxn.products:\n",
160+
" print product.molecule[0].toSMILES()\n",
161+
" print ''\n",
162+
" print 'The following families were matched:'\n",
163+
" for rxn in fam_rxn_list:\n",
164+
" print rxn.family\n",
165+
" print '==============='\n",
166+
"\n",
167+
"\n"
168+
]
169+
},
170+
{
171+
"cell_type": "code",
172+
"execution_count": null,
173+
"metadata": {
174+
"collapsed": false
175+
},
176+
"outputs": [],
177+
"source": [
178+
"for familyName in reactionDict:\n",
179+
" print 'Adding training reactions for family: ' + familyName\n",
180+
" kineticFamily = database.kinetics.families[familyName]\n",
181+
" trainingDatabase = None\n",
182+
" for depository in kineticFamily.depositories:\n",
183+
" if depository.label.endswith('training'):\n",
184+
" trainingDatabase = depository\n",
185+
" break\n",
186+
" reactions = reactionDict[familyName]\n",
187+
" print 'reactions.py previously has {} rxns. Now adding {} new rxn(s).'.format(len(trainingDatabase.entries.values()), len(reactions))\n",
188+
" print '================='\n",
189+
" kineticFamily.saveTrainingReactions(reactions, shortDesc='Training reaction from kinetics library: {0}'.format(libraryName))"
190+
]
191+
},
192+
{
193+
"cell_type": "markdown",
194+
"metadata": {},
195+
"source": [
196+
"# How saveTrainingReaction works\n",
197+
"\n",
198+
"This part of the script is commented and should not be run. It serves only to demonstrate how the code for saving the training reactions works.\n",
199+
"\n",
200+
"## get speciesDict\n",
201+
"\n",
202+
"### load existing species as an intial speciesDict"
203+
]
204+
},
205+
{
206+
"cell_type": "code",
207+
"execution_count": null,
208+
"metadata": {
209+
"collapsed": false
210+
},
211+
"outputs": [],
212+
"source": [
213+
"# import os\n",
214+
"# from rmgpy.data.base import Database\n",
215+
"\n",
216+
"# training_path = os.path.join(settings['database.directory'], \\\n",
217+
"# 'kinetics', 'families', 'R_Addition_MultipleBond', 'training')\n",
218+
"\n",
219+
"# dictionary_file = os.path.join(training_path, 'dictionary.txt')\n",
220+
"\n",
221+
"# # Load the existing set of the species of the training reactions\n",
222+
"# speciesDict = Database().getSpecies(dictionary_file)"
223+
]
224+
},
225+
{
226+
"cell_type": "markdown",
227+
"metadata": {},
228+
"source": [
229+
"### for one family check uniqueness of each species in the lib_rxns"
230+
]
231+
},
232+
{
233+
"cell_type": "code",
234+
"execution_count": null,
235+
"metadata": {
236+
"collapsed": false,
237+
"scrolled": false
238+
},
239+
"outputs": [],
240+
"source": [
241+
"# familyName = 'R_Addition_MultipleBond'\n",
242+
"# print 'Adding training reactions for family: ' + familyName\n",
243+
"# kineticFamily = database.kinetics.families[familyName]\n",
244+
"# reactions = reactionDict[familyName]\n",
245+
"\n",
246+
"# for rxn in reactions:\n",
247+
"# for spec in (rxn.reactants + rxn.products):\n",
248+
"# for ex_spec_label in speciesDict:\n",
249+
"# ex_spec = speciesDict[ex_spec_label]\n",
250+
"# if ex_spec.molecule[0].getFormula() != spec.molecule[0].getFormula():\n",
251+
"# continue\n",
252+
"# else:\n",
253+
"# spec_labeledAtoms = spec.molecule[0].getLabeledAtoms()\n",
254+
"# ex_spec_labeledAtoms = ex_spec.molecule[0].getLabeledAtoms()\n",
255+
"# initialMap = {}\n",
256+
"# try:\n",
257+
"# for atomLabel in spec_labeledAtoms:\n",
258+
"# initialMap[spec_labeledAtoms[atomLabel]] = ex_spec_labeledAtoms[atomLabel]\n",
259+
"# except KeyError:\n",
260+
"# # atom labels did not match, therefore not a match\n",
261+
"# continue\n",
262+
"# if spec.molecule[0].isIsomorphic(ex_spec.molecule[0],initialMap):\n",
263+
"# spec.label = ex_spec.label\n",
264+
"# break\n",
265+
"# else:# no isomorphic existing species found\n",
266+
"# spec_formula = spec.molecule[0].getFormula()\n",
267+
"# if spec_formula not in speciesDict:\n",
268+
"# spec.label = spec_formula\n",
269+
"# else:\n",
270+
"# index = 2\n",
271+
"# while (spec_formula + '-{}'.format(index)) in speciesDict:\n",
272+
"# index += 1\n",
273+
"# spec.label = spec_formula + '-{}'.format(index)\n",
274+
"# speciesDict[spec.label] = spec"
275+
]
276+
},
277+
{
278+
"cell_type": "markdown",
279+
"metadata": {},
280+
"source": [
281+
"## save to files\n",
282+
"\n",
283+
"Save reactionDict to reactions.py and speciesDict to dictionary.txt"
284+
]
285+
},
286+
{
287+
"cell_type": "code",
288+
"execution_count": null,
289+
"metadata": {
290+
"collapsed": false
291+
},
292+
"outputs": [],
293+
"source": [
294+
"# # try to append \n",
295+
"# training_file = open(os.path.join(settings['database.directory'], 'kinetics', 'families', \\\n",
296+
"# kineticFamily.label, 'training', 'reactions_test.py'), 'a')\n",
297+
"\n",
298+
"# training_file.write(\"\\n\\n\")"
299+
]
300+
},
301+
{
302+
"cell_type": "code",
303+
"execution_count": null,
304+
"metadata": {
305+
"collapsed": true
306+
},
307+
"outputs": [],
308+
"source": [
309+
"# # find the largest reaction index\n",
310+
"# for depository in kineticFamily.depositories:\n",
311+
"# if depository.label.endswith('training'):\n",
312+
"# break\n",
313+
"# else:\n",
314+
"# logging.info('Could not find training depository in family {0}.'.format(kineticFamily.label))\n",
315+
"# logging.info('Starting a new one')\n",
316+
"# depository = KineticsDepository()\n",
317+
"# kineticFamily.depositories.append(depository)\n",
318+
"\n",
319+
"# trainingDatabase = depository\n",
320+
"# indices = [entry.index for entry in trainingDatabase.entries.values()]\n",
321+
"# if indices:\n",
322+
"# maxIndex = max(indices)\n",
323+
"# else:\n",
324+
"# maxIndex = 0"
325+
]
326+
},
327+
{
328+
"cell_type": "code",
329+
"execution_count": null,
330+
"metadata": {
331+
"collapsed": false
332+
},
333+
"outputs": [],
334+
"source": [
335+
"# # save reactions.py\n",
336+
"# from rmgpy.data.base import Entry\n",
337+
"# for i, reaction in enumerate(reactions): \n",
338+
"# entry = Entry(\n",
339+
"# index = maxIndex+i+1,\n",
340+
"# label = str(reaction),\n",
341+
"# item = reaction,\n",
342+
"# data = reaction.kinetics,\n",
343+
"# reference = None,\n",
344+
"# referenceType = '',\n",
345+
"# shortDesc = unicode(''),\n",
346+
"# longDesc = unicode(''),\n",
347+
"# rank = 3,\n",
348+
"# )\n",
349+
"# print reaction\n",
350+
"# kineticFamily.saveEntry(training_file, entry)\n",
351+
"\n",
352+
"# training_file.close()"
353+
]
354+
},
355+
{
356+
"cell_type": "code",
357+
"execution_count": null,
358+
"metadata": {
359+
"collapsed": true
360+
},
361+
"outputs": [],
362+
"source": [
363+
"# # save dictionary.txt\n",
364+
"# directory_test_file = os.path.join(training_path, 'directory_test.txt')\n",
365+
"# with open(directory_test_file, 'w') as f:\n",
366+
"# for label in speciesDict.keys():\n",
367+
"# f.write(speciesDict[label].molecule[0].toAdjacencyList(label=label, removeH=False))\n",
368+
"# f.write('\\n')\n",
369+
"# f.close()"
370+
]
371+
}
372+
],
373+
"metadata": {
374+
"kernelspec": {
375+
"display_name": "Python 2",
376+
"language": "python",
377+
"name": "python2"
378+
},
379+
"language_info": {
380+
"codemirror_mode": {
381+
"name": "ipython",
382+
"version": 2
383+
},
384+
"file_extension": ".py",
385+
"mimetype": "text/x-python",
386+
"name": "python",
387+
"nbconvert_exporter": "python",
388+
"pygments_lexer": "ipython2",
389+
"version": "2.7.11"
390+
}
391+
},
392+
"nbformat": 4,
393+
"nbformat_minor": 0
394+
}

0 commit comments

Comments
 (0)