Skip to content

Commit 1901b8a

Browse files
committed
scoop jobs can now be run through SGE
Updated scoop from 0.62 to the development version of 0.7RC1. With this new version of scoop, environment variables can be passed to the workers through a bash script (prolog.sh). Fixed the incorrect usage of futures.map; interestingly, the older version of scoop worked correctly even with this bug. This commit adds many debug logging statements that should be removed later.
1 parent 5dd2c17 commit 1901b8a

7 files changed

Lines changed: 81 additions & 15 deletions

File tree

prolog.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
2+
#The script that each scoop worker runs.
3+
#Should be modified based on user profile.
4+
source ~/.bashrc
5+
export RMGQM="/opt/rmgqm"
6+
#set Gaussian03 environment variables
7+
g03root=/opt
8+
GAUSS_SCRDIR=/scratch/$USER
9+
export g03root GAUSS_SCRDIR
10+
GAUSS_EXEDIR="$g03root/g03/"
11+
GAUSS_LEXEDIR="$g03root/g03/linda-exe"
12+
GAUSS_ARCHDIR="$g03root/g03/arch"
13+
GMAIN=$GAUSS_EXEDIR/g03
14+
PATH=$PATH:$GMAIN
15+
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GMAIN
16+
G03BASIS="$g03root/g03/basis"
17+
F_ERROPT1="271,271,2,1,2,2,2,2"
18+
TRAP_FPE="OVERFL=ABORT;DIVZERO=ABORT;INT_OVERFL=ABORT"
19+
MP_STACK_OVERFLOW="OFF"
20+
# to partially avoid KAI stupidity
21+
KMP_DUPLICATE_LIB_OK="TRUE"
22+
export GAUSS_EXEDIR GAUSS_ARCHDIR PATH GMAIN LD_LIBRARY_PATH F_ERROPT1 TRAP_FPE MP_STACK_OVERFLOW \
23+
KMP_DUPLICATE_LIB_OK G03BASIS GAUSS_LEXEDIR
24+
#set MOPAC
25+
export MOPAC_LICENSE=/opt/mopac/
26+
#
27+
export PYTHONPATH=/home/keceli/RMG/RMG-Py/PyDAS/build/lib.linux-x86_64-2.6:/home/keceli/RMG/RMG-Py/PyDQED:/home/keceli/local/lib/python2.6/site-packages:/opt/rmgqm/RDKit_2013_03_2:/usr/local/lib/python2.6/dist-packages:$PYTHONPATH
28+
export PATH=/home/keceli/kiler:/home/keceli/bin:/home/keceli/local/bin:/opt/mpich2-1.2.1p1/bin:/opt/intel/Compiler/11.0/074/bin/intel64:/opt/sge/bin/lx24-amd64:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/bin/mh:/opt/g03/g03:$PATH
29+
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/Compiler/11.0/074/ipp/em64t/sharedlib:/opt/intel/Compiler/11.0/074/mkl/lib/em64t:/opt/intel/Compiler/11.0/074/tbb/em64t/cc4.1.0_libc2.4_kernel2.6.16.21/lib:/opt/intel/Compiler/11.0/074/lib/intel64:/opt/g03/g03:/usr/local/lib:/opt/rmgqm/RDKit_2013_03_2/bin:/opt/rmgqm/boost_1_44_0/lib:/opt/rmgqm/RDKit_2013_03_2/lib

rmgpy/qm/main.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def checkAllSet(self):
6969
assert type(self.onlyCyclics) is BooleanType
7070
assert self.maxRadicalNumber is not None # but it can be 0
7171
assert type(self.maxRadicalNumber) is IntType
72+
logging.debug("QM settings are ok.")
7273

7374
class QMCalculator():
7475
"""
@@ -134,6 +135,7 @@ def checkPaths(self):
134135
raise Exception("RMG-Py 'bin' directory {0} does not exist.".format(self.settings.RMG_bin_path))
135136
if not os.path.isdir(self.settings.RMG_bin_path):
136137
raise Exception("RMG-Py 'bin' directory {0} is not a directory.".format(self.settings.RMG_bin_path))
138+
logging.debug("QM paths are ok.")
137139

138140

139141
def getThermoData(self, molecule):
@@ -150,15 +152,20 @@ def getThermoData(self, molecule):
150152
gaussian: Only PM3 is available.
151153
"""
152154
if self.settings.software == 'mopac' or self.settings.software == 'mopacPM3':
155+
logging.debug("Attempting for a {0} calculation.".format(self.settings.software))
153156
qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM3(molecule, self.settings)
154157
thermo0 = qm_molecule_calculator.generateThermoData()
158+
logging.debug("{0} calculation attempted.".format(self.settings.software))
155159
elif self.settings.software == 'mopacPM6':
160+
logging.debug("Attempting for a {0} calculation.".format(self.settings.software))
156161
qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM6(molecule, self.settings)
157162
thermo0 = qm_molecule_calculator.generateThermoData()
158163
elif self.settings.software == 'mopacPM7':
164+
logging.debug("Attempting for a {0} calculation.".format(self.settings.software))
159165
qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM7(molecule, self.settings)
160166
thermo0 = qm_molecule_calculator.generateThermoData()
161167
elif self.settings.software == 'gaussian':
168+
logging.debug("Attempting for a {0} calculation.".format(self.settings.software))
162169
qm_molecule_calculator = rmgpy.qm.gaussian.GaussianMolPM3(molecule, self.settings)
163170
thermo0 = qm_molecule_calculator.generateThermoData()
164171
else:

rmgpy/qm/molecule.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ def generateQMData(self):
204204
"""
205205
Calculate the QM data and return a QMData object, or None if it fails.
206206
"""
207+
logging.debug("{0} calculation".format(self.__class__.__name__))
207208
if self.verifyOutputFile():
208209
logging.info("Found a successful output file already; using that.")
209210
source = "QM {0} result file found from previous run.".format(self.__class__.__name__)
@@ -232,27 +233,32 @@ def generateThermoData(self):
232233
"""
233234
# First, see if we already have it.
234235
if self.loadThermoData():
236+
logging.debug("Already have thermo data")
235237
return self.thermo
236238

237239
# If not, generate the QM data
238240
self.qmData = self.generateQMData()
239241

240242
# If that fails, give up and return None.
241243
if self.qmData is None:
244+
logging.debug("QM data is not found")
242245
return None
243246

244247
self.determinePointGroup()
245248

246249
# If that fails, give up and return None.
247250
if self.pointGroup is None:
251+
logging.debug("No point group found")
248252
return None
249253

250254
self.calculateThermoData()
255+
logging.debug("Thermo data calculated")
251256
Cp0 = self.molecule.calculateCp0()
252257
CpInf = self.molecule.calculateCpInf()
253258
self.thermo.Cp0 = (Cp0,"J/(mol*K)")
254259
self.thermo.CpInf = (CpInf,"J/(mol*K)")
255260
self.saveThermoData()
261+
logging.debug("Thermo data saved")
256262
return self.thermo
257263

258264
def saveThermoData(self):
@@ -343,6 +349,7 @@ def calculateThermoData(self):
343349

344350
trans = rmgpy.statmech.IdealGasTranslation( mass=self.qmData.molecularMass )
345351
if self.pointGroup.linear:
352+
logging.debug("Linear molecule")
346353
rot = rmgpy.statmech.LinearRotor(
347354
rotationalConstant = self.qmData.rotationalConstants,
348355
symmetry = self.pointGroup.symmetryNumber,

rmgpy/qm/mopac.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,13 @@ class Mopac:
2020
mopacEnv = os.getenv('MOPAC_DIR', default="/opt/mopac")
2121
if os.path.exists(os.path.join(mopacEnv , 'MOPAC2012.exe')):
2222
executablePath = os.path.join(mopacEnv , 'MOPAC2012.exe')
23+
logging.debug("{0} is found.".format(executablePath))
2324
elif os.path.exists(os.path.join(mopacEnv , 'MOPAC2009.exe')):
2425
executablePath = os.path.join(mopacEnv , 'MOPAC2009.exe')
26+
logging.debug("{0} is found.".format(executablePath))
2527
else:
2628
executablePath = os.path.join(mopacEnv , '(MOPAC 2009 or 2012)')
29+
logging.debug("{0} is not found.".format(executablePath))  # fallback branch: neither MOPAC2012.exe nor MOPAC2009.exe exists
2730

2831
usePolar = False #use polar keyword in MOPAC
2932

@@ -56,6 +59,7 @@ class Mopac:
5659

5760
def testReady(self):
5861
if not os.path.exists(self.executablePath):
62+
logging.debug("{0} is not found.".format(self.executablePath))  # format the message string, not the None returned by logging.debug()
5963
raise Exception("Couldn't find MOPAC executable at {0}. Try setting your MOPAC_DIR environment variable.".format(self.executablePath))
6064

6165
def run(self):

rmgpy/rmg/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,7 @@ def initializeLog(verbose, log_file_name):
10801080
logging.addLevelName(logging.ERROR, 'Error: ')
10811081
logging.addLevelName(logging.WARNING, 'Warning: ')
10821082
logging.addLevelName(logging.INFO, '')
1083-
logging.addLevelName(logging.DEBUG, '')
1083+
logging.addLevelName(logging.DEBUG, 'Debug:')
10841084
logging.addLevelName(0, '')
10851085

10861086
# Create formatter and add to console handler

rmgpy/rmg/model.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
import itertools
4040

4141
import scoop
42-
from scoop import futures
42+
from scoop import futures,shared
4343

4444
from rmgpy.display import display
4545
#import rmgpy.chemkin
@@ -64,22 +64,28 @@
6464

6565
__database = None
6666

67-
def makeThermoForSpecies(spec,qmValue=None):
67+
def makeThermoForSpecies(spec):
6868
"""
6969
Make thermo for a species.
7070
"""
71+
import logging
72+
qmValue=shared.getConst('qmValue')
73+
if qmValue: logging.debug("qmValue fine @ makeThermoForSpecies")
7174
global __database
7275
if __database == None:
7376
"""Load the database from some pickle file"""
74-
import cPickle, logging
77+
import cPickle
7578
filename = scoop.shared.getConst('databaseFile')
7679
database_hash = scoop.shared.getConst('databaseHash')
80+
logging.debug('Loading database pickle2 file from {0!r}'.format(filename))  # placeholder added so the file name is actually reported
7781
#logging.info('Loading database pickle2 file from {0!r} on worker {1}'.format(filename, scoop.WORKER_NAME.decode() ))
7882
f = open(filename, 'rb')
7983
__database = cPickle.load(f)
8084
f.close()
8185
assert __database.hash == database_hash, "Database loaded from {0!r} doesn't match expected hash!".format(filename)
86+
logging.debug("Generate thermo data in makeThermoForSpecies")
8287
spec.generateThermoData(__database,quantumMechanics=qmValue)
88+
logging.debug("Thermo generated for {0}".format(spec.label))
8389
return spec.thermo
8490

8591
################################################################################
@@ -117,17 +123,18 @@ def generateThermoData(self, database, thermoClass=NASA, quantumMechanics=None):
117123
from rmgpy.data.thermo import saveEntry
118124

119125
thermo0 = None
120-
121126
thermo0 = database.thermo.getThermoDataFromLibraries(self)
122-
127+
if quantumMechanics is None : logging.debug("qmValue is None at generateThermoData in model.py")
123128
if thermo0 is not None:
124-
logging.info("Found thermo for {0} in thermo library".format(self.label))
129+
logging.debug("Found thermo for {0} in thermo library".format(self.label))
125130
assert len(thermo0) == 3, "thermo0 should be a tuple at this point: (thermoData, library, entry)"
126131
thermo0 = thermo0[0]
127132

128133
elif quantumMechanics:
134+
logging.debug("Generate thermo data with QM")
129135
molecule = self.molecule[0]
130136
if quantumMechanics.settings.onlyCyclics and not molecule.isCyclic():
137+
logging.debug("Bypassing QM for {0}".format(self.label))  # placeholder added so the species label is actually reported
131138
pass
132139
else: # try a QM calculation
133140
if molecule.getRadicalCount() > quantumMechanics.settings.maxRadicalNumber:
@@ -162,10 +169,12 @@ def generateThermoData(self, database, thermoClass=NASA, quantumMechanics=None):
162169
f.write('{0}\n'.format(molecule.toSMILES()))
163170
f.write('{0}\n\n'.format(molecule.toAdjacencyList(removeH=True)))
164171
else: # Not too many radicals: do a direct calculation.
172+
logging.debug("Generate thermo for {0} with QM".format(self.label))
165173
thermo0 = quantumMechanics.getThermoData(molecule) # returns None if it fails
166-
174+
if thermo0 is None: logging.debug("QM for {0} failed.".format(self.label))
167175
if thermo0 is not None:
168176
# Write the QM molecule thermo to a library so that can be used in future RMG jobs.
177+
logging.debug("QM for {0} is successful.".format(self.label))
169178
quantumMechanics.database.loadEntry(index = len(quantumMechanics.database.entries) + 1,
170179
label = molecule.toSMILES(),
171180
molecule = molecule.toAdjacencyList(),

thermoEstimator.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99
"""
1010

1111
import os.path
12-
from rmgpy.rmg.main import RMG, processProfileStats, makeProfileGraph
12+
import logging
13+
from rmgpy.rmg.main import RMG, initializeLog, processProfileStats, makeProfileGraph
1314
from rmgpy.data.thermo import ThermoLibrary
1415
from rmgpy.chemkin import writeThermoEntry
1516
from rmgpy.rmg.model import makeThermoForSpecies
16-
import scoop
17-
from scoop import futures
17+
from scoop import futures,shared
1818
################################################################################
1919
def chunks(l, n):
2020
""" Yield successive n-sized chunks from l.
@@ -34,6 +34,7 @@ def runThermoEstimator(inputFile):
3434
# initialize and load the database as well as any QM settings
3535
rmg.loadDatabase()
3636
if rmg.quantumMechanics:
37+
logging.debug("Initialize QM")
3738
rmg.quantumMechanics.initialize()
3839

3940
# Generate the thermo for all the species and write them to chemkin format as well as
@@ -44,9 +45,13 @@ def runThermoEstimator(inputFile):
4445
# species.generateThermoData(rmg.database, quantumMechanics=rmg.reactionModel.quantumMechanics)
4546
listOfSpecies=rmg.initialSpecies
4647
chunksize=50
48+
if rmg.reactionModel.quantumMechanics: logging.debug("qmValue fine @ runThermoEstimator")
49+
shared.setConst(qmValue=rmg.reactionModel.quantumMechanics)
4750
for chunk in list(chunks(listOfSpecies,chunksize)):
48-
outputList = futures.map(makeThermoForSpecies, chunk,qmValue=rmg.reactionModel.quantumMechanics)
51+
outputList = futures.map(makeThermoForSpecies, chunk)
52+
logging.debug("mapped")
4953
for species, thermo in zip(chunk, outputList):
54+
logging.debug("specie {0}".format(species.label))
5055
species.thermo = thermo
5156
library.loadEntry(
5257
index = len(library.entries) + 1,
@@ -55,6 +60,7 @@ def runThermoEstimator(inputFile):
5560
thermo = species.thermo.toThermoData(),
5661
shortDesc = species.thermo.comment,
5762
)
63+
logging.debug("chunk done")
5864
output.write(writeThermoEntry(species))
5965
output.write('\n')
6066

@@ -69,15 +75,16 @@ def runThermoEstimator(inputFile):
6975
import argparse
7076

7177
parser = argparse.ArgumentParser()
72-
parser.add_argument('input', metavar='INPUT', type=str, nargs=1,
78+
parser.add_argument('input', metavar='FILE', type=str, nargs=1,
7379
help='Thermo input file')
7480
parser.add_argument('-p', '--profile', action='store_true', help='run under cProfile to gather profiling statistics, and postprocess them if job completes')
7581
parser.add_argument('-P', '--postprocess', action='store_true', help='postprocess profiling statistics from previous [failed] run; does not run the simulation')
7682

7783
args = parser.parse_args()
7884

7985
inputFile = os.path.abspath(args.input[0])
80-
86+
inputDirectory = os.path.abspath(os.path.dirname(args.input[0]))
87+
8188
if args.postprocess:
8289
print "Postprocessing the profiler statistics (will be appended to thermo.log)"
8390
print "Use `dot -Tpdf thermo_profile.dot -o thermo_profile.pdf`"
@@ -94,9 +101,12 @@ def runThermoEstimator(inputFile):
94101
# actually run the program!
95102
cProfile.runctx(command, global_vars, local_vars, stats_file)
96103
# postprocess the stats
97-
log_file = 'thermo.log'
104+
log_file = os.path.join(inputDirectory,'RMG.log')
98105
processProfileStats(stats_file, log_file)
99106
makeProfileGraph(stats_file)
100107

101108
else:
109+
level = logging.DEBUG
110+
initializeLog(level, 'thermo.log')
111+
logging.debug("runThermoEstimator...")
102112
runThermoEstimator(inputFile)

0 commit comments

Comments
 (0)