Skip to content

Commit c8d2365

Browse files
authored
Merge pull request #949 from ReactionMechanismGenerator/tests1
Test and fix SMILES generation and parsing
2 parents 39d1176 + e67f0fc commit c8d2365

4 files changed

Lines changed: 570 additions & 6 deletions

File tree

rmgpy/molecule/generator.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
'CO2': 'O=C=O',
3737
'CO': '[C-]#[O+]',
3838
'C2H4': 'C=C',
39-
'O2': 'O=O'
39+
'O2': 'O=O',
40+
'C': '[C]', # for this to be in the "molecule" list it must be singlet with 2 lone pairs
4041
}
4142

4243
_known_smiles_radicals = {
@@ -47,7 +48,7 @@
4748
'HO2': '[O]O',
4849
'CH': '[CH]',
4950
'H': '[H]',
50-
'C': '[C]',
51+
'C': '[C]', # this, in the radical list, could be triplet or quintet.
5152
#'CO2': it could be [O][C][O] or O=[C][O]
5253
#'CO': '[C]=O', could also be [C][O]
5354
#'C2H4': could be [CH3][CH] or [CH2][CH2]
@@ -644,4 +645,4 @@ def find_lowest_p_layer(minmol, p_layer, equivalent_atoms):
644645
645646
TODO: The presence of unpaired electrons complicates stuff.
646647
"""
647-
return minmol
648+
return minmol

rmgpy/molecule/generatorTest.py

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,287 @@ def test_Nitrate(self):
453453
self.assertTrue(not ulayer)
454454
self.assertTrue(player.contains(P_LAYER_PREFIX + '1(0)'))
455455

456+
class SMILESGenerationTest(unittest.TestCase):
    """
    Check that SMILES strings generated from adjacency lists match the
    expected values for a set of small molecules and radicals.
    """

    def compare(self, adjlist, smiles, label=None):
        """
        Build a molecule from `adjlist` and assert that its generated SMILES
        equals `smiles`.

        `label` is an optional human-readable name that is included in the
        failure message; when omitted, the stripped adjacency list is used so
        that a failing case can still be identified.
        """
        mol = Molecule().fromAdjacencyList(adjlist)
        generated = mol.toSMILES()
        # assertEqual, not the deprecated alias assertEquals.
        self.assertEqual(
            smiles,
            generated,
            "{0}: expected SMILES {1!r}, got {2!r}".format(
                label if label is not None else adjlist.strip(), smiles, generated
            )
        )

    def test_CH4(self):
        """Test the SMILES generation for methane"""
        adjlist = """
        1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
        2 H u0 p0 c0 {1,S}
        3 H u0 p0 c0 {1,S}
        4 H u0 p0 c0 {1,S}
        5 H u0 p0 c0 {1,S}
        """
        smiles = "C"
        self.compare(adjlist, smiles, 'CH4')

    def test_C(self):
        """Test the SMILES generation for atomic carbon mult=(1,3,5)"""
        # All three electronic states are expected to give the same SMILES.
        carbon_states = [
            ('C (singlet)', "1 C u0 p2 c0"),
            ('C (multiplicity 3)', "multiplicity 3\n1 C u2 p1 c0"),
            ('C (multiplicity 5)', "multiplicity 5\n1 C u4 p0 c0"),
        ]
        for label, adjlist in carbon_states:
            self.compare(adjlist, "[C]", label)

    def test_various(self):
        """Test the SMILES generation for various molecules and radicals"""
        # Each entry is (label, expected SMILES, adjacency list).
        # NOTE(review): the adjacency lists are indented for readability;
        # this assumes the adjacency-list parser ignores leading whitespace
        # on each line -- confirm against fromAdjacencyList.
        cases = [
            ('N2', 'N#N', '''
                1 N u0 p1 c0 {2,T}
                2 N u0 p1 c0 {1,T}
                '''),
            ('CH4', 'C', '''
                1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
                2 H u0 p0 c0 {1,S}
                3 H u0 p0 c0 {1,S}
                4 H u0 p0 c0 {1,S}
                5 H u0 p0 c0 {1,S}
                '''),
            ('H2O', 'O', '''
                1 O u0 p2 c0 {2,S} {3,S}
                2 H u0 p0 c0 {1,S}
                3 H u0 p0 c0 {1,S}
                '''),
            ('C2H6', 'CC', '''
                1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
                2 C u0 p0 c0 {1,S} {6,S} {7,S} {8,S}
                3 H u0 p0 c0 {1,S}
                4 H u0 p0 c0 {1,S}
                5 H u0 p0 c0 {1,S}
                6 H u0 p0 c0 {2,S}
                7 H u0 p0 c0 {2,S}
                8 H u0 p0 c0 {2,S}
                '''),
            ('H2', '[H][H]', '''
                1 H u0 p0 c0 {2,S}
                2 H u0 p0 c0 {1,S}
                '''),
            ('H2O2', 'OO', '''
                1 O u0 p2 c0 {2,S} {3,S}
                2 O u0 p2 c0 {1,S} {4,S}
                3 H u0 p0 c0 {1,S}
                4 H u0 p0 c0 {2,S}
                '''),
            ('C3H8', 'CCC', '''
                1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
                2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
                3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S}
                4 H u0 p0 c0 {1,S}
                5 H u0 p0 c0 {1,S}
                6 H u0 p0 c0 {1,S}
                7 H u0 p0 c0 {2,S}
                8 H u0 p0 c0 {2,S}
                9 H u0 p0 c0 {3,S}
                10 H u0 p0 c0 {3,S}
                11 H u0 p0 c0 {3,S}
                '''),
            ('Ar', '[Ar]', '''
                1 Ar u0 p4 c0
                '''),
            ('He', '[He]', '''
                1 He u0 p1 c0
                '''),
            ('CH4O', 'CO', '''
                1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
                2 O u0 p2 c0 {1,S} {6,S}
                3 H u0 p0 c0 {1,S}
                4 H u0 p0 c0 {1,S}
                5 H u0 p0 c0 {1,S}
                6 H u0 p0 c0 {2,S}
                '''),
            ('CO2', 'O=C=O', '''
                1 O u0 p2 c0 {2,D}
                2 C u0 p0 c0 {1,D} {3,D}
                3 O u0 p2 c0 {2,D}
                '''),
            ('CO', '[C-]#[O+]', '''
                1 C u0 p1 c-1 {2,T}
                2 O u0 p1 c+1 {1,T}
                '''),
            ('C2H4', 'C=C', '''
                1 C u0 p0 c0 {2,D} {3,S} {4,S}
                2 C u0 p0 c0 {1,D} {5,S} {6,S}
                3 H u0 p0 c0 {1,S}
                4 H u0 p0 c0 {1,S}
                5 H u0 p0 c0 {2,S}
                6 H u0 p0 c0 {2,S}
                '''),
            ('O2', 'O=O', '''
                1 O u0 p2 c0 {2,D}
                2 O u0 p2 c0 {1,D}
                '''),
            ('CH3', '[CH3]', '''
                multiplicity 2
                1 C u1 p0 c0 {2,S} {3,S} {4,S}
                2 H u0 p0 c0 {1,S}
                3 H u0 p0 c0 {1,S}
                4 H u0 p0 c0 {1,S}
                '''),
            ('HO', '[OH]', '''
                multiplicity 2
                1 O u1 p2 c0 {2,S}
                2 H u0 p0 c0 {1,S}
                '''),
            ('C2H5', 'C[CH2]', '''
                multiplicity 2
                1 C u0 p0 c0 {2,S} {5,S} {6,S} {7,S}
                2 C u1 p0 c0 {1,S} {3,S} {4,S}
                3 H u0 p0 c0 {2,S}
                4 H u0 p0 c0 {2,S}
                5 H u0 p0 c0 {1,S}
                6 H u0 p0 c0 {1,S}
                7 H u0 p0 c0 {1,S}
                '''),
            ('O', '[O]', '''
                multiplicity 3
                1 O u2 p2 c0
                '''),
            ('HO2', '[O]O', '''
                multiplicity 2
                1 O u1 p2 c0 {2,S}
                2 O u0 p2 c0 {1,S} {3,S}
                3 H u0 p0 c0 {2,S}
                '''),
            ('CH', '[CH]', '''
                multiplicity 4
                1 C u3 p0 c0 {2,S}
                2 H u0 p0 c0 {1,S}
                '''),
            ('H', '[H]', '''
                multiplicity 2
                1 H u1 p0 c0
                '''),
            ('C (multiplicity 5)', '[C]', '''
                multiplicity 5
                1 C u4 p0 c0
                '''),
            ('O2 (multiplicity 3)', '[O][O]', '''
                multiplicity 3
                1 O u1 p2 c0 {2,S}
                2 O u1 p2 c0 {1,S}
                '''),
        ]
        for label, smiles, adjlist in cases:
            self.compare(adjlist, smiles, label)
457737

458738
if __name__ == '__main__':
459739
unittest.main()

rmgpy/molecule/parser.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,24 @@
4242
'He': '[He]',
4343
}
4444
SMILES_LOOKUPS = {
45-
'[He]':# RDKit improperly handles helium and returns it in a triplet state
45+
'[He]': # RDKit improperly handles helium and returns it in a triplet state
4646
"""
4747
He
4848
multiplicity 1
4949
1 He u0 p1
50+
""",
51+
'[Ar]': # RDKit improperly handles argon
5052
"""
53+
Ar
54+
multiplicity 1
55+
1 Ar u0 p4
56+
""",
57+
'[C]': # We'd return the quintet without this
58+
"""
59+
multiplicity 3
60+
1 C u2 p1 c0
61+
""",
62+
5163
}
5264

5365
def __fromSMILES(mol, smilesstr, backend):
@@ -335,6 +347,7 @@ def fromRDKitMol(mol, rdkitmol):
335347
mol.vertices = []
336348

337349
# Add hydrogen atoms to complete molecule if needed
350+
rdkitmol.UpdatePropertyCache(strict=False)
338351
rdkitmol = Chem.AddHs(rdkitmol)
339352
Chem.rdmolops.Kekulize(rdkitmol, clearAromaticFlags=True)
340353

0 commit comments

Comments
 (0)