2525 - Implemented feature to check if nucleic acid is a candidate for hbond (Rodrigo Honorato 2018)
2626"""
2727
28+ import collections
2829import itertools
30+ import math
2931import os
3032import random
3133import subprocess
34+ import tempfile
3235import warnings
36+
3337from pathlib import Path
34- from haddock import log
35- import tempfile
36- import collections
37- import math
38+ from io import StringIO
3839
39- from Bio .PDB import Entity
40- from Bio .PDB import PDBIO
41- from Bio .PDB import PDBParser
40+ from Bio .PDB import Entity , PDBIO , PDBParser
41+ from Bio .PDB .Structure import Structure
4242from Bio .PDB .StructureBuilder import StructureBuilder
4343
44+ from haddock import log
4445from haddock .core .exceptions import ModuleError
46+ from haddock .core .typing import Optional
47+ from haddock .libs .libontology import Format
4548
4649warnings .filterwarnings ("ignore" )
4750
@@ -143,8 +146,8 @@ def typesub(seq, patterns, types):
143146
144147def ss_classification (ss , program = "dssp" ):
145148 """
146- Translates a string encoding the secondary structure to a string of corresponding Martini types, taking the
147- origin of the secondary structure into account, and replacing termini if requested.
149+ Translates a string encoding the secondary structure to a string of corresponding Martini types, taking the
150+ origin of the secondary structure into account, and replacing termini if requested.
148151
149152 Args:
150153 ss:
@@ -544,7 +547,7 @@ def center_of_mass(entity, geometric=False):
544547 return [sum (coord_list ) / sum (masses ) for coord_list in w_pos ]
545548
546549
547- def determine_hbonds (structure ):
550+ def determine_hbonds (structure : Structure ):
548551 """
549552
550553 Args:
@@ -752,8 +755,8 @@ def create_file_with_cryst(pdb_file: str) -> None:
752755 return file_out .name
753756
754757
755- def determine_ss (structure , skipss , pdbf_path ) :
756- """
758+ def determine_ss (structure : Structure , skipss : bool , pdbf_path : str ) -> Structure :
759+ """Determine secondary structures from input structure
757760
758761 Args:
759762 structure:
@@ -807,39 +810,43 @@ def determine_ss(structure, skipss, pdbf_path):
807810 return structure
808811
809812
def rename_nucbases(structure: Structure) -> None:
    """Rename nucleic-acid residues in place to the HADDOCK naming scheme.

    Three-letter base names are mapped to one-letter names when the
    structure is detected as RNA (it contains at least one ``U`` or ``URI``
    residue) and to two-letter ``D*`` names otherwise. A structure holding
    none of the known nucleotide residue names is left untouched.

    Parameters
    ----------
    structure : Bio.PDB.Structure.Structure
        Input structure. The ``resname`` attribute of its residues is
        modified in place; nothing is returned.
    """
    nucleotide_names = {
        "CYT", "C", "DC", "THY", "T", "DT", "ADE",
        "A", "DA", "G", "GUA", "DG", "U", "URI",
    }
    rna_markers = {"U", "URI"}
    # CG needs 1-letter residue names for RNA ...
    rna_resname_mapper = {"CYT": "C", "URI": "U", "ADE": "A", "GUA": "G"}
    # ... and 2-letter residue names for DNA
    dna_resname_mapper = {"CYT": "DC", "THY": "DT", "ADE": "DA", "GUA": "DG"}

    # Residue names grouped by chain id. A chain id that appears in several
    # models keeps the names of the last model iterated (dict overwrite).
    chainresdic = {
        c.get_id(): [r.get_resname() for r in c.get_residues()]
        for m in structure
        for c in m
    }
    resnames = {name for names in chainresdic.values() for name in names}

    # Nothing to do when the structure holds no nucleotide at all
    if resnames.isdisjoint(nucleotide_names):
        return

    # Presence of uracil is what flags the structure as RNA
    is_rna = not resnames.isdisjoint(rna_markers)
    ref_dic = rna_resname_mapper if is_rna else dna_resname_mapper

    # Loop over models, chains and residues, renaming the mapped ones
    for model in structure:
        for chain in model:
            for residue in chain.get_residues():
                new_name = ref_dic.get(residue.resname)
                if new_name is not None:
                    residue.resname = new_name
840843
841844
842- def martinize (input_pdb : str , output_path : str , skipss : bool ):
845+ def martinize (
846+ input_pdb : str ,
847+ output_path : str ,
848+ skipss : bool ,
849+ ) -> tuple [str , bool ]:
843850 """
844851 Converts an all-atom (AA) PDB structure into a coarse-grained (CG) model
845852 using a MARTINI2.2 mapping and generating CG-to-AA restraints for backmapping.
@@ -944,15 +951,21 @@ def martinize(input_pdb: str, output_path: str, skipss: bool):
944951
945952 cg_model = structure_builder .get_structure ()
946953
947- # Write CG structure
948- cg_pdb_name = f"{ output_path } /{ Path (pdbf_path ).stem } _cg.pdb"
954+ # Write pre-CG structure
949955 io .set_structure (cg_model )
950- io .save ("temp.pdb" , write_end = 1 )
951-
956+ # Setup in-memory text buffer
957+ io_file = StringIO ()
958+ # Write file in it
959+ io .save (io_file , write_end = 1 )
960+ # Go back to the start of the file to read it
961+ io_file .seek (0 )
962+
963+ # Write the actual valid CG structure
952964 # make sure atom names are in the correct place
953965 # .BB. .BB1. .BB2. and not BB.. BB1.. BB2..
966+ cg_pdb_name = gen_cg_filename (f"../{ output_path } " , pdbf_path )
954967 out = open (cg_pdb_name , "w" )
955- for line in open ( "temp.pdb" , "r" ):
968+ for line in io_file . readlines ( ):
956969 if line .startswith ("ATOM" ):
957970 atom_name = line [12 :16 ].split ()[0 ]
958971 # mind the spacing
@@ -964,14 +977,67 @@ def martinize(input_pdb: str, output_path: str, skipss: bool):
964977 n_l = line
965978 out .write (n_l )
966979 out .close ()
967- Path ( "temp.pdb" ). unlink ( missing_ok = True )
980+ del io_file
968981
969- # Write Restraints
970- tbl_file_name = f"{ output_path } /{ Path (pdbf_path ).stem } _cg_to_aa.tbl"
971- tbl_file = open (tbl_file_name , "w" )
972- tbl_str = "\n " .join ([tbl for tbl in tbl_cg_to_aa if tbl ])
973- tbl_file .write (f"\n { tbl_str } " )
974- tbl_file .close ()
982+ # Write CG to AA backmapping restraint file
983+ tbl_file_name = gen_cg_tbl_backmapping_fname (f"../{ output_path } " , pdbf_path )
984+ with open (tbl_file_name , "w" ) as tbl_file :
985+ tbl_file .write ("\n " + "\n " .join ([tbl for tbl in tbl_cg_to_aa if tbl ]))
975986
976987 return cg_pdb_name
977988
989+
def gen_cg_filename(
        output_dir: str,
        input_fname: str,
        force_field: Optional[str] = None,
        ext: Optional[str] = None,
        ) -> str:
    """Helper function to standardize the CG filename from an input file.

    The name is built as ``<input stem>_cg[_<force_field>].<ext>`` and
    placed inside `output_dir`.

    Parameters
    ----------
    output_dir : str
        Where to write the file.
    input_fname : str
        Name of the original input PDB file; only its stem is used.
    force_field : Optional[str], optional
        Name of the force-field, appended as ``_<force_field>`` when set,
        by default None
    ext : Optional[str], optional
        File extension, with or without a leading dot. When empty or None,
        ``Format.PDB`` is used. By default None

    Returns
    -------
    cg_fname : str
        Path of the CG file, as a string.
    """
    # Suffix for force-field if defined
    ff_suffix = f"_{force_field}" if force_field else ""
    # Set file extension
    file_ext = ext if ext else Format.PDB
    # Tolerate extensions given as ".pdb", which would otherwise yield "..pdb"
    if isinstance(file_ext, str):
        file_ext = file_ext.lstrip(".")
    # Generate filepath
    cg_basename = f"{Path(input_fname).stem}_cg{ff_suffix}.{file_ext}"
    return str(Path(output_dir, cg_basename))
1025+
1026+
def gen_cg_tbl_backmapping_fname(output_dir: str, input_fname: str) -> Path:
    """Helper function to generate the CG backmapping restraints filename.

    The name is built as ``<input stem>_cg_to_aa.tbl`` and placed inside
    `output_dir`.

    Parameters
    ----------
    output_dir : str
        Where to write the file.
    input_fname : str
        Name of the original input PDB file; only its stem is used.

    Returns
    -------
    tbl_file_name : Path
        Path to the CG-to-AA backmapping restraints file.
    """
    return Path(output_dir, f"{Path(input_fname).stem}_cg_to_aa.tbl")
0 commit comments