Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 109 additions & 43 deletions src/haddock/libs/libaa2cg.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,26 @@
- Implemented feature to check if nucleic acid is a candidate for hbond (Rodrigo Honorato 2018)
"""

import collections
import itertools
import math
import os
import random
import subprocess
import tempfile
import warnings

from pathlib import Path
from haddock import log
import tempfile
import collections
import math
from io import StringIO

from Bio.PDB import Entity
from Bio.PDB import PDBIO
from Bio.PDB import PDBParser
from Bio.PDB import Entity, PDBIO, PDBParser
from Bio.PDB.Structure import Structure
from Bio.PDB.StructureBuilder import StructureBuilder

from haddock import log
from haddock.core.exceptions import ModuleError
from haddock.core.typing import Optional
from haddock.libs.libontology import Format

warnings.filterwarnings("ignore")

Expand Down Expand Up @@ -143,8 +146,8 @@ def typesub(seq, patterns, types):

def ss_classification(ss, program="dssp"):
"""
Translates a string encoding the secondary structure to a string of corresponding Martini types, taking the
origin of the secondary structure into account, and replacing termini if requested.
Translates a string encoding the secondary structure to a string of corresponding Martini types, taking the
origin of the secondary structure into account, and replacing termini if requested.

Args:
ss:
Expand Down Expand Up @@ -544,7 +547,7 @@ def center_of_mass(entity, geometric=False):
return [sum(coord_list) / sum(masses) for coord_list in w_pos]


def determine_hbonds(structure):
def determine_hbonds(structure: Structure):
"""

Args:
Expand Down Expand Up @@ -752,8 +755,8 @@ def create_file_with_cryst(pdb_file: str) -> None:
return file_out.name


def determine_ss(structure, skipss, pdbf_path):
"""
def determine_ss(structure: Structure, skipss: bool, pdbf_path: str) -> Structure:
"""Determine secondary structures from input structure

Args:
structure:
Expand Down Expand Up @@ -807,39 +810,43 @@ def determine_ss(structure, skipss, pdbf_path):
return structure


def rename_nucbases(structure):
"""

Args:
structure:

Returns:
def rename_nucbases(structure: Structure) -> None:
"""Inplace residue renaming according to HADDOCK ones.

Parameters
----------
structure : Bio.PDB.Structure.Structure
Input structure
"""
chainresdic = dict([(c.get_id(),
[r.get_resname() for r in c.get_residues()]) for m in structure for c in m])
chainresdic = {
c.get_id(): [r.get_resname() for r in c.get_residues()]
for m in structure
for c in m
}

nucleotide_list = ["CYT", "C", "DC", "THY", "T", "DT", "ADE",
"A", "DA", "G", "GUA", "DG", "U", "URI"]
rna_resname_mapper = {"CYT": "C", "URI": "U", "ADE": "A", "GUA": "G"}
dna_rename_mapper = {"CYT": "DC", "THY": "DT", "ADE": "DA", "GUA": "DG"}

if [True for c in chainresdic for e in chainresdic[c] if e in nucleotide_list]:

if [True for c in chainresdic for e in chainresdic[c] if e in ["U", "URI"]]:
# CG needs 1 letter for RNA
ref_dic = {"CYT": "C", "URI": "U", "ADE": "A", "GUA": "G"}
else:
# CG needs 2 letters for DNA
ref_dic = {"CYT": "DC", "THY": "DT", "ADE": "DA", "GUA": "DG"}

# Check if this is an RNA
is_rna = [True for c in chainresdic for e in chainresdic[c] if e in ["U", "URI"]]
ref_dic = rna_resname_mapper if is_rna else dna_rename_mapper
# Loop over models
for model in structure:
for chain in model:
for r in chain.get_residues():
if r.resname in ref_dic.keys():
# rename
# Rename residue name
r.resname = ref_dic[r.resname]


def martinize(input_pdb: str, output_path: str, skipss: bool):
def martinize(
input_pdb: str,
output_path: str,
skipss: bool,
) -> tuple[str, bool]:
"""
Converts an all-atom (AA) PDB structure into a coarse-grained (CG) model
using a MARTINI2.2 mapping and generating CG-to-AA restraints for backmapping.
Expand Down Expand Up @@ -944,15 +951,21 @@ def martinize(input_pdb: str, output_path: str, skipss: bool):

cg_model = structure_builder.get_structure()

# Write CG structure
cg_pdb_name = f"../{output_path}/{Path(pdbf_path).stem}_cg.pdb"
# Write pre-CG structure
io.set_structure(cg_model)
io.save("temp.pdb", write_end=1)

# Setup in-memory text buffer
io_file = StringIO()
# Write file in it
io.save(io_file, write_end=1)
# Go back to the start of the file to read it
io_file.seek(0)

# Write the actual valid CG structure
# make sure atom names are in the correct place
# .BB. .BB1. .BB2. and not BB.. BB1.. BB2..
cg_pdb_name = gen_cg_filename(f"../{output_path}", pdbf_path)
out = open(cg_pdb_name, "w")
for line in open("temp.pdb", "r"):
for line in io_file.readlines():
if line.startswith("ATOM"):
atom_name = line[12:16].split()[0]
# mind the spacing
Expand All @@ -964,14 +977,67 @@ def martinize(input_pdb: str, output_path: str, skipss: bool):
n_l = line
out.write(n_l)
out.close()
Path("temp.pdb").unlink(missing_ok=True)
del io_file

# Write Restraints
tbl_file_name = f"../{output_path}/{Path(pdbf_path).stem}_cg_to_aa.tbl"
tbl_file = open(tbl_file_name, "w")
tbl_str = "\n".join([tbl for tbl in tbl_cg_to_aa if tbl])
tbl_file.write(f"\n{tbl_str}")
tbl_file.close()
# Write CG to AA backmapping restraint file
tbl_file_name = gen_cg_tbl_backmapping_fname(f"../{output_path}", pdbf_path)
with open(tbl_file_name, "w") as tbl_file:
tbl_file.write("\n" + "\n".join([tbl for tbl in tbl_cg_to_aa if tbl]))

return cg_pdb_name


def gen_cg_filename(
output_dir: str,
input_fname: str,
force_field: Optional[str] = None,
ext: Optional[str] = None,
) -> str:
"""Helper function to standarize CG filename from input file.

Parameters
----------
output_dir : str
Where to write the file.
input_fname : str
Name of the original input PDB file.
force_field : Optional[str], optional
Name of the force-field, by default None
ext : Optional[str], optional
File extension, by default None

Returns
-------
cg_fname : str
Name of the CG file.
"""
# Suffix for force-field if defined
ff_suffix = f"_{force_field}" if force_field else ""
# Set file extension
file_ext = ext if ext else Format.PDB
# Generate filepath
cg_fpath = Path(
output_dir,
f"{Path(input_fname).stem}_cg{ff_suffix}.{file_ext}"
)
cg_fname = str(cg_fpath)
return cg_fname


def gen_cg_tbl_backmapping_fname(output_dir: str, input_fname: str) -> Path:
"""Helper function to generate CG backmapping retraints filename.

Parameters
----------
output_dir : str
Where to write the file.
input_fname : str
Name of the original input PDB file.

Returns
-------
tbl_file_name: Path
Name of backmapping restraint filename.
"""
tbl_file_name = Path(output_dir, f"{Path(input_fname).stem}_cg_to_aa.tbl")
return tbl_file_name
46 changes: 30 additions & 16 deletions src/haddock/modules/topology/topocg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@
prepare_output,
prepare_single_input,
)
from haddock.libs.libaa2cg import (
martinize,
gen_cg_filename,
gen_cg_tbl_backmapping_fname,
)
from haddock.libs.libontology import Format, PDBFile, TopologyFile
from haddock.libs.libstructure import make_molecules
from haddock.libs.libsubprocess import CNSJob
from haddock.modules import get_engine
from haddock.modules.base_cns_module import BaseCNSModule

from haddock.libs.libaa2cg import martinize


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)
Expand Down Expand Up @@ -235,9 +238,9 @@ def _run(self) -> None:

# Get psf files for aa topology
psf_files[i] = [
Path(mol.as_posix()[:-4]+".psf")
for mol in splited_models
]
Path(mol.parent, f"{mol.stem}.{Format.TOPOLOGY}")
for mol in splited_models
]

# get the MD5 hash of each model
ens_dic[i] = [
Expand Down Expand Up @@ -316,28 +319,39 @@ def _run(self) -> None:
md5_hash = None
else:
md5_hash = md5_dic[j]
origin_name_model = origin_names[j]
origin_name_model = model.stem

# FIXME: _model_id not used ?
try:
model_id = int(model.stem.split("_")[-2])
origin_name_model = ("_").join(model.stem.split("_"))
_model_id = int(model.stem.split("_")[-2])
except ValueError:
model_id = 0
origin_name_model = str(model.stem).split(".pdb")[0]

_model_id = 0

shape_mod = shape_dic[i]
if not shape_mod:
processed_pdb = Path(f"{origin_name_model}_cg_{force_field}.{Format.PDB}")
processed_topology = Path(f"{origin_name_model}_cg_{force_field}.{Format.TOPOLOGY}")
processed_cgtoaa_tbl=Path("../"+self.path.as_posix()+"/"+origin_name_model+"_cg_to_aa.tbl")
processed_pdb = gen_cg_filename(
"",
origin_name_model,
force_field=force_field,
)
processed_topology = gen_cg_filename(
"",
origin_name_model,
force_field=force_field,
ext=Format.TOPOLOGY,
)
processed_cgtoaa_tbl = gen_cg_tbl_backmapping_fname(
f"../{self.path.as_posix()}",
origin_name_model,
)
else:
processed_pdb = Path(f"{origin_name_model}.{Format.PDB}")
processed_topology = Path(f"{origin_name_model}.{Format.TOPOLOGY}")
processed_cgtoaa_tbl=None
processed_cgtoaa_tbl = None

topology = TopologyFile(processed_topology, path=".")
psf_file_uniq = psf_files[i][j].as_posix().split('/')
aa_topo_path = psf_file_uniq[0] + "/" + psf_file_uniq[1]
aa_topo_path = f"{psf_file_uniq[0]}/{psf_file_uniq[1]}"
aa_topology = TopologyFile(psf_file_uniq[2], path=aa_topo_path)
pdb = PDBFile(
file_name=processed_pdb,
Expand Down
Loading