Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(protein) termini patching #374

Merged
merged 19 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ tests/test_data/gen_seq/output/*.itp
doc/build
doc/source/api
doc/source/.doctrees

/polyply/data/martini3007/*
csbrasnett marked this conversation as resolved.
Show resolved Hide resolved
7 changes: 7 additions & 0 deletions bin/polyply
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
dna_group = parser_gen_itp.add_argument_group('DNA specifc options')
dna_group.add_argument('-dsdna', dest='dsdna', action='store_true',
help='complement single sequence to dsDNA sequence')
modifications_group = parser_gen_itp.add_argument_group('Modifications group')
modifications_group.add_argument('-mods', dest='mods', action='append',
default=[], type=lambda s: s.split(":"),
help=('Add a modification to a residue. The format is '
'<resname><resid>:modification_name, '
'e.g. ASP1:N-ter')
)

parser_gen_itp.set_defaults(func=gen_itp)

Expand Down
53 changes: 0 additions & 53 deletions polyply/data/martini3/aminoacids.ff
Original file line number Diff line number Diff line change
Expand Up @@ -803,59 +803,6 @@ CA BB
[ edges ]
CA BB

[ link ]
resname $protein_resnames
[ atoms ]
CA { }
BB { }
+BB { }
[ exclusions ]
#meta {"comment": "CA-BB"}
CA +BB
[ edges ]
BB +BB
csbrasnett marked this conversation as resolved.
Show resolved Hide resolved

;; Protein termini. These links should be applied last.
[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": 1}}
[ non-edges ]
BB -BB

[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": -1}}
[ non-edges ]
BB +BB

[ link ]
resname $protein_resnames
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0}}
[ non-edges ]
BB +BB
BB -BB

[ link ]
resname $protein_resnames
[ molmeta ]
neutral_termini true
[ atoms ]
BB {"replace": {"atype": "P5", "charge": 0}}
[ non-edges ]
BB -BB

[ link ]
resname $protein_resnames
[ molmeta ]
neutral_termini true
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0}}
[ non-edges ]
BB +BB

;; Cystein bridge
[ link ]
resname "CYS"
Expand Down
37 changes: 37 additions & 0 deletions polyply/data/martini3/modifications.ff
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[ citations ]
Martini3

[ modification ]
C-ter
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": -1.0}}

[ modification ]
N-ter
[ atoms ]
BB {"replace": {"atype": "Q5", "charge": 1.0}}

[ modification ]
zwitter
[ atoms ]
BB {"replace": {"atype": "Q5"}}

[ modification ]
COOH-ter
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0.0}}

[ modification ]
NH2-ter
[ atoms ]
BB {"replace": {"atype": "P6", "charge": 0.0}}

[ modification ]
CCAP-ter
[ atoms ]
BB {"replace": {"atype": "P1", "charge": 0.0}}

[ modification ]
NCAP-ter
[ atoms ]
BB {"replace": {"atype": "P2", "charge": 0.0}}
16 changes: 8 additions & 8 deletions polyply/src/apply_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,16 @@ def match_link_and_residue_atoms(meta_molecule, link, link_to_resid):
# relative resid has been asserted before so we can
# exclude it here
ignore = ['order', 'charge_group', 'replace', 'resid']
matchs = list(find_atoms(block, ignore=ignore, **attrs))
matches = list(find_atoms(block, ignore=ignore, **attrs))
csbrasnett marked this conversation as resolved.
Show resolved Hide resolved

if len(matchs) == 1:
link_to_mol[node] = matchs[0]
elif len(matchs) == 0:
msg = "Found no matchs for node {} in resiue {}. Cannot apply link."
if len(matches) == 1:
link_to_mol[node] = matches[0]
elif len(matches) == 0:
msg = "Found no matches for node {} in resiue {}. Cannot apply link."
raise MatchError(msg.format(node, resid))
else:
msg = "Found {} matches for node {} in resiue {}. Cannot apply link."
raise MatchError(msg.format(len(matchs), node, resid))
raise MatchError(msg.format(len(matches), node, resid))

return link_to_mol

Expand Down Expand Up @@ -475,8 +475,8 @@ def run_molecule(self, meta_molecule):
res_link,
node_match=_res_match,
edge_match=_linktype_match)
raw_matchs = GM.subgraph_isomorphisms_iter()
for match in raw_matchs:
raw_matches = GM.subgraph_isomorphisms_iter()
for match in raw_matches:
nodes = match.keys()
resids =[meta_molecule.nodes[node]["resid"] for node in nodes]
orders = [ res_link.nodes[match[node]]["order"] for node in nodes]
Expand Down
116 changes: 113 additions & 3 deletions polyply/src/apply_modifications.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 University of Groningen
# Copyright 2024 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,6 +11,116 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import vermouth.molecule
from vermouth.log_helpers import StyleAdapter, get_logger
from vermouth.processors.annotate_mut_mod import parse_residue_spec
LOGGER = StyleAdapter(get_logger(__name__))
from .processor import Processor

class ApplyModifications():
pass
protein_resnames = "GLY|ALA|CYS|VAL|LEU|ILE|MET|PRO|HYP|ASN|GLN|ASP|ASP0|GLU|GLU0|THR|SER|LYS|LYS0|ARG|ARG0|HIS|HISH|PHE|TYR|TRP"

def _patch_protein_termini(meta_molecule, ter_mods=['N-ter', 'C-ter']):
"""
make a resspec for a protein with correct terminal modification
"""
protein_termini = [({'resid': 1, 'resname': meta_molecule.nodes[0]['resname']}, ter_mods[0])]
max_resid = meta_molecule.max_resid
last_node = max_resid - 1
last_resname = meta_molecule.nodes[last_node]['resname']
if len(ter_mods) > 1:
last_mod = ({'resid': max_resid, 'resname': last_resname}, ter_mods[1])
protein_termini.append(last_mod)
else:
# if only one mod in ter_mods, apply the mod to both start and end residue
LOGGER.info("Only one terminal modification specified. "
f"Will apply {ter_mods[0]} to both {meta_molecule.nodes[0]['resname']}1 and {last_resname}{max_resid}")
protein_termini.append(({'resid': max_resid, 'resname': last_resname}, ter_mods[0]))

return protein_termini


def apply_mod(meta_molecule, modifications):
"""

Apply a terminal modification.
Note this assumes that the modification is additive, ie. no atoms are
removed

Parameters
----------
meta_molecule: :class:`polyply.src.meta_molecule.MetaMolecule`
modifications: list
list of (resspec, modification) pairs to apply

Returns
----------
meta_molecule
"""

molecule = meta_molecule.molecule

if not molecule.force_field.modifications:
LOGGER.warning('No modifications present in forcefield, none will be applied')
return meta_molecule

for target, desired_mod in modifications:

target_resid = target['resid']

mod_atoms = {}
for mod_atom in molecule.force_field.modifications[desired_mod].atoms:
if 'replace' in mod_atom:
mod_atoms[mod_atom['atomname']] = mod_atom['replace']
else:
mod_atoms[mod_atom['atomname']] = {}

target_residue = meta_molecule.nodes[target_resid - 1]
# takes care to skip all residues that come from an itp file
if not target_residue.get('from_itp', 'False'):
LOGGER.warning("meta_molecule has come from itp. Will not attempt to modify.")
continue

Check warning on line 81 in polyply/src/apply_modifications.py

View check run for this annotation

Codecov / codecov/patch

polyply/src/apply_modifications.py#L80-L81

Added lines #L80 - L81 were not covered by tests
# checks that the resname is a protein resname as defined above
if not vermouth.molecule.attributes_match(target_residue,
{'resname': vermouth.molecule.Choice(protein_resnames.split("|"))}):
LOGGER.warning("The resname of your target residue is not recognised a protein resname. "
"Will not attempt to modify.")
continue

anum_dict = {}
# this gives you the correct node indices
for node in target_residue['graph'].nodes:
aname = molecule.nodes[node]['atomname']
if aname in mod_atoms.keys():
anum_dict[aname] = node + 1
for key, value in mod_atoms[aname].items():
molecule.nodes[node][key] = value

mod_interactions = molecule.force_field.modifications[desired_mod].interactions
for i in mod_interactions:
for j in molecule.force_field.modifications[desired_mod].interactions[i]:
molecule.add_interaction(i,
(anum_dict[j.atoms[0]]-1,
anum_dict[j.atoms[1]]-1),
j.parameters,
meta=j.meta)

return meta_molecule

class ApplyModifications(Processor):
"""
This processor takes a class:`polyply.src.MetaMolecule` and
based on modifications defined in the `force-field` attribute of the
MetaMolecule applies them when appropriate.

"""
def __init__(self, meta_molecule, modifications=[]):
self.target_mods = []
for resspec, val in modifications:
self.target_mods.append((parse_residue_spec(resspec), val))
if len(self.target_mods) == 0:
self.target_mods = _patch_protein_termini(meta_molecule)

def run_molecule(self, meta_molecule):

apply_mod(meta_molecule, self.target_mods)
return meta_molecule
9 changes: 8 additions & 1 deletion polyply/src/gen_itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from polyply.src.graph_utils import find_missing_edges
from .load_library import load_ff_library
from .gen_dna import complement_dsDNA
from .apply_modifications import ApplyModifications

LOGGER = StyleAdapter(get_logger(__name__))

Expand All @@ -60,7 +61,10 @@ def split_seq_string(sequence):
monomers.append(Monomer(resname=resname, n_blocks=n_blocks))
return monomers

def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None, dsdna=False):
def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[],
lib=None, seq=None, seq_file=None,
dsdna=False, mods=[], protter=False):

"""
Top level function for running the polyply parameter generation.
Parameters seq and seq_file are mutually exclusive. Set the other
Expand Down Expand Up @@ -107,6 +111,9 @@ def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None,
LOGGER.info("applying links between residues", type="step")
meta_molecule = ApplyLinks().run_molecule(meta_molecule)

meta_molecule = ApplyModifications(modifications=mods,
meta_molecule=meta_molecule).run_molecule(meta_molecule)

# Raise warning if molecule is disconnected
msg = "Missing a link between residue {idxA} {resA} and residue {idxB} {resB}."
for missing in find_missing_edges(meta_molecule, meta_molecule.molecule):
Expand Down
7 changes: 7 additions & 0 deletions polyply/tests/example_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def example_meta_molecule():
3 8 12
"""
force_field = vermouth.forcefield.ForceField("test")

# force_field.modifications.update({'N-ter':{'PTM_atom': False,
# 'replace': {'atype': 'Q5', 'charge': 1.0},
# 'order': 0, 'atomname': 'BB'}}
# )


fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
block_A = Block(force_field=force_field)
block_A.add_nodes_from([(0 , {'resid': 1, 'atomname': 'BB', 'atype': 'A', 'charge': 0.0, 'other': 'A', 'resname': 'A'}),
(1 , {'resid': 1, 'atomname': 'BB1', 'atype': 'B', 'charge': 0.0, 'other': 'A', 'resname': 'A'}),
Expand Down
Loading
Loading