Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1] Dna exten/itp #313

Merged
merged 22 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
289e81c
add function to generate complementary DNA strand for dsDNA
fgrunewald Mar 8, 2022
5a0ec00
add test for function to generate complementary DNA strand for dsDNA
fgrunewald Mar 8, 2022
ec442ca
fix test and add circular test-case
fgrunewald Mar 8, 2022
3510abd
keep edge attributes
fgrunewald Mar 8, 2022
48b135f
carry over edge attributes
fgrunewald Mar 9, 2022
38408b7
add link for 3->5 direction
fgrunewald Mar 9, 2022
2506a0a
fix 3,5 link to only apply in one specific case
fgrunewald Mar 9, 2022
07a4759
use standard convention for DNA
fgrunewald Mar 9, 2022
53d9c57
add dsDNA test
fgrunewald Mar 9, 2022
509103b
implement the proper DNA convention when completing strands from sequ…
fgrunewald Apr 5, 2022
6896986
make max_resid a class variable that gets updated automagically; upda…
fgrunewald Apr 5, 2022
91db5ff
fix DNA API in gen_itp
fgrunewald Aug 14, 2023
bf501a1
add backmap attribute in add_node function
fgrunewald Aug 14, 2023
99e6d00
add circle link
fgrunewald Aug 15, 2023
c0c3082
add test for unkown DNA residue error
fgrunewald Aug 15, 2023
e9cefed
fix error message in gen_dna
fgrunewald Aug 15, 2023
f643e72
add warning for ig format missing title line
fgrunewald Aug 15, 2023
918cbdc
fix test & iterator
fgrunewald Aug 16, 2023
0914f41
fix logging for ig format error & add test
fgrunewald Aug 16, 2023
9b8dc48
remove nodes also from fragment graphs; check missing edges for all m…
fgrunewald Aug 16, 2023
c36392a
minor opt
fgrunewald Sep 26, 2023
f9117f5
minor opt
fgrunewald Sep 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bin/polyply
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
help='A linear sequence of residue names.')
seq_group.add_argument('-seqf', dest='seq_file', type=Path,
help='A graph input file (JSON|TXT|FASTA|IG)')
dna_group = parser_gen_itp.add_argument_group('DNA specifc options')
dna_group.add_argument('-dsdna', dest='dsdna', action='store_true',
help='complement single sequence to dsDNA sequence')

parser_gen_itp.set_defaults(func=gen_itp)

Expand Down
25 changes: 25 additions & 0 deletions polyply/data/martini2/DNA_M2.ff
Original file line number Diff line number Diff line change
Expand Up @@ -566,3 +566,28 @@ BB3 +BB2
BB2 BB3 +BB1 +BB2 1 180.00000 2 3 {"group": "link"}
BB3 +BB1 +BB2 +BB3 9 85.00000 2 2 {"group": "link", "version": "1"}
BB3 +BB1 +BB2 +BB3 9 160.00000 2 3 {"group": "link", "version": "2"}

;
; CIRCULAR MARTINI DNA
;

[ link ]
resname "DT|DG|DA|DC"
[ atoms ]
BB3 { }
>BB1 { }
[ bonds ]
BB3 >BB1 1 0.35300 10000 {"group": "link-circle", "edge": false}
[ angles ]
BB2 BB3 >BB1 2 102.00000 150 {"group": "backbone-circle", "edge": false}
BB3 >BB1 >BB2 2 106.00000 75 {"group": "backbone-circle", "edge": false}
[ exclusions ]
#meta {"edge": false}
BB2 >BB1
BB3 >BB2
[ dihedrals ]
BB2 BB3 >BB1 >BB2 1 180.00000 2 3 {"group": "link-circle", "edge": false}
BB3 >BB1 >BB2 >BB3 9 85.00000 2 2 {"group": "link-circle", "version": "1", "edge": false}
BB3 >BB1 >BB2 >BB3 9 160.00000 2 3 {"group": "link-circle", "version": "2", "edge": false}
[ edges ]
BB3 >BB1 {"linktype": "circle"}
7 changes: 6 additions & 1 deletion polyply/src/apply_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,12 @@ def run_molecule(self, meta_molecule):

# take care to remove nodes if there are any scheduled for removal
# we do this here becuase that's more efficent
molecule.remove_nodes_from(self.nodes_to_remove)
if self.nodes_to_remove:
molecule.remove_nodes_from(self.nodes_to_remove)
# make sure the residue graph is updated; this takes care that
# nodes are also removed from the fragment graphs in the
# meta_molecule.nodes['graph'] attribute
meta_molecule.relabel_and_redo_res_graph(mapping={})
# now we add all interactions but not the ones that contain the removed
# nodes
for inter_type in self.applied_links:
Expand Down
106 changes: 106 additions & 0 deletions polyply/src/gen_dna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright 2022 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
from tqdm import tqdm

BASE_LIBRARY = {"DA": "DT", "DT": "DA", "DG": "DC", "DC": "DG",
"DA5": "DT3", "DT5": "DA3", "DG5": "DC3", "DC5": "DG3",
"DA3": "DT5", "DT3": "DA5", "DG3": "DC5", "DC3": "DG5"
}

def _dna_edge_iterator(meta_molecule, source):
"""
Traversal method for DNA specifc meta-molecule.
Should become obsolete once vermouth commits to
directed graphs.
"""
first_node = source
count = 0
while True:
neighbors = meta_molecule.neighbors(source)
src_resid = meta_molecule.nodes[source]["resid"]
for next_node in neighbors:
next_resid = meta_molecule.nodes[next_node]["resid"]
diff = src_resid - next_resid
if diff == 1 :
yield (source, next_node)
source = next_node
break

if next_resid > src_resid and next_node == first_node:
yield (source, next_node)
return
else:
return

def complement_dsDNA(meta_molecule):
"""
Given a meta-molecule, which represents the residue graph
of a single strand of dsDNA add the complementray strand
to get a meta-molecule with two disconnected strands. The
update of the meta_molecule is done in place.

By convention the other strand is generate 5'-> 3' end,
meaning that the last residue of the first strand aligns
with the first residue of the second strand.

Parameters
----------
meta_molecule: :class:`polyply.src.meta_molecule.MetaMolecule`

Raises
------
IOError
when the resname does not match any of the know base-pair
names an error is raised.
"""
last_node = list(meta_molecule.nodes)[-1]
resname = BASE_LIBRARY[meta_molecule.nodes[last_node]["resname"]]
meta_molecule.add_monomer(last_node+1, resname, [])

correspondance = {last_node: last_node+1}
total = last_node+1

pbar = tqdm(total=len(meta_molecule.nodes))
for prev_node, next_node in _dna_edge_iterator(meta_molecule, source=last_node):
try:
resname = BASE_LIBRARY[meta_molecule.nodes[next_node]["resname"]]
except KeyError:
msg = ("Trying to complete a dsDNA strand. However, resname {resname} with resid {resid} "
"does not match any of the known base-pair resnames. Note that polyply "
"at the moment only allows base-pair completion for molecules that only "
"consist of dsDNA. Please conact the developers if you whish to create a "
"more complicated molecule.")

resname = meta_molecule.nodes[next_node]["resname"]
resid = meta_molecule.nodes[next_node]["resid"]
raise IOError(msg.format(resname=resname, resid=resid))

if next_node not in correspondance:
new_node = total + 1
meta_molecule.add_monomer(total+1, resname, [(correspondance[prev_node], total+1)])
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
correspondance[next_node] = new_node
else:
new_node = correspondance[next_node]
meta_molecule.add_edge(correspondance[prev_node], new_node)

# make sure that edge attributes are carried over
for attr, value in meta_molecule.edges[(prev_node, next_node)].items():
meta_molecule.edges[(correspondance[prev_node], new_node)][attr] = value

total += 1
pbar.update(1)
pbar.close()
return meta_molecule

16 changes: 9 additions & 7 deletions polyply/src/gen_itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from polyply import (MetaMolecule, ApplyLinks, Monomer, MapToMolecule)
from polyply.src.graph_utils import find_missing_edges
from .load_library import load_ff_library
from .gen_dna import complement_dsDNA

LOGGER = StyleAdapter(get_logger(__name__))

Expand All @@ -59,7 +60,7 @@
monomers.append(Monomer(resname=resname, n_blocks=n_blocks))
return monomers

def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None):
def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None, dsdna=False):
"""
Top level function for running the polyply parameter generation.
Parameters seq and seq_file are mutually exclusive. Set the other
Expand Down Expand Up @@ -96,19 +97,20 @@
LOGGER.info("reading sequence from file", type="step")
meta_molecule = MetaMolecule.from_sequence_file(force_field, seq_file, name)

# Generate complementary DNA strand
if dsdna:
complement_dsDNA(meta_molecule)

Check warning on line 102 in polyply/src/gen_itp.py

View check run for this annotation

Codecov / codecov/patch

polyply/src/gen_itp.py#L102

Added line #L102 was not covered by tests

# Do transformationa and apply link
LOGGER.info("mapping sequence to molecule", type="step")
meta_molecule = MapToMolecule(force_field).run_molecule(meta_molecule)
LOGGER.info("applying links between residues", type="step")
meta_molecule = ApplyLinks().run_molecule(meta_molecule)

# Raise warning if molecule is disconnected
if not nx.is_connected(meta_molecule.molecule):
LOGGER.warning("Your molecule consists of disjoint parts."
"Perhaps links were not applied correctly.")
msg = "Missing link between residue {idxA} {resA} and residue {idxB} {resB}"
for missing in find_missing_edges(meta_molecule, meta_molecule.molecule):
LOGGER.warning(msg, **missing)
msg = "Missing a link between residue {idxA} {resA} and residue {idxB} {resB}."
for missing in find_missing_edges(meta_molecule, meta_molecule.molecule):
LOGGER.warning(msg, **missing)

Check warning on line 113 in polyply/src/gen_itp.py

View check run for this annotation

Codecov / codecov/patch

polyply/src/gen_itp.py#L113

Added line #L113 was not covered by tests

with deferred_open(outpath, 'w') as outfile:
header = [ ' '.join(sys.argv) + "\n" ]
Expand Down
19 changes: 10 additions & 9 deletions polyply/src/meta_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def __init__(self, *args, **kwargs):
self.__search_tree = None
self.root = None
self.dfs = False
self.max_resid = 0

# add resids to polyply meta-molecule nodes if they are not
# present. All algorithms rely on proper resids
Expand All @@ -117,6 +118,13 @@ def __init__(self, *args, **kwargs):
msg = "Couldn't add 1 to node. Either provide resids or use integers as node keys."
raise IOError(msg)

if self.max_resid < self.nodes[node]["resid"]:
self.max_resid = self.nodes[node]["resid"]

def add_node(self, *args, **kwargs):
self.max_resid = self.max_resid + 1
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
kwargs["resid"] = self.max_resid
super().add_node(*args, **kwargs)

def add_monomer(self, current, resname, connections):
"""
Expand All @@ -125,14 +133,7 @@ def add_monomer(self, current, resname, connections):
that matches may only refer to already existing nodes.
But connections can be an empty list.
"""
resids = nx.get_node_attributes(self, "resid")

if resids:
resid = max(resids.values()) + 1
else:
resid = 1

self.add_node(current, resname=resname, resid=resid, build=True, backmap=True)
self.add_node(current, resname=resname, build=True, backmap=True)
for edge in connections:
if self.has_node(edge[0]) and self.has_node(edge[1]):
self.add_edge(edge[0], edge[1])
Expand All @@ -155,7 +156,7 @@ def relabel_and_redo_res_graph(self, mapping):
mapping of node-key to new residue name
"""
# find the maximum resiude id
max_resid = max(nx.get_node_attributes(self.molecule, "resid").values())
max_resid = self.max_resid
# resname the residues and increase with pseudo-resid
for node, resname in mapping.items():
self.molecule.nodes[node]["resname"] = resname
Expand Down
5 changes: 4 additions & 1 deletion polyply/src/simple_seq_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def _identify_residues(comments):

if "RNA" in comment:
RNA = True

if "PROTEIN" in comment:
AA = True

Expand Down Expand Up @@ -253,6 +253,9 @@ def parse_ig(filepath):
raise FileFormatError(msg)

DNA, RNA, AA = _identify_residues(comments)
if set(clean_lines[0]).issubset(set(['A', 'C', 'G', 'T'])):
LOGGER.warning("Found only the letters A, C, G, T on first line. Are you missing the title line in your .ig file?")

seq_graph = _parse_plain(clean_lines[1:], DNA=DNA, RNA=RNA, AA=AA)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

if ter_char == '2':
Expand Down
Loading
Loading