From 0f32be0fb0a4a68c13c4d0aebbb11613c2240a46 Mon Sep 17 00:00:00 2001 From: jan-stevens Date: Tue, 15 Aug 2023 10:08:23 +0200 Subject: [PATCH 1/4] allow user to choose bacmkapping protocol --- bin/polyply | 4 ++ polyply/src/backmap.py | 110 ++++------------------------- polyply/src/gen_coords.py | 5 +- polyply/src/orient_by_bonds.py | 124 +++++++++++++++++++++++++++++++++ 4 files changed, 144 insertions(+), 99 deletions(-) create mode 100644 polyply/src/orient_by_bonds.py diff --git a/bin/polyply b/bin/polyply index 3776c9e9..f377b42f 100755 --- a/bin/polyply +++ b/bin/polyply @@ -162,6 +162,10 @@ def main(): # pylint: disable=too-many-locals,too-many-statements ' when RW fails in first attempt')) backmap_group = parser_gen_coords.add_argument_group('Options for backmapping') + backmap_group.add_argument('-protocol', dest='protocol', type=str, default='default', + choices=['default'], + help='choose which backmapping protocol to use.') + backmap_group.add_argument('-back_fudge', dest='bfudge', type=float, default=0.4, help='factor by which to scale the coordinates when backmapping.') diff --git a/polyply/src/backmap.py b/polyply/src/backmap.py index b643c07b..262e7540 100644 --- a/polyply/src/backmap.py +++ b/polyply/src/backmap.py @@ -22,25 +22,20 @@ from .generate_templates import find_atoms from .linalg_functions import rotate_xyz from .graph_utils import find_connecting_edges +from .orient_by_bonds import orient_by_bonds """ Processor implementing a template based back mapping to lower resolution coordinates for a meta molecule. """ -def _norm_matrix(matrix): - norm = np.sum(matrix * matrix) - return norm -norm_matrix = jit(_norm_matrix) -def orient_template(meta_molecule, current_node, template, built_nodes): +def orient_template(meta_molecule, current_node, template, built_nodes, protocol): """ Given a `template` and a `node` of a `meta_molecule` at lower resolution - find the bonded interactions connecting the higher resolution template - to its neighbours and orient a template such that the atoms point torwards - the neighbours. In case some nodes of meta_molecule have already been built - at the lower resolution they can be provided as `built_nodes`. In case the - lower resolution is already built the atoms will be oriented torward the lower - resolution atom participating in the bonded interaction. + find the orientation of the template by chosen a protocol. + + Available protocols: + - by optimizing bonded interactions (i.e. 'default') Parameters: ----------- @@ -57,92 +52,9 @@ def orient_template(meta_molecule, current_node, template, built_nodes): dict the oriented template """ - # 1. find neighbours at meta_mol level - neighbours = nx.all_neighbors(meta_molecule, current_node) - current_resid = meta_molecule.nodes[current_node]["resid"] - - # 2. find connecting atoms at low-res level - edges = [] - ref_nodes = [] - for node in neighbours: - resid = meta_molecule.nodes[node]["resid"] - edge = find_connecting_edges(meta_molecule, - meta_molecule.molecule, - (node, current_node)) - edges += edge - ref_nodes.extend([node]*len(edge)) - - # 3. build coordinate system - ref_coords = np.zeros((3, len(edges))) - opt_coords = np.zeros((3, len(edges))) - - for ndx, edge in enumerate(edges): - for atom in edge: - resid = meta_molecule.molecule.nodes[atom]["resid"] - if resid == current_resid: - current_atom = atom - else: - ref_atom = atom - ref_resid = resid - - # the reference residue has already been build so we take the lower - # resolution coordinates as reference - if ref_resid in built_nodes: - atom_name = meta_molecule.molecule.nodes[current_atom]["atomname"] - - # record the coordinates of the atom that is rotated - opt_coords[:, ndx] = template[atom_name] - - # given the reference atom that already exits translate it to the origin - # of the rotation, this will be the reference point for rotation - ref_coords[:, ndx] = meta_molecule.molecule.nodes[ref_atom]["position"] -\ - meta_molecule.nodes[current_node]["position"] - - # the reference residue has not been build the CG center is taken as - # reference - else: - atom_name = meta_molecule.molecule.nodes[current_atom]["atomname"] - cg_node = ref_nodes[ndx] #find_atoms(meta_molecule, "resid", ref_resid)[0] - - # record the coordinates of the atom that is rotated - opt_coords[:, ndx] = template[atom_name] - - # as the reference atom is not built take the cg node as reference point - # for rotation; translate it to origin - ref_coords[:, ndx] = meta_molecule.nodes[cg_node]["position"] -\ - meta_molecule.nodes[current_node]["position"] - - - # 4. optimize the distance between reference nodes and nodes to be placed - # only using rotation of the complete template - #@profile - def target_function(angles): - rotated = rotate_xyz(opt_coords, angles[0], angles[1], angles[2]) - diff = rotated - ref_coords - score = norm_matrix(diff) - return score - - # choose random starting angles - angles = np.random.uniform(low=0, high=2*np.pi, size=(3)) - opt_results = scipy.optimize.minimize(target_function, angles, method='L-BFGS-B', - options={'ftol':0.01, 'maxiter': 400}) - - # 5. write the template as array and rotate it corrsponding to the result above - template_arr = np.zeros((3, len(template))) - key_to_ndx = {} - for ndx, key in enumerate(template.keys()): - template_arr[:, ndx] = template[key] - key_to_ndx[key] = ndx - - angles = opt_results['x'] - template_rotated_arr = rotate_xyz(template_arr, angles[0], angles[1], angles[2]) - - # 6. write the template back as dictionary - template_rotated = {} - for key, ndx in key_to_ndx.items(): - template_rotated[key] = template_rotated_arr[:, ndx] - return template_rotated + if protocol == "default": + return orient_by_bonds(meta_molecule, current_node, template, built_nodes) class Backmap(Processor): """ @@ -151,9 +63,10 @@ class Backmap(Processor): for the lower resolution molecule associated with the MetaMolecule. """ - def __init__(self, fudge_coords=0.4, *args, **kwargs): + def __init__(self, fudge_coords=0.4, protocol='default', *args, **kwargs): super().__init__(*args, **kwargs) self.fudge_coords = fudge_coords + self.protocol = protocol def _place_init_coords(self, meta_molecule): """ @@ -176,7 +89,8 @@ def _place_init_coords(self, meta_molecule): template = orient_template(meta_molecule, node, meta_molecule.templates[resname], - built_nodes) + built_nodes, + self.protocol) for atom_high in high_res_atoms: atomname = meta_molecule.molecule.nodes[atom_high]["atomname"] diff --git a/polyply/src/gen_coords.py b/polyply/src/gen_coords.py index 54516cf3..0c4dddef 100644 --- a/polyply/src/gen_coords.py +++ b/polyply/src/gen_coords.py @@ -116,6 +116,7 @@ def gen_coords(toppath, max_force=5*10**4.0, nrewind=5, lib=None, + protocol='default', bfudge=0.4): """ Subprogram for coordinate generation which implements the default @@ -185,6 +186,8 @@ def gen_coords(toppath, nrewind: int Number of residues to trace back when random walk step fails in first attempt. The default is 5. + protocol: str + Name of the chosen backmapping protocol bfudge: float Fudge factor by which to scale the coordinates of the residues during the backmapping step. 1 will result in to-scale coordinates @@ -261,7 +264,7 @@ def gen_coords(toppath, nrewind=nrewind).run_system(topology.molecules) ligand_annotator.split_ligands() LOGGER.info("backmapping to target resolution", type="step") - Backmap(fudge_coords=bfudge).run_system(topology) + Backmap(fudge_coords=bfudge, protocol=protocol).run_system(topology) # Write output LOGGER.info("writing output", type="step") command = ' '.join(sys.argv) diff --git a/polyply/src/orient_by_bonds.py b/polyply/src/orient_by_bonds.py new file mode 100644 index 00000000..62b6fda2 --- /dev/null +++ b/polyply/src/orient_by_bonds.py @@ -0,0 +1,124 @@ +import numpy as np +import scipy.optimize +import networkx as nx +from polyply import jit +from .generate_templates import find_atoms +from .linalg_functions import rotate_xyz +from .graph_utils import find_connecting_edges + +def _norm_matrix(matrix): + norm = np.sum(matrix * matrix) + return norm +norm_matrix = jit(_norm_matrix) + +def orient_by_bonds(meta_molecule, current_node, template, built_nodes): + """ + Given a `template` and a `node` of a `meta_molecule` at lower resolution + find the bonded interactions connecting the higher resolution template + to its neighbours and orient a template such that the atoms point torwards + the neighbours. In case some nodes of meta_molecule have already been built + at the lower resolution they can be provided as `built_nodes`. In case the + lower resolution is already built the atoms will be oriented torward the lower + resolution atom participating in the bonded interaction. + + Parameters: + ----------- + meta_molecule: :class:`polyply.src.meta_molecule` + current_node: + node key of the node in meta_molecule to which template referes to + template: dict[collections.abc.Hashable, np.ndarray] + dict of positions referring to the lower resolution atoms of node + built_nodes: list + list of meta_molecule node keys of residues that are already built + + Returns: + -------- + dict + the oriented template + """ + # 1. find neighbours at meta_mol level + neighbours = nx.all_neighbors(meta_molecule, current_node) + current_resid = meta_molecule.nodes[current_node]["resid"] + + # 2. find connecting atoms at low-res level + edges = [] + ref_nodes = [] + for node in neighbours: + resid = meta_molecule.nodes[node]["resid"] + edge = find_connecting_edges(meta_molecule, + meta_molecule.molecule, + (node, current_node)) + edges += edge + ref_nodes.extend([node]*len(edge)) + + # 3. build coordinate system + ref_coords = np.zeros((3, len(edges))) + opt_coords = np.zeros((3, len(edges))) + + for ndx, edge in enumerate(edges): + for atom in edge: + resid = meta_molecule.molecule.nodes[atom]["resid"] + if resid == current_resid: + current_atom = atom + else: + ref_atom = atom + ref_resid = resid + + # the reference residue has already been build so we take the lower + # resolution coordinates as reference + if ref_resid in built_nodes: + atom_name = meta_molecule.molecule.nodes[current_atom]["atomname"] + + # record the coordinates of the atom that is rotated + opt_coords[:, ndx] = template[atom_name] + + # given the reference atom that already exits translate it to the origin + # of the rotation, this will be the reference point for rotation + ref_coords[:, ndx] = meta_molecule.molecule.nodes[ref_atom]["position"] -\ + meta_molecule.nodes[current_node]["position"] + + # the reference residue has not been build the CG center is taken as + # reference + else: + atom_name = meta_molecule.molecule.nodes[current_atom]["atomname"] + cg_node = ref_nodes[ndx] #find_atoms(meta_molecule, "resid", ref_resid)[0] + + # record the coordinates of the atom that is rotated + opt_coords[:, ndx] = template[atom_name] + + # as the reference atom is not built take the cg node as reference point + # for rotation; translate it to origin + ref_coords[:, ndx] = meta_molecule.nodes[cg_node]["position"] -\ + meta_molecule.nodes[current_node]["position"] + + + # 4. optimize the distance between reference nodes and nodes to be placed + # only using rotation of the complete template + #@profile + def target_function(angles): + rotated = rotate_xyz(opt_coords, angles[0], angles[1], angles[2]) + diff = rotated - ref_coords + score = norm_matrix(diff) + return score + + # choose random starting angles + angles = np.random.uniform(low=0, high=2*np.pi, size=(3)) + opt_results = scipy.optimize.minimize(target_function, angles, method='L-BFGS-B', + options={'ftol':0.01, 'maxiter': 400}) + + # 5. write the template as array and rotate it corrsponding to the result above + template_arr = np.zeros((3, len(template))) + key_to_ndx = {} + for ndx, key in enumerate(template.keys()): + template_arr[:, ndx] = template[key] + key_to_ndx[key] = ndx + + angles = opt_results['x'] + template_rotated_arr = rotate_xyz(template_arr, angles[0], angles[1], angles[2]) + + # 6. write the template back as dictionary + template_rotated = {} + for key, ndx in key_to_ndx.items(): + template_rotated[key] = template_rotated_arr[:, ndx] + + return template_rotated From f41d07afc3ba3c9f15354a4d3b81979a385c6e5c Mon Sep 17 00:00:00 2001 From: jan-stevens Date: Tue, 15 Aug 2023 10:14:18 +0200 Subject: [PATCH 2/4] move 'norm_matrix' to linal_funcs with future backmapping protocols in mind --- polyply/src/linalg_functions.py | 5 +++++ polyply/src/orient_by_bonds.py | 7 +------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/polyply/src/linalg_functions.py b/polyply/src/linalg_functions.py index 8663a59f..4bb78db3 100644 --- a/polyply/src/linalg_functions.py +++ b/polyply/src/linalg_functions.py @@ -17,6 +17,11 @@ from scipy.spatial.transform import Rotation from polyply import jit +def _norm_matrix(matrix): + norm = np.sum(matrix * matrix) + return norm +norm_matrix = jit(_norm_matrix) + def _vector_angle_degrees(v1, v2): """ Compute the angle between two vectors diff --git a/polyply/src/orient_by_bonds.py b/polyply/src/orient_by_bonds.py index 62b6fda2..423a2168 100644 --- a/polyply/src/orient_by_bonds.py +++ b/polyply/src/orient_by_bonds.py @@ -1,15 +1,10 @@ import numpy as np import scipy.optimize import networkx as nx -from polyply import jit from .generate_templates import find_atoms from .linalg_functions import rotate_xyz from .graph_utils import find_connecting_edges - -def _norm_matrix(matrix): - norm = np.sum(matrix * matrix) - return norm -norm_matrix = jit(_norm_matrix) +from .linalg_functions import norm_matrix def orient_by_bonds(meta_molecule, current_node, template, built_nodes): """ From 2683dafbf2d472f14c605a788218565b7af508c9 Mon Sep 17 00:00:00 2001 From: jan-stevens Date: Tue, 15 Aug 2023 10:21:52 +0200 Subject: [PATCH 3/4] fix docs string --- polyply/src/backmap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polyply/src/backmap.py b/polyply/src/backmap.py index 262e7540..bb6d7650 100644 --- a/polyply/src/backmap.py +++ b/polyply/src/backmap.py @@ -32,10 +32,10 @@ def orient_template(meta_molecule, current_node, template, built_nodes, protocol): """ Given a `template` and a `node` of a `meta_molecule` at lower resolution - find the orientation of the template by chosen a protocol. + find the orientation of the template by a chosen protocol. Available protocols: - - by optimizing bonded interactions (i.e. 'default') + - by optimizing bonded interactions (i.e. 'orient_by_bonds' / 'default') Parameters: ----------- From 4134f8639b049eaf93959cd1881fecdba0d7f41b Mon Sep 17 00:00:00 2001 From: jan-stevens Date: Tue, 15 Aug 2023 13:24:25 +0200 Subject: [PATCH 4/4] implement comments --- bin/polyply | 8 +++--- polyply/src/backmap.py | 52 +++++++++------------------------- polyply/src/gen_coords.py | 8 +++--- polyply/src/orient_by_bonds.py | 1 - 4 files changed, 21 insertions(+), 48 deletions(-) diff --git a/bin/polyply b/bin/polyply index f377b42f..159e9942 100755 --- a/bin/polyply +++ b/bin/polyply @@ -162,11 +162,11 @@ def main(): # pylint: disable=too-many-locals,too-many-statements ' when RW fails in first attempt')) backmap_group = parser_gen_coords.add_argument_group('Options for backmapping') - backmap_group.add_argument('-protocol', dest='protocol', type=str, default='default', - choices=['default'], - help='choose which backmapping protocol to use.') + backmap_group.add_argument('-bm_mode', dest='bmode', type=str, default='automatic', + choices=['automatic', 'by-bonds'], + help='choose which backmapping mode to use.') - backmap_group.add_argument('-back_fudge', dest='bfudge', type=float, default=0.4, + backmap_group.add_argument('-bm_fudge', dest='bfudge', type=float, default=0.4, help='factor by which to scale the coordinates when backmapping.') parser_gen_coords.set_defaults(func=gen_coords) diff --git a/polyply/src/backmap.py b/polyply/src/backmap.py index bb6d7650..d5e41ccd 100644 --- a/polyply/src/backmap.py +++ b/polyply/src/backmap.py @@ -11,17 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import multiprocessing -import itertools -import numpy as np -import scipy.optimize -import networkx as nx -from tqdm import tqdm -from polyply import jit + from .processor import Processor -from .generate_templates import find_atoms -from .linalg_functions import rotate_xyz -from .graph_utils import find_connecting_edges from .orient_by_bonds import orient_by_bonds """ Processor implementing a template based back @@ -29,32 +20,16 @@ a meta molecule. """ -def orient_template(meta_molecule, current_node, template, built_nodes, protocol): - """ - Given a `template` and a `node` of a `meta_molecule` at lower resolution - find the orientation of the template by a chosen protocol. - - Available protocols: - - by optimizing bonded interactions (i.e. 'orient_by_bonds' / 'default') +""" +BACKMAPPING_MODE contains a set of implemented backmapping modes. - Parameters: - ----------- - meta_molecule: :class:`polyply.src.meta_molecule` - current_node: - node key of the node in meta_molecule to which template referes to - template: dict[collections.abc.Hashable, np.ndarray] - dict of positions referring to the lower resolution atoms of node - built_nodes: list - list of meta_molecule node keys of residues that are already built +Available modes: + - automatically selecting a backmapping method (i.e. 'automatic') [UNDER CONSTRUCTION!!] + - by optimizing bonded interactions (i.e. 'orient_by_bonds') - Returns: - -------- - dict - the oriented template - """ +""" - if protocol == "default": - return orient_by_bonds(meta_molecule, current_node, template, built_nodes) +BACKMAPPING_MODE = {'automatic': orient_by_bonds, 'by-bonds': orient_by_bonds} class Backmap(Processor): """ @@ -63,10 +38,10 @@ class Backmap(Processor): for the lower resolution molecule associated with the MetaMolecule. """ - def __init__(self, fudge_coords=0.4, protocol='default', *args, **kwargs): + def __init__(self, fudge_coords=0.4, bmode='automatic', *args, **kwargs): super().__init__(*args, **kwargs) self.fudge_coords = fudge_coords - self.protocol = protocol + self.bmode = bmode def _place_init_coords(self, meta_molecule): """ @@ -87,10 +62,9 @@ def _place_init_coords(self, meta_molecule): resid = meta_molecule.nodes[node]["resid"] high_res_atoms = meta_molecule.nodes[node]["graph"].nodes - template = orient_template(meta_molecule, node, - meta_molecule.templates[resname], - built_nodes, - self.protocol) + template = BACKMAPPING_MODE[self.bmode](meta_molecule, node, + meta_molecule.templates[resname], + built_nodes) for atom_high in high_res_atoms: atomname = meta_molecule.molecule.nodes[atom_high]["atomname"] diff --git a/polyply/src/gen_coords.py b/polyply/src/gen_coords.py index 0c4dddef..769d35f9 100644 --- a/polyply/src/gen_coords.py +++ b/polyply/src/gen_coords.py @@ -116,7 +116,7 @@ def gen_coords(toppath, max_force=5*10**4.0, nrewind=5, lib=None, - protocol='default', + bmode='automatic', bfudge=0.4): """ Subprogram for coordinate generation which implements the default @@ -186,8 +186,8 @@ def gen_coords(toppath, nrewind: int Number of residues to trace back when random walk step fails in first attempt. The default is 5. - protocol: str - Name of the chosen backmapping protocol + bmode: str + Name of the chosen backmapping mode bfudge: float Fudge factor by which to scale the coordinates of the residues during the backmapping step. 1 will result in to-scale coordinates @@ -264,7 +264,7 @@ def gen_coords(toppath, nrewind=nrewind).run_system(topology.molecules) ligand_annotator.split_ligands() LOGGER.info("backmapping to target resolution", type="step") - Backmap(fudge_coords=bfudge, protocol=protocol).run_system(topology) + Backmap(fudge_coords=bfudge, bmode=bmode).run_system(topology) # Write output LOGGER.info("writing output", type="step") command = ' '.join(sys.argv) diff --git a/polyply/src/orient_by_bonds.py b/polyply/src/orient_by_bonds.py index 423a2168..fe43ff68 100644 --- a/polyply/src/orient_by_bonds.py +++ b/polyply/src/orient_by_bonds.py @@ -1,7 +1,6 @@ import numpy as np import scipy.optimize import networkx as nx -from .generate_templates import find_atoms from .linalg_functions import rotate_xyz from .graph_utils import find_connecting_edges from .linalg_functions import norm_matrix