Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure no duplicate atom names are given #15

Merged
merged 3 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/hydride/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def add_hydrogen(atoms, mask=None, fragment_library=None, name_library=None, box
)
p += res_length
# Set annotation and coordinates for hydrogen atoms
already_used_names = set()
for j in range(start, stop):
hydrogen_coord_for_atom = hydrogen_coord[j]
hydrogen_name_generator = name_library.generate_hydrogen_names(
Expand All @@ -130,7 +131,11 @@ def add_hydrogen(atoms, mask=None, fragment_library=None, name_library=None, box
hydrogenated_atoms.ins_code[p] = atoms.ins_code[j]
hydrogenated_atoms.res_name[p] = atoms.res_name[j]
hydrogenated_atoms.hetero[p] = atoms.hetero[j]
hydrogenated_atoms.atom_name[p] = next(hydrogen_name_generator)
for hydrogen_name in hydrogen_name_generator:
if hydrogen_name == "" or hydrogen_name not in already_used_names:
already_used_names.add(hydrogen_name)
break
hydrogenated_atoms.atom_name[p] = hydrogen_name
hydrogenated_atoms.element[p] = "H"
heavy_index = index_mapping[j]
hydrogen_index = p
Expand Down
15 changes: 7 additions & 8 deletions src/hydride/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
__author__ = "Patrick Kunzmann"
__all__ = ["FragmentLibrary"]

import functools
import pickle
import warnings
from os.path import abspath, dirname, join
Expand Down Expand Up @@ -54,11 +55,10 @@ class FragmentLibrary:
Acta Cryst, 34, 827-828 (1978).
"""

_std_library = None

def __init__(self):
self._frag_dict = {}

@functools.cache
@staticmethod
def standard_library():
"""
Expand All @@ -71,12 +71,11 @@ def standard_library():
library : FragmentLibrary
The standard library.
"""
if FragmentLibrary._std_library is None:
FragmentLibrary._std_library = FragmentLibrary()
file_name = join(dirname(abspath(__file__)), "fragments.pickle")
with open(file_name, "rb") as fragments_file:
FragmentLibrary._std_library._frag_dict = pickle.load(fragments_file)
return FragmentLibrary._std_library
fragment_library = FragmentLibrary()
file_name = join(dirname(abspath(__file__)), "fragments.pickle")
with open(file_name, "rb") as fragments_file:
fragment_library._frag_dict = pickle.load(fragments_file)
return fragment_library

def add_molecule(self, molecule):
"""
Expand Down
36 changes: 13 additions & 23 deletions src/hydride/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
__author__ = "Patrick Kunzmann"
__all__ = ["AtomNameLibrary"]

import functools
import itertools
import pickle
import string
from os.path import abspath, dirname, join
Expand All @@ -28,11 +30,10 @@ class AtomNameLibrary:
hydrogen naming schemes.
"""

_std_library = None

def __init__(self):
self._name_dict = {}

@functools.cache
@staticmethod
def standard_library():
"""
Expand All @@ -45,12 +46,11 @@ def standard_library():
library : AtomNameLibrary
The standard library.
"""
if AtomNameLibrary._std_library is None:
AtomNameLibrary._std_library = AtomNameLibrary()
file_name = join(dirname(abspath(__file__)), "names.pickle")
with open(file_name, "rb") as names_file:
AtomNameLibrary._std_library._name_dict = pickle.load(names_file)
return AtomNameLibrary._std_library
name_library = AtomNameLibrary()
file_name = join(dirname(abspath(__file__)), "names.pickle")
with open(file_name, "rb") as names_file:
name_library._name_dict = pickle.load(names_file)
return name_library

def add_molecule(self, molecule):
"""
Expand Down Expand Up @@ -99,18 +99,14 @@ def generate_hydrogen_names(self, heavy_res_name, heavy_atom_name):
yield hydrogen_name
try:
base_name = hydrogen_name[:-1]
number = int(hydrogen_name[-1])
while True:
for number in itertools.count(int(hydrogen_name[-1]) + 1):
# Proceed by increasing the atom number
# e.g. CB -> HB1, HB2, HB3, ...
number += 1
yield f"{base_name}{number}"
except ValueError:
# Atom name has no number at the end
# -> simply append number
number = 0
while True:
number += 1
for number in itertools.count(1):
yield f"{hydrogen_name}{number}"

else:
Expand All @@ -128,23 +124,17 @@ def generate_hydrogen_names(self, heavy_res_name, heavy_atom_name):
heavy_atom_name[0]
# C1 -> H1, H1A, H1B
yield f"H{number}"
i = 0
while True:
for i in itertools.count():
yield f"H{number}{string.ascii_uppercase[i]}"
i += 1
elif len(heavy_atom_name) > 1:
# e.g. CA -> HA, HA2, HA3, ...
suffix = heavy_atom_name[1:]
yield f"H{suffix}"
number = 1
while True:
for number in itertools.count(1):
yield f"H{suffix}{number}"
number += 1

else:
# N -> H, H2, H3, ...
yield "H"
number = 1
while True:
for number in itertools.count(1):
yield f"H{number}"
number += 1
52 changes: 52 additions & 0 deletions tests/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,55 @@ def test_original_mask(path):

assert hydrogenated_model.array_length() > ref_model.array_length()
assert test_model == ref_model


def test_no_duplicate_names():
"""
Check if no duplicate hydrogen names are given within the same residue.
This is tested by the extreme case of a residue where each heavy atom has the same
name.
"""
residue_1 = info.residue("ALA")
residue_2 = info.residue("ALA")
# Ensure that both residues in the array can be distinguished as such
# in the concatenated array
residue_1.res_id[:] = 1
residue_2.res_id[:] = 2
# Create a heavy atom array from two residues
atoms = residue_1 + residue_2
atoms = atoms[atoms.element != "H"]
# Give all atoms the same name to check if still unique hydrogen names are assigned
atoms.atom_name[:] = "CA"

atoms, _ = hydride.add_hydrogen(atoms)

hydrogen_atoms = atoms[atoms.element == "H"]
atom_names = hydrogen_atoms.atom_name
# Within a residue all hydrogen atom names should be unique
for res_id in (1, 2):
atom_names_in_residue = atom_names[hydrogen_atoms.res_id == res_id]
assert len(np.unique(atom_names_in_residue)) == len(atom_names_in_residue)
# But two different residues should reset the used names
# As here the two residues are the same, we expect the same names
assert np.all(
atom_names[hydrogen_atoms.res_id == 1] == atom_names[hydrogen_atoms.res_id == 2]
)


def test_empty_annotations():
"""
Check if hydrogen addition (coordinates and naming) works, if a molecule has no
residue name and atom names.
"""
ref_molecule = info.residue("ALA")
ref_molecule.atom_name[:] = ""
ref_molecule.res_name[:] = ""

test_molecule = ref_molecule[ref_molecule.element != "H"]
test_molecule, _ = hydride.add_hydrogen(test_molecule)

hydrogen_atoms = test_molecule[test_molecule.element == "H"]
# Expect empty hydrogen names for empty heavy atom names
assert np.all(hydrogen_atoms.atom_name == "")
# Roughly check correct hydrogen addition
assert test_molecule.array_length() == ref_molecule.array_length()
Loading