From 838d9e2b26a967b019d8fe7b8a7d8f85f76b2724 Mon Sep 17 00:00:00 2001
From: ajfriedman22 <anika.friedman@colorado.edu>
Date: Mon, 4 Nov 2024 09:40:45 -0700
Subject: [PATCH] Fix topology parsing bug

---
 ensemble_md/replica_exchange_EE.py   | 21 +++++++++-----
 ensemble_md/utils/coordinate_swap.py | 42 +++++++++++++++++-----------
 2 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py
index 57441ff..dc8f4f0 100644
--- a/ensemble_md/replica_exchange_EE.py
+++ b/ensemble_md/replica_exchange_EE.py
@@ -1608,7 +1608,10 @@ def process_top(self):
                 input_file = gmx_parser.read_top(file_name, self.resname_list[f])
 
                 # Determine the atom names corresponding to the atom numbers
-                start_line, atom_name, state = coordinate_swap.get_names(input_file)
+                start_line, atom_name, atom_num, state = coordinate_swap.get_names(input_file, self.resname_list[f])
+                print(f'start line #: {start_line}')
+                print(f'atom name: {atom_name}')
+                print(f'atom #: {atom_num}')
 
                 # Determine the connectivity of all atoms
                 connect_1, connect_2, state_1, state_2 = [], [], [], []  # Atom 1 and atom 2 which are connected and which state they are dummy atoms  # noqa: E501
@@ -1620,10 +1623,14 @@ def process_top(self):
                         break
                     while '' in line_sep:
                         line_sep.remove('')
-                    connect_1.append(atom_name[int(line_sep[0])-1])
-                    connect_2.append(atom_name[int(line_sep[1])-1])
-                    state_1.append(state[int(line_sep[0])-1])
-                    state_2.append(state[int(line_sep[1])-1])
+                    if int(line_sep[0]) in atom_num and int(line_sep[1]) in atom_num:
+                        num_1 = np.where(atom_num == int(line_sep[0]))[0][0]
+                        num_2 = np.where(atom_num == int(line_sep[1]))[0][0]
+                        connect_1.append(atom_name[num_1])
+                        connect_2.append(atom_name[num_2])
+                        state_1.append(state[num_1])
+                        state_2.append(state[num_2])
+
                 df = pd.DataFrame({'Resname': self.resname_list[f], 'Connect 1': connect_1, 'Connect 2': connect_2, 'State 1': state_1, 'State 2': state_2})  # noqa: E501
                 df_top = pd.concat([df_top, df])
             df_top.to_csv('residue_connect.csv')
@@ -1638,9 +1645,9 @@ def process_top(self):
                 lam = {X: int(swap[0][1]), Y: int(swap[1][1])}
                 for A, B in zip([X, Y], [Y, X]):
                     input_A = gmx_parser.read_top(self.top[A], self.resname_list[A])
-                    start_line, A_name, state = coordinate_swap.get_names(input_A)
+                    start_line, A_name, A_num, state = coordinate_swap.get_names(input_A, self.resname_list[A])
                     input_B = gmx_parser.read_top(self.top[B], self.resname_list[B])
-                    start_line, B_name, state = coordinate_swap.get_names(input_B)
+                    start_line, B_name, B_num, state = coordinate_swap.get_names(input_B, self.resname_list[B])
 
                     A_only = [x for x in A_name if x not in B_name]
                     B_only = [x for x in B_name if x not in A_name]
diff --git a/ensemble_md/utils/coordinate_swap.py b/ensemble_md/utils/coordinate_swap.py
index 9ffae8b..bec22f4 100644
--- a/ensemble_md/utils/coordinate_swap.py
+++ b/ensemble_md/utils/coordinate_swap.py
@@ -1114,7 +1114,7 @@ def _swap_name(init_atom_names, new_resname, df_top):
     return new_atom_names
 
 
-def get_names(input):
+def get_names(input, resname):
     """
     Determines the names of all atoms in the topology and which :math:`lambda` state for which they are dummy atoms.
 
@@ -1122,18 +1122,21 @@ def get_names(input):
     ----------
     input : list
         A list of strings containing the lines of the input topology.
-
+    resname : str
+        Name of residue of interest for which to extract atom names
     Returns
     -------
     start_line : int
         The next line to start reading from the topology.
     atom_name : list
-        All atom names in the topology.
+        All atom names in the topology corresponding to the residue of interest.
+    atom_num : list
+        The atom numbers corresponding to the atom names in atom_name
     state : list
         The state that the atom is a dummy atom (:math:`lambda=0`, :math:`lambda=1`, or -1 if nevver dummy).
     """
     atom_section = False
-    atom_name, state = [], []
+    atom_name, atom_num, state = [], [], []
     for l, line in enumerate(input):  # noqa: E741
         if atom_section:
             line_sep = line.split(' ')
@@ -1144,18 +1147,18 @@ def get_names(input):
             if line_sep[0] == '\n':
                 start_line = l+2
                 break
-            atom_name.append(line_sep[4])
-            if len(line_sep) < 10:
-                state.append(-1)
-            elif float(line_sep[6]) == 0:
-                state.append(0)
-            elif float(line_sep[9]) == 0:
-                state.append(1)
-            else:
-                state.append(-1)
+            if line_sep[3] == resname:
+                atom_name.append(line_sep[4])
+                atom_num.append(int(line_sep[0]))
+                if float(line_sep[6]) == 0:
+                    state.append(0)
+                elif float(line_sep[9]) == 0:
+                    state.append(1)
+                else:
+                    state.append(-1)
         if line == '[ atoms ]\n':
             atom_section = True
-    return start_line, atom_name, state
+    return start_line, atom_name, np.array(atom_num), state
 
 
 def determine_connection(main_only, other_only, main_name, other_name, df_top, main_state):
@@ -1190,9 +1193,15 @@ def determine_connection(main_only, other_only, main_name, other_name, df_top, m
     align_atom, angle_atom = [], []
     for atom in main_only:
         element = atom.strip('0123456789')
-        real_element = element.strip('DV')
+        e_split = list(element)
+        if e_split[0] == 'D':
+            real_element = ''.join(e_split[1:])
+        elif len(e_split) > 2 and e_split[1] == 'V':
+            del e_split[1]
+            real_element = ''.join(e_split)
+        else:
+            real_element = element
         num = atom.strip('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
-
         if f'D{atom}' in other_only or f'{element}V{num}' in other_only:
             D2R.append(atom)
         elif f'{real_element}{num}' in other_only:
@@ -1200,6 +1209,7 @@ def determine_connection(main_only, other_only, main_name, other_name, df_top, m
         else:
             miss.append(atom)
     df_select = df_top[df_top['Resname'] == main_name]
+
     # Seperate into each seperate functional group to be added
     anchor_atoms = []