From 838d9e2b26a967b019d8fe7b8a7d8f85f76b2724 Mon Sep 17 00:00:00 2001 From: ajfriedman22 Date: Mon, 4 Nov 2024 09:40:45 -0700 Subject: [PATCH] Fix topology parsing bug --- ensemble_md/replica_exchange_EE.py | 21 +++++++++----- ensemble_md/utils/coordinate_swap.py | 42 +++++++++++++++++----------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/ensemble_md/replica_exchange_EE.py b/ensemble_md/replica_exchange_EE.py index 57441ff..dc8f4f0 100644 --- a/ensemble_md/replica_exchange_EE.py +++ b/ensemble_md/replica_exchange_EE.py @@ -1608,7 +1608,10 @@ def process_top(self): input_file = gmx_parser.read_top(file_name, self.resname_list[f]) # Determine the atom names corresponding to the atom numbers - start_line, atom_name, state = coordinate_swap.get_names(input_file) + start_line, atom_name, atom_num, state = coordinate_swap.get_names(input_file, self.resname_list[f]) + print(f'start line #: {start_line}') + print(f'atom name: {atom_name}') + print(f'atom #: {atom_num}') # Determine the connectivity of all atoms connect_1, connect_2, state_1, state_2 = [], [], [], [] # Atom 1 and atom 2 which are connected and which state they are dummy atoms # noqa: E501 @@ -1620,10 +1623,14 @@ def process_top(self): break while '' in line_sep: line_sep.remove('') - connect_1.append(atom_name[int(line_sep[0])-1]) - connect_2.append(atom_name[int(line_sep[1])-1]) - state_1.append(state[int(line_sep[0])-1]) - state_2.append(state[int(line_sep[1])-1]) + if int(line_sep[0]) in atom_num and int(line_sep[1]) in atom_num: + num_1 = np.where(atom_num == int(line_sep[0]))[0][0] + num_2 = np.where(atom_num == int(line_sep[1]))[0][0] + connect_1.append(atom_name[num_1]) + connect_2.append(atom_name[num_2]) + state_1.append(state[num_1]) + state_2.append(state[num_2]) + df = pd.DataFrame({'Resname': self.resname_list[f], 'Connect 1': connect_1, 'Connect 2': connect_2, 'State 1': state_1, 'State 2': state_2}) # noqa: E501 df_top = pd.concat([df_top, df]) df_top.to_csv('residue_connect.csv') @@ -1638,9 +1645,9 @@ def process_top(self): lam = {X: int(swap[0][1]), Y: int(swap[1][1])} for A, B in zip([X, Y], [Y, X]): input_A = gmx_parser.read_top(self.top[A], self.resname_list[A]) - start_line, A_name, state = coordinate_swap.get_names(input_A) + start_line, A_name, A_num, state = coordinate_swap.get_names(input_A, self.resname_list[A]) input_B = gmx_parser.read_top(self.top[B], self.resname_list[B]) - start_line, B_name, state = coordinate_swap.get_names(input_B) + start_line, B_name, B_num, state = coordinate_swap.get_names(input_B, self.resname_list[B]) A_only = [x for x in A_name if x not in B_name] B_only = [x for x in B_name if x not in A_name] diff --git a/ensemble_md/utils/coordinate_swap.py b/ensemble_md/utils/coordinate_swap.py index 9ffae8b..bec22f4 100644 --- a/ensemble_md/utils/coordinate_swap.py +++ b/ensemble_md/utils/coordinate_swap.py @@ -1114,7 +1114,7 @@ def _swap_name(init_atom_names, new_resname, df_top): return new_atom_names -def get_names(input): +def get_names(input, resname): """ Determines the names of all atoms in the topology and which :math:`lambda` state for which they are dummy atoms. @@ -1122,18 +1122,21 @@ def get_names(input): ---------- input : list A list of strings containing the lines of the input topology. - + resname : str + Name of residue of interest for which to extract atom names Returns ------- start_line : int The next line to start reading from the topology. atom_name : list - All atom names in the topology. + All atom names in the topology corresponding to the residue of interest. + atom_num : list + The atom numbers corresponding to the atom names in atom_name state : list The state that the atom is a dummy atom (:math:`lambda=0`, :math:`lambda=1`, or -1 if nevver dummy). """ atom_section = False - atom_name, state = [], [] + atom_name, atom_num, state = [], [], [] for l, line in enumerate(input): # noqa: E741 if atom_section: line_sep = line.split(' ') @@ -1144,18 +1147,18 @@ def get_names(input): if line_sep[0] == '\n': start_line = l+2 break - atom_name.append(line_sep[4]) - if len(line_sep) < 10: - state.append(-1) - elif float(line_sep[6]) == 0: - state.append(0) - elif float(line_sep[9]) == 0: - state.append(1) - else: - state.append(-1) + if line_sep[3] == resname: + atom_name.append(line_sep[4]) + atom_num.append(int(line_sep[0])) + if float(line_sep[6]) == 0: + state.append(0) + elif float(line_sep[9]) == 0: + state.append(1) + else: + state.append(-1) if line == '[ atoms ]\n': atom_section = True - return start_line, atom_name, state + return start_line, atom_name, np.array(atom_num), state def determine_connection(main_only, other_only, main_name, other_name, df_top, main_state): @@ -1190,9 +1193,15 @@ def determine_connection(main_only, other_only, main_name, other_name, df_top, m align_atom, angle_atom = [], [] for atom in main_only: element = atom.strip('0123456789') - real_element = element.strip('DV') + e_split = list(element) + if e_split[0] == 'D': + real_element = ''.join(e_split[1:]) + elif len(e_split) > 2 and e_split[1] == 'V': + del e_split[1] + real_element = ''.join(e_split) + else: + real_element = element num = atom.strip('ABCDEFGHIJKLMNOPQRSTUVWXYZ') - if f'D{atom}' in other_only or f'{element}V{num}' in other_only: D2R.append(atom) elif f'{real_element}{num}' in other_only: @@ -1200,6 +1209,7 @@ def determine_connection(main_only, other_only, main_name, other_name, df_top, m else: miss.append(atom) df_select = df_top[df_top['Resname'] == main_name] + # Seperate into each seperate functional group to be added anchor_atoms = []