diff --git a/ensemble_md/analysis/clustering.py b/ensemble_md/analysis/clustering.py index bd7985eb..01621780 100644 --- a/ensemble_md/analysis/clustering.py +++ b/ensemble_md/analysis/clustering.py @@ -11,7 +11,7 @@ from ensemble_md.utils.utils import run_gmx_cmd -def cluster_traj(gmx_executable, inputs, grps, method='linkage', cutoff=0.1, suffix=None): +def cluster_traj(gmx_executable, inputs, grps, coupled_only=True, method='linkage', cutoff=0.1, suffix=None): """ Performs clustering analysis on a trajectory using the GROMACS command :code:`gmx cluster`. Note that only fully coupled configurations are considered. @@ -21,27 +21,46 @@ def cluster_traj(gmx_executable, inputs, grps, method='linkage', cutoff=0.1, suf gmx_executable : str The path to the GROMACS executable. inputs : dict - A dictionary that contains the file names of the input trajectory file (XTC or TRR), - the configuration file (TPR or GRO), the file that contains the time series of the - state index, and the index file (NDX). The must include the keys :code:`traj`, :code:`config`, - :code:`xvg`, and :code:`index`. Note that the value for the key :code:`index` can be :code:`None`. + A dictionary that contains the different input files required for the clustering analysis. + The dictionary must have the following four keys: :code:`traj` (input trajectory file in + XTC or TRR format), :code:`config` (the configuration file in TPR or GRO format), + :code:`xvg` (a GROMACS XVG file), and :code:`index` (an index/NDX file), with the values + being the paths. Note that the value of the key :code:`index` can be :code:`None`, in which + case the function will use a default index file generated by :code:`gmx make_ndx`. If the + parameter :code:`coupled_only` is set to :code:`True`, an XVG file that contains the time + series of the state index (e.g., :code:`dhdl.xvg`) must be provided with the key :code:`xvg`. + Otherwise, the key :code:`xvg` can be set to :code:`None`. grps : dict A dictionary that contains the names of the groups in the index file (NDX) for centering the system, calculating the RMSD, and outputting. The keys are :code:`center`, :code:`rmsd`, and :code:`output`. + coupled_only : bool + Whether to consider only the fully coupled configurations. The default is :code:`True`. method : str The method for clustering available for the GROMACS command :code:`gmx cluster`. The default is 'linkage'. + Check the GROMACS documentation for other available options. cutoff : float - The cutoff in RMSD for clustering. The default is 0.1. + The RMSD cutoff for clustering in nm. The default is 0.1. suffix : str The suffix for the output files. The default is :code:`None`, which means no suffix will be added. """ - # First check if all specified groups are present in the index file + # Check if the index file is provided + if inputs['index'] is None: + print('Running gmx make_ndx to generate an index file ...') + args = [ + gmx_executable, 'make_ndx', + '-f', inputs['config'], + '-o', 'index.ndx', + ] + returncode, stdout, stderr = run_gmx_cmd(args, prompt_input='q\n') + inputs['index'] = 'index.ndx' + + # Check if the groups are present in the index file with open(inputs['index'], 'r') as f: content = f.read() for key in grps: if grps[key] not in content: - raise ValueError(f'The group {grps[key]} is not present in the index file.') + raise ValueError(f'The group {grps[key]} is not present in the provided/generated index file.') outputs = { 'nojump': 'nojump.xtc', @@ -61,14 +80,20 @@ def cluster_traj(gmx_executable, inputs, grps, method='linkage', cutoff=0.1, suf gmx_executable, 'trjconv', '-f', inputs['traj'], '-s', inputs['config'], + '-n', inputs['index'], '-o', outputs['nojump'], '-center', 'yes', '-pbc', 'nojump', - '-drop', inputs['xvg'], - '-dropover', '0' ] - if inputs['index'] is not None: - args.extend(['-n', inputs['index']]) + + if coupled_only: + if inputs['xvg'] is None: + raise ValueError('The parameter "coupled_only" is set to True but no XVG file is provided.') + args.extend([ + '-drop', inputs['xvg'], + '-dropover', '0' + ]) + returncode, stdout, stderr = run_gmx_cmd(args, prompt_input=f'{grps["center"]}\n{grps["output"]}\n') if returncode != 0: print(f'Error with return code: {returncode}):\n{stderr}') @@ -78,13 +103,12 @@ def cluster_traj(gmx_executable, inputs, grps, method='linkage', cutoff=0.1, suf gmx_executable, 'trjconv', '-f', outputs['nojump'], '-s', inputs['config'], + '-n', inputs['index'], '-o', outputs['center'], '-center', 'yes', '-pbc', 'mol', '-ur', 'compact', ] - if inputs['index'] is not None: - args.extend(['-n', inputs['index']]) returncode, stdout, stderr = run_gmx_cmd(args, prompt_input=f'{grps["center"]}\n{grps["output"]}\n') if returncode != 0: print(f'Error with return code: {returncode}):\n{stderr}') @@ -94,6 +118,7 @@ def cluster_traj(gmx_executable, inputs, grps, method='linkage', cutoff=0.1, suf gmx_executable, 'cluster', '-f', outputs['center'], '-s', inputs['config'], + '-n', inputs['index'], '-o', outputs['rmsd-clust'], '-dist', outputs['rmsd-dist'], '-g', outputs['cluster-log'], @@ -101,8 +126,6 @@ def cluster_traj(gmx_executable, inputs, grps, method='linkage', cutoff=0.1, suf '-cutoff', str(cutoff), '-method', method, ] - if inputs['index'] is not None: - args.extend(['-n', inputs['index']]) returncode, stdout, stderr = run_gmx_cmd(args, prompt_input=f'{grps["rmsd"]}\n{grps["output"]}\n') if returncode != 0: print(f'Error with return code: {returncode}):\n{stderr}')