From b8e16d761c37ca435b2207232a7f40af3995db2f Mon Sep 17 00:00:00 2001 From: David Dotson Date: Fri, 29 Sep 2023 19:13:58 -0700 Subject: [PATCH 01/10] Splitting out SetupUnit from SimulationUnit in NonEquilibriumCyclingProtocol --- .../protocols/nonequilibrium_cycling.py | 257 +++++++++++------- 1 file changed, 152 insertions(+), 105 deletions(-) diff --git a/fenchiridion/protocols/nonequilibrium_cycling.py b/fenchiridion/protocols/nonequilibrium_cycling.py index 2998876..80291e6 100644 --- a/fenchiridion/protocols/nonequilibrium_cycling.py +++ b/fenchiridion/protocols/nonequilibrium_cycling.py @@ -27,6 +27,154 @@ # logger = logging.getLogger(__name__) +class SetupUnit(ProtocolUnit): + + def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): + """ + Execute the simulation part of the Nonequilibrium switching protocol using GUFE objects. + + Parameters + ---------- + ctx: gufe.protocols.protocolunit.Context + The gufe context for the unit. + + state_a : gufe.ChemicalSystem + The initial chemical system. + + + state_b : gufe.ChemicalSystem + The objective chemical system. + + mapping : dict[str, gufe.mapping.ComponentMapping] + A dict featuring mappings between the two chemical systems. + + settings : gufe.settings.model.Settings + The full settings for the protocol. + + Returns + ------- + dict : dict[str, str] + Dictionary with paths to work arrays, both forward and reverse, and trajectory coordinates for systems + A and B. + """ + # needed imports + import numpy as np + import openmm + import openmm.unit as openmm_unit + from openmmtools.integrators import PeriodicNonequilibriumIntegrator + from perses.utils.openeye import generate_unique_atom_names + + # Check compatibility between states (same receptor and solvent) + self._check_states_compatibility(state_a, state_b) + + # Get components from systems if found (None otherwise) -- NOTE: Uses hardcoded keys! + receptor_a = state_a.components.get("protein") + # receptor_b = state_b.components.get("protein") # Should not be needed + ligand_a = mapping.get("ligand").componentA + ligand_b = mapping.get("ligand").componentB + solvent_a = state_a.components.get("solvent") + # solvent_b = state_b.components.get("solvent") # Should not be needed + + # Check first state for receptor if not get receptor from second one + if receptor_a: + receptor_top = receptor_a.to_openmm_topology() + receptor_pos = receptor_a.to_openmm_positions() + else: + receptor_top, receptor_pos = None, None + + # Get ligands cheminformatics molecules + ligand_a = ligand_a.to_openff().to_openeye() + ligand_b = ligand_b.to_openff().to_openeye() + # Generating unique atom names for ligands -- openmmforcefields needs them + ligand_a = generate_unique_atom_names(ligand_a) + ligand_b = generate_unique_atom_names(ligand_b) + + # Get solvent parameters from component + if solvent_a: + ion_concentration = solvent_a.ion_concentration.to_openmm() + positive_ion = solvent_a.positive_ion + negative_ion = solvent_a.negative_ion + else: + ion_concentration, positive_ion, negative_ion = None, None, None + + # Get settings + thermodynamic_settings = settings.thermo_settings + phase = self._detect_phase(state_a, state_b) + traj_save_frequency = settings.traj_save_frequency + work_save_frequency = settings.work_save_frequency # Note: this is divisor of traj save freq. + selection_expression = settings.atom_selection_expression + + # Get the ligand mapping from ComponentMapping object + # NOTE: perses to date has a different "directionality" sense in terms of the mapping, + # see perses.rjmc.topology_proposal.propose docstring for detailed information. + ligand_mapping = mapping['ligand'].componentB_to_componentA + + # Setup relative FE calculation + fe_setup = RelativeFEPSetup( + old_ligand=ligand_a, + new_ligand=ligand_b, + receptor=receptor_top, + receptor_positions=receptor_pos, + forcefield_files=settings.forcefield_settings.forcefields, + small_molecule_forcefield=settings.forcefield_settings.small_molecule_forcefield, + phases=[phase], + transformation_atom_map=ligand_mapping, # Handle atom mapping between systems + ionic_strength=ion_concentration, + positive_ion=positive_ion, + negative_ion=negative_ion, + ) + + topology_proposals = fe_setup.topology_proposals + old_positions = fe_setup.old_positions + new_positions = fe_setup.new_positions + + # Generate Hybrid Topology Factory - Generic HTF + htf = HybridTopologyFactory( + topology_proposal=topology_proposals[phase], + current_positions=old_positions[phase], + new_positions=new_positions[phase], + softcore_LJ_v2=settings.softcore_LJ_v2, + interpolate_old_and_new_14s=settings.interpolate_old_and_new_14s, + ) + + system = htf.hybrid_system + positions = htf.hybrid_positions + + # Set up integrator + temperature = to_openmm(thermodynamic_settings.temperature) + neq_steps = settings.eq_steps + eq_steps = settings.neq_steps + timestep = to_openmm(settings.timestep) + splitting = settings.neq_splitting + integrator = PeriodicNonequilibriumIntegrator(alchemical_functions=settings.lambda_functions, + nsteps_neq=neq_steps, + nsteps_eq=eq_steps, + splitting=splitting, + timestep=timestep, + temperature=temperature, ) + + # Set up context + platform = get_openmm_platform(settings.platform) + context = openmm.Context(system, integrator, platform) + context.setPeriodicBoxVectors(*system.getDefaultPeriodicBoxVectors()) + context.setPositions(positions) + + try: + # Minimize + openmm.LocalEnergyMinimizer.minimize(context) + + # Equilibrate + context.setVelocitiesToTemperature(temperature) + + # SERIALIZE SYSTEM, STATE, INTEGRATOR + + finally: + # Explicit cleanup for GPU resources + del context, integrator + + return {} + + class SimulationUnit(ProtocolUnit): """ Monolithic unit for simulation. It runs NEQ switching simulation from chemical systems and stores the @@ -148,24 +296,16 @@ def extract_positions(context, hybrid_topology_factory, atom_selection_exp="not return initial_selected_positions, final_selected_positions - def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): + def _execute(self, ctx, *, setup, settings, **inputs): """ Execute the simulation part of the Nonequilibrium switching protocol using GUFE objects. Parameters ---------- - ctx: gufe.protocols.protocolunit.Context + ctx : gufe.protocols.protocolunit.Context The gufe context for the unit. - state_a : gufe.ChemicalSystem - The initial chemical system. - - state_b : gufe.ChemicalSystem - The objective chemical system. - - mapping : dict[str, gufe.mapping.ComponentMapping] - A dict featuring mappings between the two chemical systems. - + setup : settings : gufe.settings.model.Settings The full settings for the protocol. @@ -191,95 +331,7 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): file_handler.setFormatter(log_formatter) file_logger.addHandler(file_handler) - # Check compatibility between states (same receptor and solvent) - self._check_states_compatibility(state_a, state_b) - - # Get components from systems if found (None otherwise) -- NOTE: Uses hardcoded keys! - receptor_a = state_a.components.get("protein") - # receptor_b = state_b.components.get("protein") # Should not be needed - ligand_a = mapping.get("ligand").componentA - ligand_b = mapping.get("ligand").componentB - solvent_a = state_a.components.get("solvent") - # solvent_b = state_b.components.get("solvent") # Should not be needed - - - # Check first state for receptor if not get receptor from second one - if receptor_a: - receptor_top = receptor_a.to_openmm_topology() - receptor_pos = receptor_a.to_openmm_positions() - else: - receptor_top, receptor_pos = None, None - - # Get ligands cheminformatics molecules - ligand_a = ligand_a.to_openff().to_openeye() - ligand_b = ligand_b.to_openff().to_openeye() - # Generating unique atom names for ligands -- openmmforcefields needs them - ligand_a = generate_unique_atom_names(ligand_a) - ligand_b = generate_unique_atom_names(ligand_b) - - # Get solvent parameters from component - if solvent_a: - ion_concentration = solvent_a.ion_concentration.to_openmm() - positive_ion = solvent_a.positive_ion - negative_ion = solvent_a.negative_ion - else: - ion_concentration, positive_ion, negative_ion = None, None, None - - # Get settings - thermodynamic_settings = settings.thermo_settings - phase = self._detect_phase(state_a, state_b) - traj_save_frequency = settings.traj_save_frequency - work_save_frequency = settings.work_save_frequency # Note: this is divisor of traj save freq. - selection_expression = settings.atom_selection_expression - - # Get the ligand mapping from ComponentMapping object - # NOTE: perses to date has a different "directionality" sense in terms of the mapping, - # see perses.rjmc.topology_proposal.propose docstring for detailed information. - ligand_mapping = mapping['ligand'].componentB_to_componentA - - # Setup relative FE calculation - fe_setup = RelativeFEPSetup( - old_ligand=ligand_a, - new_ligand=ligand_b, - receptor=receptor_top, - receptor_positions=receptor_pos, - forcefield_files=settings.forcefield_settings.forcefields, - small_molecule_forcefield=settings.forcefield_settings.small_molecule_forcefield, - phases=[phase], - transformation_atom_map=ligand_mapping, # Handle atom mapping between systems - ionic_strength=ion_concentration, - positive_ion=positive_ion, - negative_ion=negative_ion, - ) - - topology_proposals = fe_setup.topology_proposals - old_positions = fe_setup.old_positions - new_positions = fe_setup.new_positions - - # Generate Hybrid Topology Factory - Generic HTF - htf = HybridTopologyFactory( - topology_proposal=topology_proposals[phase], - current_positions=old_positions[phase], - new_positions=new_positions[phase], - softcore_LJ_v2=settings.softcore_LJ_v2, - interpolate_old_and_new_14s=settings.interpolate_old_and_new_14s, - ) - - system = htf.hybrid_system - positions = htf.hybrid_positions - - # Set up integrator - temperature = to_openmm(thermodynamic_settings.temperature) - neq_steps = settings.eq_steps - eq_steps = settings.neq_steps - timestep = to_openmm(settings.timestep) - splitting = settings.neq_splitting - integrator = PeriodicNonequilibriumIntegrator(alchemical_functions=settings.lambda_functions, - nsteps_neq=neq_steps, - nsteps_eq=eq_steps, - splitting=splitting, - timestep=timestep, - temperature=temperature, ) + # GET STATE, SYSTEM, AND INTEGRATOR FROM SETUP UNIT # Set up context platform = get_openmm_platform(settings.platform) @@ -288,11 +340,6 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): context.setPositions(positions) try: - # Minimize - openmm.LocalEnergyMinimizer.minimize(context) - - # Equilibrate - context.setVelocitiesToTemperature(temperature) # Prepare objects to store data -- empty lists so far forward_eq_old, forward_eq_new, forward_neq_old, forward_neq_new = list(), list(), list(), list() From b93039930056eeb65ac731ff5ee5589ffca49ced Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 2 Oct 2023 21:21:08 -0700 Subject: [PATCH 02/10] Moving things around from live prototyping --- feflow/protocols/nonequilibrium_cycling.py | 137 +++++++++++---------- 1 file changed, 73 insertions(+), 64 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index d4e1ec4..85c48ad 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -28,6 +28,69 @@ class SetupUnit(ProtocolUnit): + @staticmethod + def _check_states_compatibility(state_a, state_b): + """ + Checks that both states have the same solvent parameters and receptor. + + Parameters + ---------- + state_a : gufe.state.State + Origin state for the alchemical transformation. + state_b : + Destination state for the alchemical transformation. + """ + # If any of them has a solvent, check the parameters are the same + if any(["solvent" in state.components for state in (state_a, state_b)]): + assert state_a.get("solvent") == state_b.get("solvent"), "Solvent parameters differ between solvent components." + # check protein component is the same in both states if protein component is found + if any(["protein" in state.components for state in (state_a, state_b)]): + assert state_a.get("protein") == state_b.get("protein"), "Receptors in states are not compatible." + + @staticmethod + def _detect_phase(state_a, state_b): + """ + Detect phase according to the components in the input chemical state. + + Complex state is assumed if both states have ligands and protein components. + + Solvent state is assumed + + Vacuum state is assumed if only either a ligand or a protein is present + in each of the states. + + Parameters + ---------- + state_a : gufe.state.State + Source state for the alchemical transformation. + state_b : gufe.state.State + Destination state for the alchemical transformation. + + Returns + ------- + phase : str + Phase name. "vacuum", "solvent" or "complex". + component_keys : list[str] + List of component keys to extract from states. + """ + states = (state_a, state_b) + # where to store the data to be returned + + # Order of phases is important! We have to check complex first and solvent second. + key_options = { + "complex": ["ligand", "protein", "solvent"], + "solvent": ["ligand", "solvent"], + "vacuum": ["ligand"] + } + for phase, keys in key_options.items(): + if all([key in state for state in states for key in keys]): + detected_phase = phase + break + else: + raise ValueError( + "Could not detect phase from system states. Make sure the component in both systems match.") + + return detected_phase def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): """ @@ -168,6 +231,13 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): # SERIALIZE SYSTEM, STATE, INTEGRATOR + import pdb + pdb.set_trace() + + system_ = context.getSystem() + state_ = context.getState() + integrator_ = context.getIntegrator() + finally: # Explicit cleanup for GPU resources del context, integrator @@ -180,69 +250,6 @@ class SimulationUnit(ProtocolUnit): Monolithic unit for simulation. It runs NEQ switching simulation from chemical systems and stores the work computed in numpy-formatted files, to be analyzed by another unit. """ - @staticmethod - def _check_states_compatibility(state_a, state_b): - """ - Checks that both states have the same solvent parameters and receptor. - - Parameters - ---------- - state_a : gufe.state.State - Origin state for the alchemical transformation. - state_b : - Destination state for the alchemical transformation. - """ - # If any of them has a solvent, check the parameters are the same - if any(["solvent" in state.components for state in (state_a, state_b)]): - assert state_a.get("solvent") == state_b.get("solvent"), "Solvent parameters differ between solvent components." - # check protein component is the same in both states if protein component is found - if any(["protein" in state.components for state in (state_a, state_b)]): - assert state_a.get("protein") == state_b.get("protein"), "Receptors in states are not compatible." - - @staticmethod - def _detect_phase(state_a, state_b): - """ - Detect phase according to the components in the input chemical state. - - Complex state is assumed if both states have ligands and protein components. - - Solvent state is assumed - - Vacuum state is assumed if only either a ligand or a protein is present - in each of the states. - - Parameters - ---------- - state_a : gufe.state.State - Source state for the alchemical transformation. - state_b : gufe.state.State - Destination state for the alchemical transformation. - - Returns - ------- - phase : str - Phase name. "vacuum", "solvent" or "complex". - component_keys : list[str] - List of component keys to extract from states. - """ - states = (state_a, state_b) - # where to store the data to be returned - - # Order of phases is important! We have to check complex first and solvent second. - key_options = { - "complex": ["ligand", "protein", "solvent"], - "solvent": ["ligand", "solvent"], - "vacuum": ["ligand"] - } - for phase, keys in key_options.items(): - if all([key in state for state in states for key in keys]): - detected_phase = phase - break - else: - raise ValueError( - "Could not detect phase from system states. Make sure the component in both systems match.") - - return detected_phase @staticmethod def extract_positions(context, hybrid_topology_factory, atom_selection_exp="not water"): @@ -689,8 +696,10 @@ def _create( # or JSON-serializable objects num_replicates = self.settings.num_replicates + setup = SetupUnit(state_a=stateA, state_b=stateB, mapping=mapping, settings=self.settings, name="setup") + simulations = [ - SimulationUnit(state_a=stateA, state_b=stateB, mapping=mapping, settings=self.settings, name=f"{replicate}") + SimulationUnit(setup=setup, settings=self.settings, name=f"{replicate}") for replicate in range(num_replicates)] end = ResultUnit(name="result", simulations=simulations) From 99abdaee589d8757b2763915c153bf3de4644482 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 9 Oct 2023 17:08:54 -0700 Subject: [PATCH 03/10] Additional concept sketching --- feflow/protocols/nonequilibrium_cycling.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index 85c48ad..3316a46 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -23,6 +23,8 @@ from openff.units import unit from openff.units.openmm import to_openmm +from ..utils.data import serialize + # Specific instance of logger for this module # logger = logging.getLogger(__name__) @@ -231,18 +233,25 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): # SERIALIZE SYSTEM, STATE, INTEGRATOR - import pdb - pdb.set_trace() - system_ = context.getSystem() state_ = context.getState() integrator_ = context.getIntegrator() + system_outfile = ctx.shared / 'system.xml.bz2' + state_outfile = ctx.shared / 'state.xml.bz2' + integrator_outfile = ctx.shared / 'integrator.xml.bz2' + + serialize(system_, system_outfile) + serialize(state_, state_outfile) + serialize(integrator_, integrator_outfile) + finally: # Explicit cleanup for GPU resources del context, integrator - return {} + return {'system': system_outfile, + 'state': state_outfile, + 'integrator': integrator_outfile} class SimulationUnit(ProtocolUnit): @@ -662,6 +671,7 @@ class NonEquilibriumCyclingProtocol(Protocol): of the same type of components as components in stateB. """ + _simulation_unit = SimulationUnit result_cls = NonEquilibriumCyclingProtocolResult def __init__(self, settings: Settings): @@ -699,7 +709,7 @@ def _create( setup = SetupUnit(state_a=stateA, state_b=stateB, mapping=mapping, settings=self.settings, name="setup") simulations = [ - SimulationUnit(setup=setup, settings=self.settings, name=f"{replicate}") + self._simulation_unit(setup=setup, settings=self.settings, name=f"{replicate}") for replicate in range(num_replicates)] end = ResultUnit(name="result", simulations=simulations) From 44c324dbe65c2f501cfe7d2b014adb615660ac34 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Mon, 9 Oct 2023 21:04:52 -0700 Subject: [PATCH 04/10] Added utils/data.py --- feflow/utils/data.py | 72 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 feflow/utils/data.py diff --git a/feflow/utils/data.py b/feflow/utils/data.py new file mode 100644 index 0000000..42dcf96 --- /dev/null +++ b/feflow/utils/data.py @@ -0,0 +1,72 @@ +import os +import pathlib + + +def serialize(item, filename: pathlib.Path): + """ + Serialize an OpenMM System, State, or Integrator. + + Parameters + ---------- + item : System, State, or Integrator + The thing to be serialized + filename : str + The filename to serialize to + """ + from openmm import XmlSerializer + + # Create parent directory if it doesn't exist + filename_basedir = filename.parent + if not filename_basedir.exists(): + os.makedirs(filename_basedir) + + if filename.suffix == '.gz': + import gzip + with gzip.open(filename, mode='wb') as outfile: + serialized_thing = XmlSerializer.serialize(item) + outfile.write(serialized_thing.encode()) + if filename.suffix == '.bz2': + import bz2 + with bz2.open(filename, mode='wb') as outfile: + serialized_thing = XmlSerializer.serialize(item) + outfile.write(serialized_thing.encode()) + else: + with open(filename, mode='w') as outfile: + serialized_thing = XmlSerializer.serialize(item) + outfile.write(serialized_thing) + + +def deserialize(filename: pathlib.Path): + """ + Deserialize an OpenMM System, State, or Integrator. + + Parameters + ---------- + item : System, State, or Integrator + The thing to be serialized + filename : str + The filename to serialize to + """ + from openmm import XmlSerializer + + # Create parent directory if it doesn't exist + filename_basedir = filename.parent + if not filename_basedir.exists(): + os.makedirs(filename_basedir) + + if filename.suffix == '.gz': + import gzip + with gzip.open(filename, mode='rb') as infile: + serialized_thing = infile.read().decode() + item = XmlSerializer.deserialize(serialized_thing) + if filename.suffix == '.bz2': + import bz2 + with bz2.open(filename, mode='rb') as infile: + serialized_thing = infile.read().decode() + item = XmlSerializer.deserialize(serialized_thing) + else: + with open(filename, mode='r') as infile: + serialized_thing = infile.read() + item = XmlSerializer.deserialize(serialized_thing) + + return item From 45e656873ec508382cc281b84d88bed26addbb00 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Fri, 13 Oct 2023 14:08:52 -0700 Subject: [PATCH 05/10] Removed need for HybridTopologyFactory in SimulationUnit This avoids: - serlializing/deserializing/remaking the HybridTopologyFactory in the SimulationUnit - makes it easier to make a Folding@Home-compatible version, if we want to hold on to trajectory data in a similar way --- feflow/protocols/nonequilibrium_cycling.py | 112 +++++++++++---------- 1 file changed, 57 insertions(+), 55 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index 2e48107..f58c17b 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -23,7 +23,7 @@ from openff.units import unit from openff.units.openmm import to_openmm, from_openmm -from ..utils.data import serialize +from ..utils.data import serialize, deserialize # Specific instance of logger for this module # logger = logging.getLogger(__name__) @@ -148,31 +148,26 @@ def extract_positions(context, hybrid_topology_factory, atom_selection_exp="not def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): """ - Execute the simulation part of the Nonequilibrium switching protocol using GUFE objects. + Execute the setup part of the nonequilibrium switching protocol. Parameters ---------- ctx: gufe.protocols.protocolunit.Context The gufe context for the unit. - state_a : gufe.ChemicalSystem The initial chemical system. - - state_b : gufe.ChemicalSystem The objective chemical system. - mapping : dict[str, gufe.mapping.ComponentMapping] A dict featuring mappings between the two chemical systems. - settings : gufe.settings.model.Settings The full settings for the protocol. Returns ------- dict : dict[str, str] - Dictionary with paths to work arrays, both forward and reverse, and trajectory coordinates for systems - A and B. + Dictionary with paths to work arrays, both forward and reverse, and + trajectory coordinates for systems A and B. """ # needed imports import numpy as np @@ -187,6 +182,8 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): # Check compatibility between states (same receptor and solvent) self._check_states_compatibility(state_a, state_b) + phase = self._detect_phase(state_a, state_b) # infer phase from systems and components + # Get components from systems if found (None otherwise) -- NOTE: Uses hardcoded keys! receptor_a = state_a.components.get("protein") # receptor_b = state_b.components.get("protein") # Should not be needed @@ -310,10 +307,6 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): ) ####### END OF SETUP ######### - traj_save_frequency = settings.traj_save_frequency - work_save_frequency = settings.work_save_frequency # Note: this is divisor of traj save freq. - selection_expression = settings.atom_selection_expression - system = hybrid_factory.hybrid_system positions = hybrid_factory.hybrid_positions @@ -363,7 +356,11 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): return {'system': system_outfile, 'state': state_outfile, - 'integrator': integrator_outfile} + 'integrator': integrator_outfile, + 'phase': phase, + 'old_atom_indices': hybrid_factory.old_atom_indices, + 'new_atom_indices': hybrid_factory.new_atom_indices, + } class SimulationUnit(ProtocolUnit): @@ -373,7 +370,7 @@ class SimulationUnit(ProtocolUnit): """ @staticmethod - def extract_positions(context, hybrid_topology_factory, atom_selection_exp="not water"): + def extract_positions(context, old_atom_indices, new_atom_indices): """ Extract positions from initial and final systems based from the hybrid topology. @@ -383,8 +380,6 @@ def extract_positions(context, hybrid_topology_factory, atom_selection_exp="not Current simulation context where from extract positions. hybrid_topology_factory: perses.annihilation.relative.HybridTopologyFactory Hybrid topology factory where to extract positions and mapping information - atom_selection_exp: str, optional - Atom selection expression using mdtraj syntax. Defaults to "not water" Returns ------- @@ -400,29 +395,19 @@ def extract_positions(context, hybrid_topology_factory, atom_selection_exp="not 3. Merge that information into mdtraj.Trajectory 4. Filter positions for initial/final according to selection string """ - # TODO: Maybe we want this as a helper/utils function in perses. We also need tests for this. - import mdtraj as md import numpy as np # Get positions from current openmm context positions = context.getState(getPositions=True).getPositions(asNumpy=True) - # Get topology from HTF - indices for initial and final topologies in hybrid topology - initial_indices = np.asarray(hybrid_topology_factory.old_atom_indices) - final_indices = np.asarray(hybrid_topology_factory.new_atom_indices) - hybrid_topology = hybrid_topology_factory.hybrid_topology - selection = atom_selection_exp - md_trajectory = md.Trajectory(xyz=positions, topology=hybrid_topology) - selection_indices = md_trajectory.topology.select(selection) + # Get indices for initial and final topologies in hybrid topology + initial_indices = np.asarray(old_atom_indices) + final_indices = np.asarray(new_atom_indices) - # Now we have to find the intersection/overlap between selected indices in the hybrid - # topology and the initial/final positions, respectively - initial_selected_indices = np.intersect1d(initial_indices, selection_indices) - final_selected_indices = np.intersect1d(final_indices, selection_indices) - initial_selected_positions = md_trajectory.xyz[0, initial_selected_indices, :] - final_selected_positions = md_trajectory.xyz[0, final_selected_indices, :] + initial_positions = positions[initial_indices, :] + final_positions = positions[final_indices, :] - return initial_selected_positions, final_selected_positions + return initial_positions, final_positions def _execute(self, ctx, *, setup, settings, **inputs): """ @@ -443,7 +428,6 @@ def _execute(self, ctx, *, setup, settings, **inputs): Dictionary with paths to work arrays, both forward and reverse, and trajectory coordinates for systems A and B. """ - # needed imports import numpy as np import openmm import openmm.unit as openmm_unit @@ -459,13 +443,26 @@ def _execute(self, ctx, *, setup, settings, **inputs): file_handler.setFormatter(log_formatter) file_logger.addHandler(file_handler) - # GET STATE, SYSTEM, AND INTEGRATOR FROM SETUP UNIT + # Get state, system, and integrator from setup unit + system = deserialize(setup.outputs['system']) + state = deserialize(setup.outputs['state']) + integrator = deserialize(setup.outputs['integrator']) + + # Get atom indices for either end of the hybrid topology + old_atom_indices = setup.output['old_atom_indices'] + new_atom_indices = setup.output['new_atom_indices'] # Set up context platform = get_openmm_platform(settings.platform) context = openmm.Context(system, integrator, platform) - context.setPeriodicBoxVectors(*system.getDefaultPeriodicBoxVectors()) - context.setPositions(positions) + context.setState(state) + + # Extract settings used below + neq_steps = settings.eq_steps + eq_steps = settings.neq_steps + traj_save_frequency = settings.traj_save_frequency + work_save_frequency = settings.work_save_frequency # Note: this is divisor of traj save freq. + selection_expression = settings.atom_selection_expression try: @@ -475,7 +472,7 @@ def _execute(self, ctx, *, setup, settings, **inputs): # Coarse number of steps -- each coarse consists of work_save_frequency steps coarse_eq_steps = int(eq_steps/work_save_frequency) # Note: eq_steps is multiple of work save steps - coarse_neq_steps = int(neq_steps / work_save_frequency) # Note: neq_steps is multiple of work save steps + coarse_neq_steps = int(neq_steps/work_save_frequency) # Note: neq_steps is multiple of work save steps # TODO: Also get the GPU information (plain try-except with nvidia-smi) @@ -489,14 +486,15 @@ def _execute(self, ctx, *, setup, settings, **inputs): if step % traj_save_frequency == 0: file_logger.debug(f"coarse step: {step}: saving trajectory (freq {traj_save_frequency})") initial_positions, final_positions = self.extract_positions(context, - hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + old_atom_indices, + new_atom_indices) forward_eq_old.append(initial_positions) forward_eq_new.append(final_positions) # Make sure trajectories are stored at the end of the eq loop file_logger.debug(f"coarse step: {step}: saving trajectory (freq {traj_save_frequency})") - initial_positions, final_positions = self.extract_positions(context, hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + initial_positions, final_positions = self.extract_positions(context, + old_atom_indices, + new_atom_indices) forward_eq_old.append(initial_positions) forward_eq_new.append(final_positions) @@ -513,13 +511,14 @@ def _execute(self, ctx, *, setup, settings, **inputs): forward_works.append(integrator.get_protocol_work(dimensionless=True)) if fwd_step % traj_save_frequency == 0: initial_positions, final_positions = self.extract_positions(context, - hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + old_atom_indices, + new_atom_indices) forward_neq_old.append(initial_positions) forward_neq_new.append(final_positions) # Make sure trajectories are stored at the end of the neq loop - initial_positions, final_positions = self.extract_positions(context, hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + initial_positions, final_positions = self.extract_positions(context, + old_atom_indices, + new_atom_indices) forward_neq_old.append(initial_positions) forward_neq_new.append(final_positions) @@ -532,13 +531,14 @@ def _execute(self, ctx, *, setup, settings, **inputs): integrator.step(work_save_frequency) if step % traj_save_frequency == 0: initial_positions, final_positions = self.extract_positions(context, - hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + old_atom_indices, + new_atom_indices) reverse_eq_new.append(initial_positions) # TODO: Maybe better naming not old/new but initial/final reverse_eq_old.append(final_positions) # Make sure trajectories are stored at the end of the eq loop - initial_positions, final_positions = self.extract_positions(context, hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + initial_positions, final_positions = self.extract_positions(context, + old_atom_indices, + new_atom_indices) reverse_eq_old.append(initial_positions) reverse_eq_new.append(final_positions) @@ -553,13 +553,14 @@ def _execute(self, ctx, *, setup, settings, **inputs): reverse_works.append(integrator.get_protocol_work(dimensionless=True)) if rev_step % traj_save_frequency == 0: initial_positions, final_positions = self.extract_positions(context, - hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + old_atom_indices, + new_atom_indices) reverse_neq_old.append(initial_positions) reverse_neq_new.append(final_positions) # Make sure trajectories are stored at the end of the neq loop - initial_positions, final_positions = self.extract_positions(context, hybrid_topology_factory=hybrid_factory, - atom_selection_exp=selection_expression) + initial_positions, final_positions = self.extract_positions(context, + old_atom_indices, + new_atom_indices) forward_eq_old.append(initial_positions) forward_eq_new.append(final_positions) @@ -571,6 +572,7 @@ def _execute(self, ctx, *, setup, settings, **inputs): file_logger.info(f"replicate_{self.name} Nonequilibrium cycle total walltime: {cycle_walltime}") # Computing performance in ns/day + timestep = to_openmm(settings.timestep) simulation_time = 2*(eq_steps + neq_steps)*timestep walltime_in_seconds = cycle_walltime.total_seconds() * openmm_unit.seconds estimated_performance = simulation_time.value_in_unit( @@ -578,7 +580,7 @@ def _execute(self, ctx, *, setup, settings, **inputs): file_logger.info(f"replicate_{self.name} Estimated performance: {estimated_performance} ns/day") # Serialize works - phase = self._detect_phase(state_a, state_b) # infer phase from systems and components + phase = setup.outputs['phase'] forward_work_path = ctx.shared / f"forward_{phase}_{self.name}.npy" reverse_work_path = ctx.shared / f"reverse_{phase}_{self.name}.npy" with open(forward_work_path, 'wb') as out_file: From afac441b3bc19db8c92564b60998ab4423ea34d0 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Fri, 13 Oct 2023 14:39:18 -0700 Subject: [PATCH 06/10] old -> initial, new -> final --- feflow/protocols/nonequilibrium_cycling.py | 46 +++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index f58c17b..603c586 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -358,8 +358,8 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): 'state': state_outfile, 'integrator': integrator_outfile, 'phase': phase, - 'old_atom_indices': hybrid_factory.old_atom_indices, - 'new_atom_indices': hybrid_factory.new_atom_indices, + 'initial_atom_indices': hybrid_factory.initial_atom_indices, + 'final_atom_indices': hybrid_factory.final_atom_indices, } @@ -370,7 +370,7 @@ class SimulationUnit(ProtocolUnit): """ @staticmethod - def extract_positions(context, old_atom_indices, new_atom_indices): + def extract_positions(context, initial_atom_indices, final_atom_indices): """ Extract positions from initial and final systems based from the hybrid topology. @@ -401,8 +401,8 @@ def extract_positions(context, old_atom_indices, new_atom_indices): positions = context.getState(getPositions=True).getPositions(asNumpy=True) # Get indices for initial and final topologies in hybrid topology - initial_indices = np.asarray(old_atom_indices) - final_indices = np.asarray(new_atom_indices) + initial_indices = np.asarray(initial_atom_indices) + final_indices = np.asarray(final_atom_indices) initial_positions = positions[initial_indices, :] final_positions = positions[final_indices, :] @@ -449,8 +449,8 @@ def _execute(self, ctx, *, setup, settings, **inputs): integrator = deserialize(setup.outputs['integrator']) # Get atom indices for either end of the hybrid topology - old_atom_indices = setup.output['old_atom_indices'] - new_atom_indices = setup.output['new_atom_indices'] + initial_atom_indices = setup.output['initial_atom_indices'] + final_atom_indices = setup.output['final_atom_indices'] # Set up context platform = get_openmm_platform(settings.platform) @@ -486,15 +486,15 @@ def _execute(self, ctx, *, setup, settings, **inputs): if step % traj_save_frequency == 0: file_logger.debug(f"coarse step: {step}: saving trajectory (freq {traj_save_frequency})") initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) forward_eq_old.append(initial_positions) forward_eq_new.append(final_positions) # Make sure trajectories are stored at the end of the eq loop file_logger.debug(f"coarse step: {step}: saving trajectory (freq {traj_save_frequency})") initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) forward_eq_old.append(initial_positions) forward_eq_new.append(final_positions) @@ -511,14 +511,14 @@ def _execute(self, ctx, *, setup, settings, **inputs): forward_works.append(integrator.get_protocol_work(dimensionless=True)) if fwd_step % traj_save_frequency == 0: initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) forward_neq_old.append(initial_positions) forward_neq_new.append(final_positions) # Make sure trajectories are stored at the end of the neq loop initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) forward_neq_old.append(initial_positions) forward_neq_new.append(final_positions) @@ -531,14 +531,14 @@ def _execute(self, ctx, *, setup, settings, **inputs): integrator.step(work_save_frequency) if step % traj_save_frequency == 0: initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) reverse_eq_new.append(initial_positions) # TODO: Maybe better naming not old/new but initial/final reverse_eq_old.append(final_positions) # Make sure trajectories are stored at the end of the eq loop initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) reverse_eq_old.append(initial_positions) reverse_eq_new.append(final_positions) @@ -553,14 +553,14 @@ def _execute(self, ctx, *, setup, settings, **inputs): reverse_works.append(integrator.get_protocol_work(dimensionless=True)) if rev_step % traj_save_frequency == 0: initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) reverse_neq_old.append(initial_positions) reverse_neq_new.append(final_positions) # Make sure trajectories are stored at the end of the neq loop initial_positions, final_positions = self.extract_positions(context, - old_atom_indices, - new_atom_indices) + initial_atom_indices, + final_atom_indices) forward_eq_old.append(initial_positions) forward_eq_new.append(final_positions) From d1350eeed17f3496157a36e6e98451fd5043abe0 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Fri, 13 Oct 2023 14:40:40 -0700 Subject: [PATCH 07/10] Remove redundant extract_positions method --- feflow/protocols/nonequilibrium_cycling.py | 52 ---------------------- 1 file changed, 52 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index 603c586..c5b573f 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -94,58 +94,6 @@ def _detect_phase(state_a, state_b): return detected_phase - @staticmethod - def extract_positions(context, hybrid_topology_factory, atom_selection_exp="not water"): - """ - Extract positions from initial and final systems based from the hybrid topology. - - Parameters - ---------- - context: openmm.Context - Current simulation context where from extract positions. - hybrid_topology_factory: perses.annihilation.relative.HybridTopologyFactory - Hybrid topology factory where to extract positions and mapping information - atom_selection_exp: str, optional - Atom selection expression using mdtraj syntax. Defaults to "not water" - - Returns - ------- - - Notes - ----- - It achieves this by taking the positions and indices from the initial and final states of - the transformation, and computing the overlap of these with the indices of the complete - hybrid topology, filtered by some mdtraj selection expression. - - 1. Get positions from context - 2. Get topology from HTF (already mdtraj topology) - 3. Merge that information into mdtraj.Trajectory - 4. Filter positions for initial/final according to selection string - """ - # TODO: Maybe we want this as a helper/utils function in perses. We also need tests for this. - import mdtraj as md - import numpy as np - - # Get positions from current openmm context - positions = context.getState(getPositions=True).getPositions(asNumpy=True) - - # Get topology from HTF - indices for initial and final topologies in hybrid topology - initial_indices = np.asarray(hybrid_topology_factory.initial_atom_indices) - final_indices = np.asarray(hybrid_topology_factory.final_atom_indices) - hybrid_topology = hybrid_topology_factory.hybrid_topology - selection = atom_selection_exp - md_trajectory = md.Trajectory(xyz=positions, topology=hybrid_topology) - selection_indices = md_trajectory.topology.select(selection) - - # Now we have to find the intersection/overlap between selected indices in the hybrid - # topology and the initial/final positions, respectively - initial_selected_indices = np.intersect1d(initial_indices, selection_indices) - final_selected_indices = np.intersect1d(final_indices, selection_indices) - initial_selected_positions = md_trajectory.xyz[0, initial_selected_indices, :] - final_selected_positions = md_trajectory.xyz[0, final_selected_indices, :] - - return initial_selected_positions, final_selected_positions - def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): """ Execute the setup part of the nonequilibrium switching protocol. From af1d09bba3374adac719951a91bef2526df84ef9 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Fri, 13 Oct 2023 14:41:05 -0700 Subject: [PATCH 08/10] Add note on atom_selection_expression no longer used --- feflow/settings/nonequilibrium_cycling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feflow/settings/nonequilibrium_cycling.py b/feflow/settings/nonequilibrium_cycling.py index e2c97d3..903dc4e 100644 --- a/feflow/settings/nonequilibrium_cycling.py +++ b/feflow/settings/nonequilibrium_cycling.py @@ -69,7 +69,7 @@ class Config: platform = 'CUDA' traj_save_frequency: int = 2000 work_save_frequency: int = 500 - atom_selection_expression: str = "not water" + atom_selection_expression: str = "not water" # no longer used # Number of replicates to run (1 cycle/replicate) num_replicates: int = 1 From fdb82324d64387e60f29ea98fc1668a45ea36b87 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Fri, 13 Oct 2023 20:18:59 -0700 Subject: [PATCH 09/10] Tests passing again. :D --- feflow/protocols/nonequilibrium_cycling.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index c5b573f..a906df0 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -281,13 +281,10 @@ def _execute(self, ctx, *, state_a, state_b, mapping, settings, **inputs): # Minimize openmm.LocalEnergyMinimizer.minimize(context) - # Equilibrate - context.setVelocitiesToTemperature(temperature) - # SERIALIZE SYSTEM, STATE, INTEGRATOR system_ = context.getSystem() - state_ = context.getState() + state_ = context.getState(getPositions=True) integrator_ = context.getIntegrator() system_outfile = ctx.shared / 'system.xml.bz2' @@ -380,7 +377,6 @@ def _execute(self, ctx, *, setup, settings, **inputs): import openmm import openmm.unit as openmm_unit from openmmtools.integrators import PeriodicNonequilibriumIntegrator - from perses.utils.openeye import generate_unique_atom_names # Setting up logging to file in shared filesystem file_logger = logging.getLogger("neq-cycling") @@ -395,16 +391,22 @@ def _execute(self, ctx, *, setup, settings, **inputs): system = deserialize(setup.outputs['system']) state = deserialize(setup.outputs['state']) integrator = deserialize(setup.outputs['integrator']) + PeriodicNonequilibriumIntegrator.restore_interface(integrator) # Get atom indices for either end of the hybrid topology - initial_atom_indices = setup.output['initial_atom_indices'] - final_atom_indices = setup.output['final_atom_indices'] + initial_atom_indices = setup.outputs['initial_atom_indices'] + final_atom_indices = setup.outputs['final_atom_indices'] # Set up context platform = get_openmm_platform(settings.platform) context = openmm.Context(system, integrator, platform) context.setState(state) + # Equilibrate + thermodynamic_settings = settings.thermo_settings + temperature = to_openmm(thermodynamic_settings.temperature) + context.setVelocitiesToTemperature(temperature) + # Extract settings used below neq_steps = settings.eq_steps eq_steps = settings.neq_steps From 50cf5483661ed5f3652854f67599730b7933ad4c Mon Sep 17 00:00:00 2001 From: David Dotson Date: Fri, 13 Oct 2023 20:31:40 -0700 Subject: [PATCH 10/10] Removing sample test module from cookiecutter --- feflow/tests/test_feflow.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 feflow/tests/test_feflow.py diff --git a/feflow/tests/test_feflow.py b/feflow/tests/test_feflow.py deleted file mode 100644 index 118bfd0..0000000 --- a/feflow/tests/test_feflow.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -Unit and regression test for the feflow package. -""" - -# Import package, test suite, and other packages as needed -import sys - -import pytest - -import feflow - - -def test_feflow_imported(): - """Sample test, will always pass so long as import statement worked.""" - print("importing ", feflow.__name__) - assert "feflow" in sys.modules - - -# Assert that a certain exception is raised -def f(): - raise SystemExit(1) - - -def test_mytest(): - with pytest.raises(SystemExit): - f()