diff --git a/docs/source/featurizers/global/raspa.rst b/docs/source/featurizers/global/raspa.rst index 91ce30df..fcae6f98 100644 --- a/docs/source/featurizers/global/raspa.rst +++ b/docs/source/featurizers/global/raspa.rst @@ -47,3 +47,21 @@ The Henry coefficient is strongly connected to the energy grid and might sometim :scalar: False We use the implementation in [Dubbeldam]_ + + + +Loading calculation +.................. + +The loading is the pure component uptake on a MOF. + +.. featurizer:: Loading + :id: Loading + :considers_geometry: True + :considers_structure_graph: False + :encodes_chemistry: True + :scope: global + :scalar: True + + We use the implementation in [Dubbeldam]_ + diff --git a/src/mofdscribe/featurizers/__init__.py b/src/mofdscribe/featurizers/__init__.py index 73f995e3..a7eda53e 100644 --- a/src/mofdscribe/featurizers/__init__.py +++ b/src/mofdscribe/featurizers/__init__.py @@ -22,6 +22,7 @@ from .chemistry import RACS # noqa: F401 from .chemistry import EnergyGridHistogram # noqa: F401 from .chemistry import Henry # noqa: F401 +from .chemistry import Loading # noqa: F401 from .chemistry import PartialChargeHistogram # noqa: F401 from .chemistry import PartialChargeStats # noqa: F401 from .chemistry import PriceLowerBound # noqa: F401 diff --git a/src/mofdscribe/featurizers/chemistry/__init__.py b/src/mofdscribe/featurizers/chemistry/__init__.py index ade4df4c..27b3909e 100644 --- a/src/mofdscribe/featurizers/chemistry/__init__.py +++ b/src/mofdscribe/featurizers/chemistry/__init__.py @@ -4,6 +4,7 @@ from .aprdf import APRDF # noqa: F401 from .energygrid import EnergyGridHistogram # noqa: F401 from .henry import Henry # noqa: F401 +from .loading import Loading # noqa: F401 from .partialchargehistogram import PartialChargeHistogram # noqa: F401 from .partialchargestats import PartialChargeStats # noqa: F401 from .price import PriceLowerBound # noqa: F401 diff --git a/src/mofdscribe/featurizers/chemistry/energygrid.py b/src/mofdscribe/featurizers/chemistry/energygrid.py index 6aab9602..92684def 100644 --- a/src/mofdscribe/featurizers/chemistry/energygrid.py +++ b/src/mofdscribe/featurizers/chemistry/energygrid.py @@ -178,10 +178,10 @@ def __init__( super().__init__(primitive=primitive) def fit_transform(self, structures: List[Union[Structure, IStructure]]): - ... + pass def fit(self, structure: Union[Structure, IStructure]): - ... + pass def _get_grid(self): return np.arange(self.min_energy_vdw, self.max_energy_vdw, self.bin_size_vdw) diff --git a/src/mofdscribe/featurizers/chemistry/loading.py b/src/mofdscribe/featurizers/chemistry/loading.py new file mode 100644 index 00000000..4df9212e --- /dev/null +++ b/src/mofdscribe/featurizers/chemistry/loading.py @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- +"""Featurizer that runs RASPA to calculate the Henry coefficient.""" +import os +from glob import glob +from typing import List, Union + +import numpy as np +from pymatgen.core import IStructure, Structure + +from mofdscribe.featurizers.base import MOFBaseFeaturizer +from mofdscribe.featurizers.utils.extend import operates_on_istructure, operates_on_structure +from mofdscribe.featurizers.utils.raspa.base_parser import parse_base_output +from mofdscribe.featurizers.utils.raspa.resize_uc import resize_unit_cell +from mofdscribe.featurizers.utils.raspa.run_raspa import detect_raspa_dir, run_raspa + +__all__ = ["Loading"] +LOADING_INPUT_TEMPLATE = """SimulationType MonteCarlo +NumberOfCycles {cycles} +NumberOfInitializationCycles 4000 +PrintEvery {print_every} +RestartFile no + +Forcefield Local +UseChargesFromCIFFile yes + +Framework 0 +FrameworkName input +UnitCells {unit_cells} +HeliumVoidFraction 0.29 +ExternalTemperature {temp} +ExternalPressure {press} + +CutOff {cutoff} + +Component 0 MoleculeName {molname} + MoleculeDefinition Local + TranslationProbability 0.5 + RotationProbability 0.5 + ReinsertionProbability 0.5 + SwapProbability 1.0 + CreateNumberOfMolecules 0 + + +""" + + +def parse_loading(directory: Union[str, os.PathLike]) -> dict: + """Parse the widom output files in the given directory.""" + outputs = glob(os.path.join(directory, "Output", "System_0", "*.data")) + if len(outputs) != 1: + raise ValueError("Expected one output file, got {}".format(len(outputs))) + + parsed_output, _ = parse_base_output(outputs[0], system_name="System_0", ncomponents=1) + + component_results = list(parsed_output["components"].values())[0] + + return [ + component_results["loading_molar_absolute_average"], + component_results["loading_molar_excess_average"], + ], [ + component_results["loading_molar_absolute_dev"], + component_results["loading_molar_excess_dev"], + ] + + +@operates_on_structure +@operates_on_istructure +class Loading(MOFBaseFeaturizer): + """Computes the uptake for a given molecule using the RASPA [1]_ program. + + While loading are sometimes the targets of ML + algorithms, they are sometimes also used as features. + """ + + def __init__( + self, + raspa_dir: Union[str, os.PathLike, None] = None, + cycles: int = 6000, + temperature: float = 298, + pressure: float = 101325, + cutoff: float = 12, + mof_ff: str = "UFF", + mol_ff: str = "TraPPE", + mol_name: str = "CO2", + tail_corrections: bool = True, + mixing_rule: str = "Lorentz-Berthelot", + shifted: bool = False, + separate_interactions: bool = True, + run_eqeq: bool = True, + return_std: bool = False, + primitive: bool = False, + ): + """Initialize the featurizer. + + Args: + raspa_dir (Union[str, PathLike, None]): Path to the raspa + directory (with lib, bin, share) subdirectories. + If `None` we will look for the `RASPA_DIR` environment variable. + Defaults to None. + cycles (int): Number of simulation cycles. + Defaults to 5_000. + temperature (float): Simulation temperature in + Kelvin. Defaults to 300. + pressure (float): Simluation pressure in Pascals. + Defaults to 101325. + cutoff (float): Cutoff for simulation in Angstrom. + Defaults to 12. + mof_ff (str): Name of the forcefield used for the framework. + Defaults to "UFF". + mol_ff (str): Name of the forcefield used for the guest molecule. + Defaults to "TraPPE". + mol_name (str): Name of the guest molecule. Defaults to "CO2". + tail_corrections (bool): If true, use analytical tail-correction + for the contribution of the interaction potential after the + cutoff. Defaults to True. + mixing_rule (str): Mixing rule for framework and guest + molecule force field. Available options are `Jorgenson` and + `Lorentz-Berthelot`. Defaults to "Lorentz-Berthelot". + shifted (bool): If true, shifts the potential to equal to zero at the + cutoff. Defaults to False. + separate_interactions (bool): If True use framework's force field + for framework-molecule interactions. + Defaults to True. + run_eqeq (bool): If true, runs EqEq to compute charges. + Defaults to True. + return_std (bool): If true, return the standard deviations. + Defaults to False. + primitive (bool): If true, use the primitive unit cell. + Defaults to False. + + Raises: + ValueError: If the `RASPA_DIR` environment variable is not set. + """ + self.raspa_dir = raspa_dir if raspa_dir else os.environ.get("RASPA_DIR") + if self.raspa_dir is None: + try: + self.raspa_dir = detect_raspa_dir() + except ValueError: + raise ValueError( + "Please set the RASPA_DIR environment variable or provide the path for the class initialization." + ) + self.cycles = cycles + self.cutoff = cutoff + self.mof_ff = mof_ff + self.mol_ff = mol_ff + self.mol_name = mol_name + self.tail_corrections = tail_corrections + self.mixing_rule = mixing_rule + self.shifted = shifted + self.separate_interactions = separate_interactions + self.temperature = temperature + self.pressure = pressure + self.run_eqeq = run_eqeq + self.return_std = return_std + super().__init__(primitive=primitive) + + def _featurize(self, s: Union[Structure, IStructure]) -> np.array: + ff_molecules = {self.mol_name: self.mol_ff} + + parameters = { + "ff_framework": self.mof_ff, + "ff_molecules": ff_molecules, + "shifted": self.shifted, + "tail_corrections": self.tail_corrections, + "mixing_rule": self.mixing_rule, + "separate_interactions": self.separate_interactions, + } + replicas = resize_unit_cell(s, self.cutoff) + ucells = f"{replicas[0]} {replicas[1]} {replicas[2]}" + simulation_script = LOADING_INPUT_TEMPLATE.format( + cycles=self.cycles, + unit_cells=ucells, + cutoff=self.cutoff, + print_every=self.cycles // 10, + molname=self.mol_name, + temp=self.temperature, + press=self.pressure, + ) + + res, deviations = run_raspa( + s, + self.raspa_dir, + simulation_script, + parameters, + parse_loading, + self.run_eqeq, + ) + + if self.return_std: + res.extend(deviations) + return np.array(res) + + def feature_labels(self) -> List[str]: + feat = [ + f"loading_{self.mol_name}_{self.temperature}_{self.pressure}_mol/kg", + f"loading_excess_{self.mol_name}_{self.temperature}_{self.pressure}_mol/kg", + ] + if self.return_std: + feat.extend( + [ + f"loading_std_{self.mol_name}_{self.temperature}_{self.pressure}_mol/kg", + f"loading_excess_std_{self.mol_name}_{self.temperature}_{self.pressure}_mol/kg", + ] + ) + return feat + + def implementors(self) -> List[str]: + return ["Fergus Mcilwaine"] + + def citations(self) -> List[str]: + return [ + "@article{Dubbeldam2015," + "doi = {10.1080/08927022.2015.1010082}," + "url = {https://doi.org/10.1080/08927022.2015.1010082}," + "year = {2015}," + "month = feb," + "publisher = {Informa {UK} Limited}," + "volume = {42}," + "number = {2}," + "pages = {81--101}," + r"author = {David Dubbeldam and Sof{'{\i}}a Calero and " + "Donald E. Ellis and Randall Q. Snurr}," + "title = {{RASPA}: molecular simulation software for adsorption " + "and diffusion in flexible nanoporous materials}," + "journal = {Molecular Simulation}" + "}" + ] diff --git a/src/mofdscribe/featurizers/utils/raspa/base_parser.py b/src/mofdscribe/featurizers/utils/raspa/base_parser.py index df6299e0..27df3fcf 100644 --- a/src/mofdscribe/featurizers/utils/raspa/base_parser.py +++ b/src/mofdscribe/featurizers/utils/raspa/base_parser.py @@ -316,6 +316,18 @@ def parse_base_output(output_abs_path, system_name, ncomponents): # noqa: C901 res_per_component[icomponent]["loading_excess_average"] = float(line.split()[5]) res_per_component[icomponent]["loading_excess_dev"] = float(line.split()[7]) res_per_component[icomponent]["loading_excess_unit"] = "molecules/unit cell" + elif "Average loading absolute [mol/kg framework]" in line: + res_per_component[icomponent]["loading_molar_absolute_average"] = float( + line.split()[5] + ) + res_per_component[icomponent]["loading_molar_absolute_dev"] = float(line.split()[7]) + res_per_component[icomponent]["loading_molar_absolute_unit"] = "mol/kg framework" + elif "Average loading excess [mol/kg framework]" in line: + res_per_component[icomponent]["loading_molar_excess_average"] = float( + line.split()[5] + ) + res_per_component[icomponent]["loading_molar_excess_dev"] = float(line.split()[7]) + res_per_component[icomponent]["loading_molar_excess_unit"] = "mol/kg framework" icomponent += 1 if icomponent >= ncomponents: break diff --git a/tests/featurizers/bu/test_rdkit_adaptor.py b/tests/featurizers/bu/test_rdkit_adaptor.py index bd2652b1..1bb9cd89 100644 --- a/tests/featurizers/bu/test_rdkit_adaptor.py +++ b/tests/featurizers/bu/test_rdkit_adaptor.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- """Test the RDkitAdaptor.""" -import numpy as np from rdkit.Chem.Descriptors3D import Asphericity from mofdscribe.featurizers.bu.rdkitadaptor import RDKitAdaptor diff --git a/tests/featurizers/chemistry/test_loading.py b/tests/featurizers/chemistry/test_loading.py new file mode 100644 index 00000000..070b65f1 --- /dev/null +++ b/tests/featurizers/chemistry/test_loading.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +"""Test the Loading featurizer.""" +from mofdscribe.featurizers.chemistry.loading import Loading + +from ..helpers import is_jsonable + + +def test_loadinghoa(hkust_structure, irmof_structure): + """Make sure that the featurization works for typical MOFs and the number of features is as expected.""" + featurizer = Loading(cycles=10) + features = featurizer.featurize(hkust_structure) + labels = featurizer.feature_labels() + assert len(features) == len(labels) + features_irmof = featurizer.featurize(irmof_structure) + assert sum(abs(features - features_irmof)) > 0 + assert is_jsonable(dict(zip(featurizer.feature_labels(), features))) + assert features_irmof.ndim == 1 + + featurizer = Loading(cycles=500, return_std=True) + features = featurizer.featurize(hkust_structure) + assert len(features) == len(featurizer.feature_labels()) == 4 + + # make sure we indeed use pyeqeq + featurizer = Loading(cycles=500, return_std=True, run_eqeq=False) + features_no_charge = featurizer.featurize(hkust_structure) + assert features_no_charge[0] < features[0]