feat: add MAX_ATOMS_XTB and MAX_ATOMS_FF env variables, update hashing to use the MD5-based hash_object

cheminfo-py · Dec 8, 2021 · fbc69ed · fbc69ed
1 parent 3683f28
commit fbc69ed
Show file tree

Hide file tree

Showing 7 changed files with 72 additions and 46 deletions.
diff --git a/README.md b/README.md
@@ -26,7 +26,7 @@ docker-compose up
 You have the option to customize the behavior of the app using environment variables:
 
 - `IMAGINARY_FREQ_THRESHOLD`: sets the maximum energy in cm-1 for imaginary frequency (if this threshold is exceeded, the output will contain a warning)
-- `MAX_ATOMS`: if the input contains more than this number of atoms, an error is thrown
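+- `MAX_ATOMS_XTB`/`MAX_ATOMS_FF`: maximum number of atoms accepted for xtb (`GFN1xTB`, `GFN2xTB`) and force-field (`GFNFF`) calculations, respectively; if the input contains more than this number of atoms, an error is thrown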
 - `TIMEOUT`: If the request takes longer than this time (in seconds) a `TimeOut` error is raised
 - `CACHEDIR`: Sets the directory for the diskcache. It will be mounted by the docker container.
 

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -9,7 +9,8 @@ services:
       - CACHEDIR=/ircache
       - WORKERS=${WORKERS}
       - OMP_NUM_THREADS=${OMP_NUM_THREADS}
-      - MAX_ATOMS=50
+      - MAX_ATOMS_XTB=60
+      - MAX_ATOMS_FF=100
       - TIMEOUT=${TIMEOUT}
       - LOG_LEVEL=debug
     ports:

diff --git a/xtbservice/ir.py b/xtbservice/ir.py
@@ -15,12 +15,27 @@
 from .cache import ir_cache, ir_from_molfile_cache, ir_from_smiles_cache
 from .models import IRResult
 from .optimize import run_xtb_opt
-from .settings import IMAGINARY_FREQ_THRESHOLD, TIMEOUT
-from .utils import get_hash, get_moments_of_inertia, hash_atoms, molfile2ase, smiles2ase
+from .settings import IMAGINARY_FREQ_THRESHOLD, MAX_ATOMS_FF, MAX_ATOMS_XTB, TIMEOUT
+from .utils import (
+    get_moments_of_inertia,
+    hash_atoms,
+    hash_object,
+    molfile2ase,
+    smiles2ase,
+)
+
+
+def get_max_atoms(method):
+    if method == "GFNFF":
+        return MAX_ATOMS_FF
+    elif method == "GFN2xTB":
+        return MAX_ATOMS_XTB
+    elif method == "GFN1xTB":
+        return MAX_ATOMS_XTB
 
 
 def ir_hash(atoms, method):
-    return hash(str(hash_atoms(atoms)) + method)
+    return hash_object(str(hash_atoms(atoms)) + method)
 
 
 def run_xtb_ir(
@@ -74,7 +89,11 @@ def run_xtb_ir(
         ]
 
         mode_info, has_imaginary, has_large_imaginary = compile_modes_info(
-            ir, linear, displacement_alignments, bond_displacements, bonds,
+            ir,
+            linear,
+            displacement_alignments,
+            bond_displacements,
+            bonds,
         )
         result = IRResult(
             wavenumbers=list(spectrum[0]),
@@ -97,15 +116,15 @@ def run_xtb_ir(
 
 @wrapt_timeout_decorator.timeout(TIMEOUT, use_signals=False)
 def calculate_from_smiles(smiles, method, myhash):
-    atoms, mol = smiles2ase(smiles)
+    atoms, mol = smiles2ase(smiles, get_max_atoms(method))
     opt_result = run_xtb_opt(atoms, method=method)
     result = run_xtb_ir(opt_result.atoms, method=method, mol=mol)
     ir_from_smiles_cache.set(myhash, result, expire=None)
     return result
 
 
 def ir_from_smiles(smiles, method):
-    myhash = get_hash(smiles + method)
+    myhash = hash_object(smiles + method)
     result = ir_from_smiles_cache.get(myhash)
     if result is None:
         result = calculate_from_smiles(smiles, method, myhash)
@@ -114,15 +133,15 @@ def ir_from_smiles(smiles, method):
 
 @wrapt_timeout_decorator.timeout(TIMEOUT, use_signals=False)
 def calculate_from_molfile(molfile, method, myhash):
-    atoms, mol = molfile2ase(molfile)
+    atoms, mol = molfile2ase(molfile, get_max_atoms(method))
     opt_result = run_xtb_opt(atoms, method=method)
     result = run_xtb_ir(opt_result.atoms, method=method, mol=mol)
     ir_from_molfile_cache.set(myhash, result, expire=None)
     return result
 
 
 def ir_from_molfile(molfile, method):
-    myhash = get_hash(molfile + method)
+    myhash = hash_object(molfile + method)
 
     result = ir_from_molfile_cache.get(myhash)
 
@@ -277,7 +296,15 @@ def get_displacement_xyz_for_mode(ir, frequencies, symbols, n):
     for i, pos in enumerate(ir.atoms.positions):
         xyz_file.append(
             "%2s %12.5f %12.5f %12.5f %12.5f %12.5f %12.5f\n"
-            % (symbols[i], pos[0], pos[1], pos[2], mode[i, 0], mode[i, 1], mode[i, 2],)
+            % (
+                symbols[i],
+                pos[0],
+                pos[1],
+                pos[2],
+                mode[i, 0],
+                mode[i, 1],
+                mode[i, 2],
+            )
         )
 
     xyz_file_string = "".join(xyz_file)

diff --git a/xtbservice/optimize.py b/xtbservice/optimize.py
@@ -10,11 +10,11 @@
 
 from .cache import opt_cache
 from .models import OptimizationResult
-from .utils import hash_atoms
+from .utils import hash_atoms, hash_object
 
 
 def opt_hash(atoms, method):
-    return hash(str(hash_atoms(atoms)) + method)
+    return hash_object(str(hash_atoms(atoms)) + method)
 
 
 def run_xtb_opt(

diff --git a/xtbservice/settings.py b/xtbservice/settings.py
@@ -4,9 +4,10 @@
 from fastapi.logger import logger
 
 IMAGINARY_FREQ_THRESHOLD = int(os.getenv("IMAGINARY_FREQ_THRESHOLD", 10))
-MAX_ATOMS = int(os.getenv("MAX_ATOMS", 50))
+MAX_ATOMS_XTB = int(os.getenv("MAX_ATOMS_XTB", 60))
+MAX_ATOMS_FF = int(os.getenv("MAX_ATOMS_FF", 100))
 TIMEOUT = int(os.getenv("TIMEOUT", 100))
 
 logger.info(
-    f"Settings: IMAGINARY_FREQ_THRESHOLD: {IMAGINARY_FREQ_THRESHOLD}, MAX_ATOMS: {MAX_ATOMS}, TIMEOUT: {TIMEOUT}"
+    f"Settings: IMAGINARY_FREQ_THRESHOLD: {IMAGINARY_FREQ_THRESHOLD}, MAX_ATOMS_XTB: {MAX_ATOMS_XTB}, MAX_ATOMS_FF: {MAX_ATOMS_FF}, TIMEOUT: {TIMEOUT}"
 )
diff --git a/xtbservice/utils.py b/xtbservice/utils.py
@@ -8,7 +8,11 @@
 from .cache import conformer_cache
 from .conformers import embed_conformer
 from .errors import TooLargeError
-from .settings import MAX_ATOMS
+from .settings import MAX_ATOMS_FF, MAX_ATOMS_XTB
+
+
+def hash_object(objec):
+    return hashlib.md5(str(objec).encode("utf-8")).hexdigest()
 
 
 def rdkit2ase(mol):
@@ -21,7 +25,14 @@ def rdkit2ase(mol):
     return atoms
 
 
-def molfile2ase(molfile: str) -> Atoms:
+def check_max_atoms(mol, max_atoms):
+    if mol.GetNumAtoms() > max_atoms:
+        raise TooLargeError(
+            f"Molecule can have maximal {max_atoms} atoms for this service"
+        )
+
+
+def molfile2ase(molfile: str, max_atoms: int = MAX_ATOMS_XTB) -> Atoms:
     try:
         result = conformer_cache.get(molfile)
     except KeyError:
@@ -30,30 +41,22 @@ def molfile2ase(molfile: str) -> Atoms:
     if result is None:
         mol = Chem.MolFromMolBlock(molfile, sanitize=True, removeHs=False)
         mol.UpdatePropertyCache(strict=False)
-        natoms = mol.GetNumAtoms()
-        if natoms > MAX_ATOMS:
-            raise TooLargeError(
-                f"Molecule can have maximal {MAX_ATOMS} atoms for this service"
-            )
+        check_max_atoms(mol, max_atoms)
         mol = embed_conformer(mol)
         result = rdkit2ase(mol), mol
         conformer_cache.set(molfile, result, expire=None)
     return result
 
 
-def smiles2ase(smiles: str) -> Atoms:
+def smiles2ase(smiles: str, max_atoms: int = MAX_ATOMS_XTB) -> Atoms:
     try:
         result = conformer_cache.get(smiles)
     except KeyError:
         pass
 
     if result is None:
         mol = Chem.MolFromSmiles(smiles)
-        natoms = mol.GetNumAtoms()
-        if natoms > MAX_ATOMS:
-            raise TooLargeError(
-                f"Molecule can have maximal {MAX_ATOMS} atoms for this service"
-            )
+        check_max_atoms(mol, max_atoms)
         refmol = Chem.AddHs(Chem.Mol(mol))
         refmol = embed_conformer(refmol)
         result = rdkit2ase(refmol), refmol
@@ -65,11 +68,7 @@ def hash_atoms(atoms: Atoms) -> int:
     symbols = str(atoms.symbols)
     positions = str(atoms.positions)
 
-    return hash(symbols + positions)
-
-
-def get_hash(string):
-    return hashlib.md5(string.encode("utf-8")).hexdigest()
+    return hash_object(symbols + positions)
 
 
 def get_center_of_mass(masses, positions):

diff --git a/xtbservice/xtbservice.py b/xtbservice/xtbservice.py
@@ -13,7 +13,7 @@
 from .errors import TooLargeError
 from .ir import ir_from_molfile, ir_from_smiles
 from .models import ConformerLibrary, ConformerRequest, IRRequest, IRResult
-from .settings import MAX_ATOMS
+from .settings import MAX_ATOMS_FF, MAX_ATOMS_XTB
 
 ALLOWED_HOSTS = ["*"]
 
@@ -30,6 +30,13 @@
 )
 
 
+def max_atoms_error():
+    return HTTPException(
+        status_code=422,
+        detail=f"This services only accepts structures with less than {MAX_ATOMS_FF} atoms for force-field calculations and {MAX_ATOMS_XTB} for xtb calculations.",
+    )
+
+
 @app.get("/app_version")
 @version(1)
 def read_version():
@@ -50,10 +57,7 @@ def post_get_ir_spectrum(irrequest: IRRequest):
                 detail="You need to provide either `molFile` or `smiles`",
             )
     except TooLargeError:
-        raise HTTPException(
-            status_code=422,
-            detail=f"This services only accepts structures with less than {MAX_ATOMS} atoms.",
-        )
+        raise max_atoms_error()
     except TimeoutError:
         raise HTTPException(status_code=500, detail="Calculation timed out.")
     return ir
@@ -83,10 +87,7 @@ def post_conformers(conformerrequest: ConformerRequest):
                 detail="You need to provide either `molFile` or `smiles`",
             )
     except TooLargeError:
-        raise HTTPException(
-            status_code=422,
-            detail=f"This services only accepts structures with less than {MAX_ATOMS} atoms.",
-        )
+        raise max_atoms_error()
     except TimeoutError:
         raise HTTPException(status_code=500, detail="Calculation timed out.")
     return conformers
@@ -98,10 +99,7 @@ def get_ir_spectrum(smiles: str, method: str = "GFNFF"):
     try:
         ir = ir_from_smiles(smiles, method)
     except TooLargeError:
-        raise HTTPException(
-            status_code=422,
-            detail=f"This services only accepts structures with less than {MAX_ATOMS} atoms.",
-        )
+        raise max_atoms_error()
     except TimeoutError:
         raise HTTPException(status_code=500, detail="Calculation timed out.")
     return ir