Skip to content

Commit

Permalink
Parse InChI and AuxInfo from executable output
Browse files Browse the repository at this point in the history
  • Loading branch information
JanCBrammer committed Oct 16, 2024
1 parent 632f444 commit 236a36e
Showing 1 changed file with 32 additions and 4 deletions.
36 changes: 32 additions & 4 deletions INCHI-1-TEST/tests/test_executable/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
import subprocess
import re
from typing import Callable
from pathlib import Path
from dataclasses import dataclass
Expand All @@ -17,7 +18,32 @@ def pytest_addoption(parser):
@dataclass
class InchiResult:
    """Bundle of the values collected from one run of the InChI executable."""

    # Captured text from the executable process.
    # NOTE(review): the fixture in this file fills this from the subprocess's
    # stderr capture -- confirm that is intended.
    stdout: str
    # NOTE(review): this listing looks like a diff view; `output` may be the
    # pre-change field that `inchi` / `aux_info` replace -- confirm.
    output: str  # InChI and AuxInfo
    # InChI string parsed from the executable's output file ("" if none found).
    inchi: str
    # AuxInfo string parsed from the executable's output file ("" if none found).
    aux_info: str


def parse_inchi_from_executable_output(output: str) -> str:
    """Extract the first InChI string from the executable's output.

    The RegEx does not validate the InChI.
    It's meant to be lenient enough to recognize (almost) any InChI in the output.
    See https://chemistry.stackexchange.com/questions/82144/what-is-the-correct-regular-expression-for-inchi.

    Args:
        output: Text produced by the InChI executable.

    Returns:
        The first occurrence of ``InChI=1/`` or ``InChI=1S/`` together with
        the run of non-whitespace characters that follows it, without any
        trailing whitespace. An empty string is returned (and a notice is
        printed) if the text contains no such occurrence.
    """
    # `\S+` is greedy, so it already ends at the first whitespace character or
    # at the end of the text. Consuming that delimiter as part of the match
    # would leak a trailing newline/space into the returned identifier.
    inchi_match = re.search(r"InChI=1S?/\S+", output)
    if inchi_match is None:
        print("Executable output does not contain an InChI.")
        return ""

    return inchi_match.group()


def parse_aux_info_from_executable_output(output: str) -> str:
    """Extract the first AuxInfo string from the executable's output.

    Like the InChI parser, the RegEx is lenient and does not validate the
    AuxInfo; it only locates it.

    Args:
        output: Text produced by the InChI executable.

    Returns:
        The first occurrence of ``AuxInfo=`` together with the run of
        non-whitespace characters that follows it, without any trailing
        whitespace. An empty string is returned (and a notice is printed) if
        the text contains no such occurrence.
    """
    # `\S+` is greedy, so it already ends at the first whitespace character or
    # at the end of the text. Consuming that delimiter as part of the match
    # would leak a trailing newline/space into the returned value.
    aux_info_match = re.search(r"AuxInfo=\S+", output)
    if aux_info_match is None:
        print("Executable output does not contain an AuxInfo.")
        return ""

    return aux_info_match.group()


@pytest.fixture
Expand All @@ -28,16 +54,18 @@ def _run_inchi_exe(molfile_path: str, args: str = "") -> InchiResult:
if not Path(exe_path).exists():
raise FileNotFoundError(f"InChI executable not found at {exe_path}.")

output_path = tmp_path.joinpath("output.txt")
result = subprocess.run(
[exe_path, molfile_path, str(tmp_path.joinpath("output.txt"))]
+ args.split(),
[exe_path, molfile_path, str(output_path)] + args.split(),
capture_output=True,
text=True,
)
output = output_path.read_text()

return InchiResult(
stdout=result.stderr,
output=Path(tmp_path.joinpath("output.txt")).read_text(),
inchi=parse_inchi_from_executable_output(output),
aux_info=parse_aux_info_from_executable_output(output),
)

return _run_inchi_exe

0 comments on commit 236a36e

Please sign in to comment.