Skip to content

Commit

Permalink
secse v1.3 update
Browse files Browse the repository at this point in the history
  • Loading branch information
ChongLu121 committed Jun 13, 2023
1 parent c092526 commit 3822666
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 20 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ further validation.
----------------------------

1. Setting up dependencies
python ~=3.9, perl ~=5.32
python ~=3.9, perl ~=5.32
```bash
conda create --name secse -c conda-forge parallel tqdm biopandas openbabel chemprop xlrd=2 pandarallel rdkit=2022.09
conda activate secse
Expand Down Expand Up @@ -114,6 +114,10 @@ further validation.
- _spiro_site_count_, maximum of spiro ring site count, default=1, type=int
- _fused_site_count_, maximum of fused ring site count, default=3, type=int
- _rdkit_sa_score_, synthetic accessibility score (calculated by RDKit) cutoff, default=5, type=float
- _substructure_filter_, path to a file containing customized unwanted substructure SMARTS in "*.xls" format; set the
value to 0 if you do not have any additional unwanted substructures. PAINS patterns are already included by default. The file
should include columns for **`Pattern`**, **`ID`**, and **`Max`**, where the **`ID`** should be unique for each SMARTS. You can
refer to the example file [subtructure_filter_demo.xls](demo/subtructure_filter_demo.xls), default=0, type=string
Config file of a demo case [phgdh_demo_vina.ini](demo/phgdh_demo_vina.ini)
Customized rule json template [rules.json](demo/rules.json). Rule ID should be in the form G-001-XXXX, like
Expand All @@ -139,7 +143,8 @@ GNU Parallel installation
python ~=3.9, perl ~=5.32
numpy~=1.24.3, pandas~=1.3.3, xlrd~=2.0.1, pandarallel~=1.5.2, tqdm~=4.65.0, biopandas~=0.4.1, openbabel~=3.1.1, rdkit~=2022.09, chemprop~=1.5.2, pytorch~=2.0.0+cu117
numpy~=1.24.3, pandas~=1.3.3, xlrd~=2.0.1, pandarallel~=1.5.2, tqdm~=4.65.0, biopandas~=0.4.1, openbabel~=3.1.1, rdkit~
=2022.09, chemprop~=1.5.2, pytorch~=2.0.0+cu117
Linux server with CPUs only also works.
Expand Down
3 changes: 2 additions & 1 deletion demo/phgdh_demo_vina.ini
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,5 @@ ring_system_count = 4
bridged_site_count = 2
spiro_site_count = 1
fused_site_count = 3
rdkit_sa_score = 5
rdkit_sa_score = 5
substructure_filter = 0
Binary file added demo/subtructure_filter_demo.xls
Binary file not shown.
21 changes: 8 additions & 13 deletions secse/growing/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,17 @@ def __init__(self, gen, config_path):
self.input_smiles = None
self.mol = None
self.pains_smarts = None
self.strutFilter = StructureFilter()

config = configparser.ConfigParser()
config.read(config_path)

substructure_filter_file = config.get("properties", "substructure_filter")
if substructure_filter_file == "0":
self.strutFilter = StructureFilter()
else:
# print("Use additional substructure filter patters.")
self.strutFilter = StructureFilter(substructure_filter_file)

self.MW = config.getfloat("properties", "MW")
self.logP_lower = config.getfloat("properties", "logP_lower")
self.logP_upper = config.getfloat("properties", "logP_upper")
Expand Down Expand Up @@ -143,19 +150,7 @@ def alert_filter(self):
yield "PAINS"
yield "PASS"

def element_filter(self):
    """Flag SMILES that contain too many halogen, sulfur or phosphorus atoms.

    Reads ``self.input_smiles`` and counts atoms per element. Yields
    ``"element"`` when any of the limits F <= 5, Br <= 2, Cl <= 3, I <= 1,
    S <= 2, P <= 1 is exceeded, and always yields ``"PASS"`` as the last
    item.

    Atoms are counted from SMILES atom tokens instead of raw substrings, so
    bracket atoms whose symbol merely contains one of the letters (for
    example ``[Si]``, ``[Se]``, ``[Fe]``, ``[Pt]``, ``[Ir]``) are no longer
    counted as S, F, P or I. Aromatic ``s`` and ``p`` are counted as sulfur
    and phosphorus respectively.
    """
    # Local imports keep this method self-contained regardless of the
    # module-level import block.
    import re
    from collections import Counter

    limits = {"F": 5, "Br": 2, "Cl": 3, "I": 1, "S": 2, "P": 1}
    atom_token = re.compile(
        # Bracket atom: optional isotope digits, the element symbol, then
        # anything (chirality, H count, charge, atom map) up to "]".
        r"\[\d*(se|as|[A-Z][a-z]?|[bcnops])[^\]]*\]"
        # Organic-subset atom written without brackets.
        r"|(Cl|Br|[BCNOPSFI]|[bcnops])"
    )

    counts = Counter()
    for match in atom_token.finditer(self.input_smiles):
        symbol = match.group(1) or match.group(2)
        # Normalize aromatic (lower-case) symbols to their element symbol.
        counts[symbol.capitalize()] += 1

    if any(counts[element] > limit for element, limit in limits.items()):
        yield "element"
    yield "PASS"

def substructure_filter(self):
    """Yield the structure filter's verdict for the current molecule.

    Calls ``self.strutFilter.sfilter`` with ``self.mol`` and yields its
    return value as the only item of this generator.
    """
    verdict = self.strutFilter.sfilter(self.mol)
    yield verdict

def ring_system_filter(self):
Expand Down
8 changes: 4 additions & 4 deletions secse/utilities/substructure_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
import pandas as pd
from rdkit import Chem

FILTER_FILE = "Structure Filter_20211015_v1.12.xls"
FILTER_FILE = os.path.join(os.getenv("SECSE"), "utilities", "Structure Filter_20211015_v1.12.xls")


class StructureFilter:
def __init__(self):
df = pd.read_excel(os.path.join(os.getenv("SECSE"), "utilities", FILTER_FILE),
usecols=["Pattern", "ID", "Max"]).dropna()
def __init__(self, filter_lst=FILTER_FILE):
df = pd.read_excel(filter_lst, usecols=["Pattern", "ID", "Max"]).dropna()
df["ID"] = df["ID"].astype(str)
df = df.set_index("ID")
df["Pattern_sma"] = df["Pattern"].apply(lambda x: Chem.MolFromSmarts(x))
self.fdic = df[["Pattern_sma", "Max"]].T.to_dict()
Expand Down

0 comments on commit 3822666

Please sign in to comment.