Skip to content

Commit

Permalink
Merge pull request #163 from PhiCMS/feature/chain
Browse files Browse the repository at this point in the history
Added Chains flag to command tool
  • Loading branch information
mestia authored Dec 16, 2024
2 parents 37f0097 + b42655a commit c92a378
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 23 deletions.
Binary file added .DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ target/
# Other
*~
.nfs*
tests/
3 changes: 3 additions & 0 deletions plip/basic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
NOFIXFILE = False # Turn off writing to files for fixed PDB structures
PEPTIDES = [] # Definition which chains should be considered as peptide ligands
INTRA = None
RESIDUES = {}
KEEPMOD = False
DNARECEPTOR = False
OUTPUTFILENAME = "report" # Naming for the TXT and XML report files
NOPDBCANMAP = False # Skip calculation of mapping canonical atom order: PDB atom order
NOHYDRO = False # Do not add hydrogen bonds (in case already present in the structure)
MODEL = 1 # The model to be selected for multi-model structures (default = 1).
CHAINS = None # Define chains for protein-protein interaction detection


# Configuration file for Protein-Ligand Interaction Profiler (PLIP)
# Set thresholds for detection of interactions
Expand Down
12 changes: 12 additions & 0 deletions plip/basic/supplemental.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ def whichchain(atom):
return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None


def residue_belongs_to_receptor(res, config):
"""tests whether the residue is defined as receptor and is not part of a peptide or residue ligand."""
if config.CHAINS:
if config.CHAINS[0] and config.CHAINS[1]: # if receptor and ligand chains were given
return res.GetChain() in config.CHAINS[0] and res.GetChain() not in config.CHAINS[1]
# True if residue is part of receptor chains and not of ligand chains
if config.CHAINS[1]: # if only ligand chains were given
return res.GetChain() not in config.CHAINS[1] # True if residue is not part of ligand chains
return False # if only receptor chains were given or both is empty
return res.GetChain() not in config.PEPTIDES # True if residue is not part of peptide ligand.


#########################
# Mathematical operations
#########################
Expand Down
54 changes: 48 additions & 6 deletions plip/plipcmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import multiprocessing
import os
import sys
import ast
from argparse import ArgumentParser
from collections import namedtuple

Expand Down Expand Up @@ -38,9 +39,19 @@ def threshold_limiter(aparser, arg):
aparser.error("All thresholds have to be values larger than zero.")
return arg

def residue_list(input_string):
"""Parse mix of residue numbers and ranges passed with the --residues flag into one list"""
result = []
for part in input_string.split(','):
if '-' in part:
start, end = map(int, part.split('-'))
result.extend(range(start, end + 1))
else:
result.append(int(part))
return result

def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
"""Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output."""
"""Analysis of a single PDB file with optional chain filtering."""
if not as_string:
pdb_file_name = pdbfile.split('/')[-1]
startmessage = f'starting analysis of {pdb_file_name}'
Expand All @@ -50,7 +61,6 @@ def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
mol = PDBComplex()
mol.output_path = outpath
mol.load_pdb(pdbfile, as_string=as_string)
# @todo Offers possibility for filter function from command line (by ligand chain, position, hetid)
for ligand in mol.ligands:
mol.characterize_complex(ligand)

Expand Down Expand Up @@ -116,7 +126,7 @@ def remove_duplicates(slist):
return unique


def run_analysis(inputstructs, inputpdbids):
def run_analysis(inputstructs, inputpdbids, chains=None):
"""Main function. Calls functions for processing, report generation and visualization."""
pdbid, pdbpath = None, None
# @todo For multiprocessing, implement better stacktracing for errors
Expand All @@ -127,16 +137,16 @@ def run_analysis(inputstructs, inputpdbids):
output_prefix = config.OUTPUTFILENAME

if inputstructs is not None: # Process PDB file(s)
num_structures = len(inputstructs)
num_structures = len(inputstructs) # @question: how can it become more than one file? The tilde_expansion function does not consider this case.
inputstructs = remove_duplicates(inputstructs)
read_from_stdin = False
for inputstruct in inputstructs:
if inputstruct == '-':
if inputstruct == '-': # @expl: when user gives '-' as input, pdb file is read from stdin
inputstruct = sys.stdin.read()
read_from_stdin = True
if config.RAWSTRING:
if sys.version_info < (3,):
inputstruct = bytes(inputstruct).decode('unicode_escape')
inputstruct = bytes(inputstruct).decode('unicode_escape') # @expl: in Python2, the bytes object is just a string.
else:
inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape')
else:
Expand Down Expand Up @@ -218,6 +228,9 @@ def main():
help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
nargs="+")
ligandtype.add_argument("--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.")
parser.add_argument("--residues", dest="residues", default=[], nargs="+",
help="""Allows to specify which residues of the chain(s) should be considered as peptide ligands.
Give single residues (separated with comma) or ranges (with dash) or both, for several chains separate selections with one space""")
parser.add_argument("--keepmod", dest="keepmod", default=False,
help="Keep modified residues as ligands",
action="store_true")
Expand All @@ -241,7 +254,19 @@ def main():
for t in thresholds:
parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val),
help=argparse.SUPPRESS)

# Add argument to define receptor and ligand chains
parser.add_argument("--chains", dest="chains", type=str,
help="Specify chains as receptor/ligand groups, e.g., '[['A'], ['B']]'. "
"Use format [['A'], ['B', 'C']] to define A as receptor, and B, C as ligands.")


arguments = parser.parse_args()
# make sure, residues is only used together with --inter (could be expanded to --intra in the future)
if arguments.residues and not (arguments.peptides or arguments.intra):
parser.error("The --residues option requires specification of a chain with --inter or --peptide")
if arguments.residues and len(arguments.residues)!=len(arguments.peptides):
parser.error("Please provide residue numbers or ranges for each chain specified. Separate selections with a single space.")
# configure log levels
config.VERBOSE = True if arguments.verbose else False
config.QUIET = True if arguments.quiet else False
Expand All @@ -268,6 +293,7 @@ def main():
config.BREAKCOMPOSITE = arguments.breakcomposite
config.ALTLOC = arguments.altlocation
config.PEPTIDES = arguments.peptides
config.RESIDUES = dict(zip(arguments.peptides, map(residue_list, arguments.residues)))
config.INTRA = arguments.intra
config.NOFIX = arguments.nofix
config.NOFIXFILE = arguments.nofixfile
Expand All @@ -277,6 +303,22 @@ def main():
config.OUTPUTFILENAME = arguments.outputfilename
config.NOHYDRO = arguments.nohydro
config.MODEL = arguments.model

try:
# add inner quotes for python backend
if not arguments.chains:
config.CHAINS = None
else:
import re
quoted_input = re.sub(r'(?<!["\'])\b([a-zA-Z0-9_]+)\b(?!["\'])', r'"\1"', arguments.chains)
config.CHAINS = ast.literal_eval(quoted_input)
print(config.CHAINS)
if config.CHAINS and not all(isinstance(c, list) for c in config.CHAINS):
raise ValueError("Chains should be specified as a list of lists, e.g., '[[A], [B, C]]'.")
except (ValueError, SyntaxError):
parser.error("The --chains option must be in the format '[[A], [B, C]]'.")


# Make sure we have pymol with --pics and --pymol
if config.PICS or config.PYMOL:
try:
Expand Down
33 changes: 21 additions & 12 deletions plip/structure/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize
from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative
from plip.basic.supplemental import residue_belongs_to_receptor
from plip.structure.detection import halogen, pication, water_bridges, metal_complexation
from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge

Expand Down Expand Up @@ -256,7 +257,7 @@ def getligs(self):
Returns all non-empty ligands.
"""

if config.PEPTIDES == [] and config.INTRA is None:
if config.PEPTIDES == [] and config.INTRA is None and config.CHAINS is None:
# Extract small molecule ligands (default)
ligands = []

Expand Down Expand Up @@ -284,8 +285,16 @@ def getligs(self):
else:
# Extract peptides from given chains
self.water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)]
if config.PEPTIDES:
if config.PEPTIDES and not config.CHAINS:
peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES]

#Todo: Validate change here... Do we want to combine multiple chains to a single ligand?
# if yes can be easily added to the getpeptides function by flatten the resulting list - Philipp
elif config.CHAINS:
# chains is defined as list of list e.g. [['A'], ['B', 'C']] in which second list contains the
# ligand chains and the first one should be the receptor
peptide_ligands = [self.getpeptides(chain) for chain in config.CHAINS[1]]

elif config.INTRA is not None:
peptide_ligands = [self.getpeptides(config.INTRA), ]

Expand All @@ -304,7 +313,7 @@ def extract_ligand(self, kmer):
names = [x[0] for x in members]
longname = '-'.join([x[0] for x in members])

if config.PEPTIDES:
if config.PEPTIDES or config.CHAINS:
ligtype = 'PEPTIDE'
elif config.INTRA is not None:
ligtype = 'INTRA'
Expand Down Expand Up @@ -743,7 +752,7 @@ def refine_hydrophobic(all_h, pistacks):
hydroph = [h for h in sel2.values()]
hydroph_final = []
# 3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist
if config.PEPTIDES or config.INTRA:
if config.PEPTIDES or config.INTRA or config.CHAINS:
# the ligand also consists of amino acid residues, repeat step 2 just the other way around
sel3 = {}
for h in hydroph:
Expand Down Expand Up @@ -951,7 +960,7 @@ def find_charged(self, mol):
data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
a_set = []
# Iterate through all residue, exclude those in chains defined as peptides
for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]:
for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if residue_belongs_to_receptor(r, config)]:
if config.INTRA is not None:
if res.GetChain() != config.INTRA:
continue
Expand Down Expand Up @@ -992,7 +1001,7 @@ def find_charged(self, mol):
a_contributing.append(pybel.Atom(a))
a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein'))
if not len(a_contributing) == 0:
a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative',
a_set.append(data(atoms=a_contributing,atoms_orig_idx=a_contributing_orig_idx, type='negative',
center=centroid([ac.coords for ac in a_contributing]), restype=res.GetName(),
resnr=res.GetNum(),
reschain=res.GetChain()))
Expand Down Expand Up @@ -1051,7 +1060,7 @@ def __init__(self, cclass, ligand):
self.complex = cclass
self.molecule = ligand.mol # Pybel Molecule
# get canonical SMILES String, but not for peptide ligand (tend to be too long -> openBabel crashes)
self.smiles = "" if (config.INTRA or config.PEPTIDES) else self.molecule.write(format='can')
self.smiles = "" if (config.INTRA or config.PEPTIDES or config.CHAINS) else self.molecule.write(format='can')
self.inchikey = self.molecule.write(format='inchikey')
self.can_to_pdb = ligand.can_to_pdb
if not len(self.smiles) == 0:
Expand Down Expand Up @@ -1192,7 +1201,7 @@ def find_charged(self, all_atoms):
"""
data = namedtuple('lcharge', 'atoms orig_atoms atoms_orig_idx type center fgroup')
a_set = []
if not (config.INTRA or config.PEPTIDES):
if not (config.INTRA or config.PEPTIDES or config.CHAINS):
for a in all_atoms:
a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
a_orig = self.Mapper.id_to_atom(a_orig_idx)
Expand Down Expand Up @@ -1567,9 +1576,9 @@ def res_belongs_to_bs(res, cutoff, ligcentroid):
rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
# Check geometry
near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
# Check chain membership
restricted_chain = True if res.GetChain() in config.PEPTIDES else False
return (near_enough and not restricted_chain)
#Todo: Test if properly working
# Add restriction via chains flag
return near_enough and residue_belongs_to_receptor(res, config)

def get_atom(self, idx):
return self.atoms[idx]
Expand All @@ -1580,4 +1589,4 @@ def output_path(self):

@output_path.setter
def output_path(self, path):
self._output_path = tilde_expansion(path)
self._output_path = tilde_expansion(path)
19 changes: 17 additions & 2 deletions plip/test/test_literature_validated.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ def test_1xdn(self):
# Water bridges to Lys307, Arg309 and 111 from phosphate groups
waterbridges = {wb.resnr for wb in s.water_bridges}
# Water bridge to 60 not found due to prioritization
# Philipp: No interaction to 111 due to prioritization/omega angle compute correction
# --> Now two interactions with ASN92 which were before only one that is not checked here
# self.assertTrue({307, 309}.issubset(waterbridges))
self.assertTrue({307, 309, 111}.issubset(waterbridges))
# pi-stacking interaction with Phe209
pistackres = {pistack.resnr for pistack in s.pistacking}
Expand Down Expand Up @@ -312,6 +315,8 @@ def test_4kya(self):
tmpmol.characterize_complex(ligand)
s = tmpmol.interaction_sets[bsid]
# Hydrogen bonds to Ala609
# Philipp: In this case the protein is the acceptor
# hbonds = {hbond.resnr for hbond in s.hbonds_pdon + s.hbonds_ldon}
hbonds = {hbond.resnr for hbond in s.hbonds_pdon}
self.assertTrue({609}.issubset(hbonds))
# Saltbridge to Asp513
Expand Down Expand Up @@ -515,12 +520,16 @@ def test_1bju(self):
s = tmpmol.interaction_sets[bsid]
#@todo Publication show hydrogen bond interactions for Gly219
# Hydrogen bonds to Ser190, Ser195, Gly219 and Asp189

#Philipp: Only Hbont to 189 + 195 are possible. Else, donors would be included in multiple interactions
hbonds = {hbond.resnr for hbond in s.hbonds_pdon+s.hbonds_ldon}
self.assertTrue({189, 190, 195}.issubset(hbonds))
#self.assertTrue({189, 190, 195}.issubset(hbonds))
self.assertTrue({189, 195}.issubset(hbonds))
# Water bridges to Ser190 and Val227
# Water bridge to 190 not detected due to prioritization
# Philipp: Listen to the comment above
waterbridges = {wb.resnr for wb in s.water_bridges}
self.assertTrue({227}.issubset(waterbridges))
#self.assertTrue({227}.issubset(waterbridges))
# hydrophobic interaction of Leu99
hydrophobics = {hydrophobic.resnr for hydrophobic in s.all_hydrophobic_contacts}
self.assertTrue({99}.issubset(hydrophobics))
Expand Down Expand Up @@ -587,6 +596,8 @@ def test_2iuz(self):
# Water bridges to Trp137
# res nr 52 mentioned in alternative conformation not considered
waterbridges = {wb.resnr for wb in s.water_bridges}
# Philipp: Now detects two wbridges with ARG57 because the omega angles are better
#self.assertTrue({57}.issubset(waterbridges))
self.assertTrue({322}.issubset(waterbridges))
# pi-stacking interaction with Trp384, Trp137 and Trp52
pistackres = {pistack.resnr for pistack in s.pistacking}
Expand Down Expand Up @@ -701,6 +712,8 @@ def test_1hvi(self):
# Water bridges
waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges}
# #@todo Water bridge with 50B not detected
# Philipp: Water bridge with 50A not detected -> Angle prio leads to two wbridges with 50B
# self.assertTrue({'50B'}.issubset(waterbridges))
self.assertTrue({'50A'}.issubset(waterbridges)) # Bridging Ile-B50 and Ile-A50 with ligand
# pi-cation Interactions
picat = {pication.resnr for pication in s.pication_laro}
Expand Down Expand Up @@ -752,4 +765,6 @@ def test_1hpx(self):
# Water bridges
waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges}
# Waterbridge with Gly27 is detected instead of Ala28/Asp29
# Philipp: Waterbridge with 50B not detected rather two towards 50A
# self.assertTrue({'50B', '29A'}.issubset(waterbridges))
self.assertTrue({'50A', '50B', '29A'}.issubset(waterbridges))
9 changes: 6 additions & 3 deletions plip/visualization/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def visualize_in_pymol(plcomplex):
pdbid = plcomplex.pdbid
lig_members = plcomplex.lig_members
chain = plcomplex.chain
if config.PEPTIDES:
if config.PEPTIDES or config.CHAINS:
vis.ligname = 'PeptideChain%s' % plcomplex.chain
if config.INTRA is not None:
vis.ligname = 'Intra%s' % plcomplex.chain
Expand All @@ -45,7 +45,10 @@ def visualize_in_pymol(plcomplex):
cmd.set_name(current_name, pdbid)
cmd.hide('everything', 'all')
if config.PEPTIDES:
cmd.select(ligname, 'chain %s and not resn HOH' % plcomplex.chain)
if plcomplex.chain in config.RESIDUES.keys():
cmd.select(ligname, 'chain %s and not resn HOH and resi %s' % (plcomplex.chain, "+".join(map(str, config.RESIDUES[plcomplex.chain]))))
else:
cmd.select(ligname, 'chain %s and not resn HOH' % plcomplex.chain)
else:
cmd.select(ligname, 'resn %s and chain %s and resi %s*' % (hetid, chain, plcomplex.position))
logger.debug(f'selecting ligand for PDBID {pdbid} and ligand name {ligname}')
Expand Down Expand Up @@ -97,7 +100,7 @@ def visualize_in_pymol(plcomplex):
cmd.hide('cartoon', '%sLines' % plcomplex.pdbid)
cmd.show('lines', '%sLines' % plcomplex.pdbid)

if config.PEPTIDES:
if config.PEPTIDES or config.CHAINS:
filename = "%s_PeptideChain%s" % (pdbid.upper(), plcomplex.chain)
if config.PYMOL:
vis.save_session(config.OUTPATH, override=filename)
Expand Down

0 comments on commit c92a378

Please sign in to comment.