Skip to content

Commit

Permalink
PLIP v1.2.3 Better support for large and custom structures.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sebastian Salentin committed Feb 11, 2016
1 parent d683a34 commit 88e0cf9
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 26 deletions.
6 changes: 6 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Changelog
---------

### 1.2.3
* __Better support for files from MD and docking software__
* __Fixes issues with large and complex structures__
* Speed optimizations


### 1.2.2
* __Option to consider alternate atom locations (e.g. for ligands with several conformations)__
* Automatic fixing of missing ligand names
Expand Down
49 changes: 41 additions & 8 deletions plip/modules/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,19 @@ def parse_pdb(self):
covalent = []
alt = []
previous_ter = False

#New code : Do fixing first and then do mapping on fixed lines
#@TODO Test code
lastnum = 0 # Atom numbering (has to be consecutive)
for line in fil:
corrected_line = self.fix_pdbline(line)
corrected_lines.append(corrected_line)
corrected_line, newnum = self.fix_pdbline(line, lastnum)
if corrected_line is not None:
corrected_lines.append(corrected_line)
lastnum = newnum
corrected_pdb = ''.join(corrected_lines)


for line in corrected_lines:
if line.startswith(("ATOM", "HETATM")):
# Retrieve alternate conformations
atomid, location = int(line[6:11]), line[16]
Expand All @@ -83,21 +92,42 @@ def parse_pdb(self):
# Get covalent linkages between ligands
if line.startswith("LINK"):
covalent.append(self.get_linkage(line))

corrected_pdb = ''.join(corrected_lines)
return d, modres, covalent, alt, corrected_pdb

def fix_pdbline(self, pdbline):
def fix_pdbline(self, pdbline, lastnum):
"""Fix a PDB line if information is missing."""
# #@todo Introduce verbose/log
#@ todo Unit tests
fixed = False
newnum = 0
pdbline = pdbline.strip('\n')
# Some MD / Docking tools produce empty lines, leading to segfaults
if len(pdbline.strip()) == 0:
self.num_fixed_lines += 1
return None, lastnum
if len(pdbline) > 100: # Should be 80 long
self.num_fixed_lines += 1
return None, lastnum
# TER Entries also have continuing numbering, consider them as well
if pdbline.startswith('TER'):
newnum = lastnum + 1
if pdbline.startswith('ATOM'):
newnum = lastnum + 1
currentnum = int(pdbline[6:11])
if lastnum + 1 != currentnum:
pdbline = pdbline[:6] + (5 - len(str(newnum))) * ' ' + str(newnum) + ' ' + pdbline[12:]
fixed = True
# No chain assigned
if pdbline[21] == ' ':
pdbline = pdbline[:21] + 'A' + pdbline[22:]
fixed = True
if pdbline.endswith('H'):
self.num_fixed_lines += 1
return None, lastnum
if pdbline.startswith('HETATM'):
newnum = lastnum + 1
currentnum = int(pdbline[6:11])
if lastnum + 1 != currentnum:
pdbline = pdbline[:6] + (5 - len(str(newnum))) * ' ' + str(newnum) + ' ' + pdbline[12:]
fixed = True
# No chain assigned
if pdbline[21] == ' ':
pdbline = pdbline[:21] + 'Z' + pdbline[22:]
Expand All @@ -114,8 +144,11 @@ def fix_pdbline(self, pdbline):
if len(ligname.strip()) == 0:
pdbline = pdbline[:17] + 'LIG ' + pdbline[21:]
fixed = True
if pdbline.endswith('H'):
self.num_fixed_lines += 1
return None, lastnum
self.num_fixed_lines += 1 if fixed else 0
return pdbline
return pdbline + '\n', max(newnum, lastnum)

def get_linkage(self, line):
"""Get the linkage information from a LINK entry PDB line."""
Expand Down
37 changes: 26 additions & 11 deletions plip/modules/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@
metal_info = namedtuple('metal_info', 'metal_id, target_id location')


def select_by_ids(selname, idlist, selection_exists=False, chunksize=20):
    """Build a PyMOL selection from a list of atom IDs in small steps.

    Selecting a large number of IDs concatenated into a single selection
    string can cause a buffer overflow in PyMOL. This function takes a
    selection name and a list of IDs (integers) and extends the selection
    chunk by chunk instead.

    :param selname: name of the PyMOL selection to create or extend
    :param idlist: iterable of atom IDs; duplicates are ignored
    :param selection_exists: if False, the selection is first reset to an
        empty selection; if True, the IDs are added to the existing one
    :param chunksize: number of IDs per select call (20 by default)
    :raises ValueError: if chunksize is smaller than 1
    """
    if chunksize < 1:
        # A zero step makes range() fail and a negative one yields no chunks,
        # which would silently select nothing.
        raise ValueError('chunksize must be a positive integer')
    # Remove duplicates; sorting keeps the issued commands deterministic
    unique_ids = sorted(set(idlist))
    if not selection_exists:
        cmd.select(selname, 'None')  # Empty selection first
    # range() works on both Python 2 and 3, unlike xrange()
    for start in range(0, len(unique_ids), chunksize):
        idchunk = unique_ids[start:start + chunksize]
        cmd.select(selname, '%s or (id %s)' % (selname, '+'.join(map(str, idchunk))))


class PyMOLComplex:
"""Contains all information on a complex relevant for visualization. Can be pickled"""
def __init__(self, mol, site):
Expand Down Expand Up @@ -208,7 +220,7 @@ def visualize_in_pymol(plcomplex):

# Visualize and color metal ions if there are any
if not len(metal_ids) == 0:
cmd.select(ligname, '%s or id %s' % (ligname, metal_ids_str))
select_by_ids(ligname, metal_ids, selection_exists=True)
cmd.show('spheres', 'id %s and %s' % (metal_ids_str, pdbid))

# Additionally, select all members of composite ligands
Expand Down Expand Up @@ -244,7 +256,7 @@ def visualize_in_pymol(plcomplex):
if not len(plcomplex.hydrophobic_contacts.bs_ids) == 0:
for h in [['Hydrophobic-P', plcomplex.hydrophobic_contacts.bs_ids],
['Hydrophobic-L', plcomplex.hydrophobic_contacts.lig_ids]]:
cmd.select(h[0], 'id %s' % '+'.join(map(str, h[1])))
select_by_ids(h[0], h[1])
for i in plcomplex.hydrophobic_contacts.pairs_ids:
cmd.select('tmp_bs', 'id %i' % i[0])
cmd.select('tmp_lig', 'id %i' % i[1])
Expand All @@ -262,7 +274,7 @@ def visualize_in_pymol(plcomplex):
for group in [['HBondDonor-L', plcomplex.hbonds.lig_don_id], ['HBondDonor-P', plcomplex.hbonds.prot_don_id],
['HBondAccept-L', plcomplex.hbonds.lig_acc_id], ['HBondAccept-P', plcomplex.hbonds.prot_acc_id]]:
if not len(group[1]) == 0:
cmd.select(group[0], 'id %s' % '+'.join(map(str, group[1])))
select_by_ids(group[0], group[1])
for i in plcomplex.hbonds.ldon_id:
cmd.select('tmp_bs', 'id %i' % i[0])
cmd.select('tmp_lig', 'id %i' % i[1])
Expand All @@ -287,8 +299,9 @@ def visualize_in_pymol(plcomplex):
cmd.select(group[0], 'id %i' % group[1])
cmd.distance('HalogenBonds', 'tmp_bs', 'tmp_lig')
if not len(all_acc_o) == 0:
cmd.select('HalogenAccept', 'id %s' % '+'.join(map(str, all_acc_o)))
cmd.select('HalogenDonor', 'id %s' % '+'.join(map(str, all_don_x)))
select_by_ids('HalogenAccept', all_acc_o)
select_by_ids('HalogenDonor', all_don_x)
#cmd.select('HalogenDonor', 'id %s' % '+'.join(map(str, all_don_x)))
if object_exists('HalogenBonds'):
cmd.set('dash_color', 'greencyan', 'HalogenBonds')

Expand Down Expand Up @@ -408,7 +421,7 @@ def visualize_in_pymol(plcomplex):
###################

if not len(plcomplex.metal_complexes) == 0:
cmd.select('Metal-M', 'id %s' % metal_ids_str)
select_by_ids('Metal-M', metal_ids)
for metal_complex in plcomplex.metal_complexes:
cmd.select('tmp_m', 'id %i' % metal_complex.metal_id)
cmd.select('tmp_t', 'id %i' % metal_complex.target_id)
Expand Down Expand Up @@ -467,8 +480,10 @@ def visualize_in_pymol(plcomplex):
if object_exists(ligname):
cmd.zoom(ligname, 3)

cmd.set('sphere_scale', 0.2, 'resn HOH') # Needs to be done here because of the copy made
cmd.set('sphere_transparency', 0.4, '!resn HOH')
# Resize water molecules. Sometimes they are not heteroatoms HOH, but part of the protein
cmd.set('sphere_scale', 0.2, 'resn HOH or Water') # Needs to be done here because of the copy made
cmd.set('sphere_transparency', 0.4, '!(resn HOH or Water)')

cmd.origin(ligname)
if 'Centroids*' in cmd.get_names("selections"):
cmd.color('grey80', 'Centroids*')
Expand All @@ -481,11 +496,11 @@ def visualize_in_pymol(plcomplex):
# Selections for unpaired groups #
##################################
if not len(plcomplex.unpaired_hba_idx) == 0:
cmd.select('Unpaired-HBA', 'Unpaired-HBA or id %s' % '+'.join(str(idx) for idx in plcomplex.unpaired_hba_idx))
select_by_ids('Unpaired-HBA', plcomplex.unpaired_hba_idx, selection_exists=True)
if not len(plcomplex.unpaired_hbd_idx) == 0:
cmd.select('Unpaired-HBD', 'Unpaired-HBD or id %s' % '+'.join(str(idx) for idx in plcomplex.unpaired_hbd_idx))
select_by_ids('Unpaired-HBD', plcomplex.unpaired_hbd_idx, selection_exists=True)
if not len(plcomplex.unpaired_hal_idx) == 0:
cmd.select('Unpaired-HAL', 'Unpaired-HAL or id %s' % '+'.join(str(idx) for idx in plcomplex.unpaired_hal_idx))
select_by_ids('Unpaired-HAL', plcomplex.unpaired_hal_idx, selection_exists=True)

##############################
# Organization of selections #
Expand Down
12 changes: 6 additions & 6 deletions plip/plipcmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ import multiprocessing
# External libraries
import lxml.etree as et

__version__ = '1.2.2'
__version__ = '1.2.3'
descript = "Protein-Ligand Interaction Profiler (PLIP) v%s " \
"is a command-line based tool to analyze interactions in a protein-ligand complex. " \
"If you are using PLIP in your work, please cite: " \
Expand Down Expand Up @@ -89,11 +89,11 @@ def fetch_pdb(pdbid):
sysexit(3, 'Invalid PDB ID (Entry does not exist on PDB server)')
message('Downloading file from PDB ... ')
pdburl = 'http://www.rcsb.org/pdb/files/%s.pdb' % current_entry # Get URL for current entry
pdbfile = None
try:
pdbfile = urllib2.urlopen(pdburl).read()
except urllib2.HTTPError:
sysexit(5, "Error: No file in PDB format available from wwPDB for the given PDB ID.")
pdbfile = urllib2.urlopen(pdburl).read()
# If no PDB file is available, a text is now shown with "We're sorry, but ..."
# Could previously be distinguished by an HTTP error
if 'sorry' in pdbfile:
sysexit(5, "Error: No file in PDB format available from wwPDB for the given PDB ID.\n")
return [pdbfile, current_entry]


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from setuptools import setup

setup(name='plip',
version='1.2.2a',
version='1.2.3',
description='PLIP - Fully automated protein-ligand interaction profiler',
classifiers=[
'Development Status :: 5 - Production/Stable',
Expand Down

0 comments on commit 88e0cf9

Please sign in to comment.