PLIP v1.2.3 Better support for large and custom structures.

pharmai · Feb 11, 2016 · 88e0cf9 · 88e0cf9
1 parent d683a34
commit 88e0cf9
Show file tree

Hide file tree

Showing 5 changed files with 80 additions and 26 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -1,6 +1,12 @@
 Changelog
 ---------
 
+### 1.2.3
+* __Better support for files from MD and docking software__
+* __Fixes issues with large and complex structures__
+* Speed optimizations
+
+
 ### 1.2.2
 * __Option to consider alternate atom locations (e.g. for ligands with several conformations__
 * Automatic fixing of missing ligand names

diff --git a/plip/modules/preparation.py b/plip/modules/preparation.py
@@ -55,10 +55,19 @@ def parse_pdb(self):
         covalent = []
         alt = []
         previous_ter = False
+
+        # New code: fix all lines first, then do the mapping on the fixed lines
+        #@TODO Test code
+        lastnum = 0 # Atom numbering (has to be consecutive)
         for line in fil:
-            corrected_line = self.fix_pdbline(line)
-            corrected_lines.append(corrected_line)
+            corrected_line, newnum = self.fix_pdbline(line, lastnum)
+            if corrected_line is not None:
+                corrected_lines.append(corrected_line)
+                lastnum = newnum
+        corrected_pdb = ''.join(corrected_lines)
 
+
+        for line in corrected_lines:
             if line.startswith(("ATOM", "HETATM")):
                 # Retrieve alternate conformations
                 atomid, location = int(line[6:11]), line[16]
@@ -83,21 +92,42 @@ def parse_pdb(self):
             # Get covalent linkages between ligands
             if line.startswith("LINK"):
                 covalent.append(self.get_linkage(line))
-
-        corrected_pdb = ''.join(corrected_lines)
         return d, modres, covalent, alt, corrected_pdb
 
-    def fix_pdbline(self, pdbline):
+    def fix_pdbline(self, pdbline, lastnum):
         """Fix a PDB line if information is missing."""
-        # #@todo Introduce verbose/log
-        #@ todo Unit tests
         fixed = False
+        newnum = 0
+        pdbline = pdbline.strip('\n')
+        # Some MD / Docking tools produce empty lines, leading to segfaults
+        if len(pdbline.strip()) == 0:
+            self.num_fixed_lines += 1
+            return None, lastnum
+        if len(pdbline) > 100: # Should be 80 long
+            self.num_fixed_lines += 1
+            return None, lastnum
+        # TER Entries also have continuing numbering, consider them as well
+        if pdbline.startswith('TER'):
+            newnum = lastnum + 1
         if pdbline.startswith('ATOM'):
+            newnum = lastnum + 1
+            currentnum = int(pdbline[6:11])
+            if lastnum + 1 != currentnum:
+                pdbline = pdbline[:6] + (5 - len(str(newnum))) * ' ' + str(newnum) + ' ' + pdbline[12:]
+                fixed = True
             # No chain assigned
             if pdbline[21] == ' ':
                 pdbline = pdbline[:21] + 'A' + pdbline[22:]
                 fixed = True
+            if pdbline.endswith('H'):
+                self.num_fixed_lines += 1
+                return None, lastnum
         if pdbline.startswith('HETATM'):
+            newnum = lastnum + 1
+            currentnum = int(pdbline[6:11])
+            if lastnum + 1 != currentnum:
+                pdbline = pdbline[:6] + (5 - len(str(newnum))) * ' ' + str(newnum) + ' ' + pdbline[12:]
+                fixed = True
             # No chain assigned
             if pdbline[21] == ' ':
                 pdbline = pdbline[:21] + 'Z' + pdbline[22:]
@@ -114,8 +144,11 @@ def fix_pdbline(self, pdbline):
             if len(ligname.strip()) == 0:
                 pdbline = pdbline[:17] + 'LIG ' + pdbline[21:]
                 fixed = True
+            if pdbline.endswith('H'):
+                self.num_fixed_lines += 1
+                return None, lastnum
         self.num_fixed_lines += 1 if fixed else 0
-        return pdbline
+        return pdbline + '\n', max(newnum, lastnum)
 
     def get_linkage(self, line):
         """Get the linkage information from a LINK entry PDB line."""

diff --git a/plip/modules/visualize.py b/plip/modules/visualize.py
@@ -33,6 +33,18 @@
 metal_info = namedtuple('metal_info', 'metal_id, target_id location')
 
 
+def select_by_ids(selname, idlist, selection_exists=False, chunksize=20):
+    """Selection with a large number of ids concatenated into a selection
+    list can cause buffer overflow in PyMOL. This function takes a selection
+    name and a list of IDs (list of integers) as input and makes a careful
+    step-by-step selection (packages of 20 by default)"""
+    idlist = list(set(idlist))  # Remove duplicates
+    if not selection_exists:
+        cmd.select(selname, 'None')  # Empty selection first
+    idchunks = [idlist[i:i+chunksize] for i in xrange(0, len(idlist), chunksize)]
+    for idchunk in idchunks:
+        cmd.select(selname, '%s or (id %s)' % (selname, '+'.join(map(str, idchunk))))
+
 class PyMOLComplex:
     """Contains all information on a complex relevant for visualization. Can be pickled"""
     def __init__(self, mol, site):
@@ -208,7 +220,7 @@ def visualize_in_pymol(plcomplex):
 
     # Visualize and color metal ions if there are any
     if not len(metal_ids) == 0:
-        cmd.select(ligname, '%s or id %s' % (ligname, metal_ids_str))
+        select_by_ids(ligname, metal_ids, selection_exists=True)
         cmd.show('spheres', 'id %s and %s' % (metal_ids_str, pdbid))
 
     # Additionally, select all members of composite ligands
@@ -244,7 +256,7 @@ def visualize_in_pymol(plcomplex):
     if not len(plcomplex.hydrophobic_contacts.bs_ids) == 0:
         for h in [['Hydrophobic-P', plcomplex.hydrophobic_contacts.bs_ids],
                   ['Hydrophobic-L', plcomplex.hydrophobic_contacts.lig_ids]]:
-            cmd.select(h[0], 'id %s' % '+'.join(map(str, h[1])))
+            select_by_ids(h[0], h[1])
         for i in plcomplex.hydrophobic_contacts.pairs_ids:
             cmd.select('tmp_bs', 'id %i' % i[0])
             cmd.select('tmp_lig', 'id %i' % i[1])
@@ -262,7 +274,7 @@ def visualize_in_pymol(plcomplex):
     for group in [['HBondDonor-L', plcomplex.hbonds.lig_don_id], ['HBondDonor-P', plcomplex.hbonds.prot_don_id],
                   ['HBondAccept-L', plcomplex.hbonds.lig_acc_id], ['HBondAccept-P', plcomplex.hbonds.prot_acc_id]]:
         if not len(group[1]) == 0:
-            cmd.select(group[0], 'id %s' % '+'.join(map(str, group[1])))
+            select_by_ids(group[0], group[1])
     for i in plcomplex.hbonds.ldon_id:
         cmd.select('tmp_bs', 'id %i' % i[0])
         cmd.select('tmp_lig', 'id %i' % i[1])
@@ -287,8 +299,9 @@ def visualize_in_pymol(plcomplex):
             cmd.select(group[0], 'id %i' % group[1])
         cmd.distance('HalogenBonds', 'tmp_bs', 'tmp_lig')
     if not len(all_acc_o) == 0:
-        cmd.select('HalogenAccept', 'id %s' % '+'.join(map(str, all_acc_o)))
-        cmd.select('HalogenDonor', 'id %s' % '+'.join(map(str, all_don_x)))
+        select_by_ids('HalogenAccept', all_acc_o)
+        select_by_ids('HalogenDonor', all_don_x)
+        #cmd.select('HalogenDonor', 'id %s' % '+'.join(map(str, all_don_x)))
     if object_exists('HalogenBonds'):
         cmd.set('dash_color', 'greencyan', 'HalogenBonds')
 
@@ -408,7 +421,7 @@ def visualize_in_pymol(plcomplex):
     ###################
 
     if not len(plcomplex.metal_complexes) == 0:
-        cmd.select('Metal-M', 'id %s' % metal_ids_str)
+        select_by_ids('Metal-M', metal_ids)
         for metal_complex in plcomplex.metal_complexes:
             cmd.select('tmp_m', 'id %i' % metal_complex.metal_id)
             cmd.select('tmp_t', 'id %i' % metal_complex.target_id)
@@ -467,8 +480,10 @@ def visualize_in_pymol(plcomplex):
         if object_exists(ligname):
             cmd.zoom(ligname, 3)
 
-    cmd.set('sphere_scale', 0.2, 'resn HOH')  # Needs to be done here because of the copy made
-    cmd.set('sphere_transparency', 0.4, '!resn HOH')
+    # Resize water molecules. Sometimes they are not heteroatoms HOH, but part of the protein
+    cmd.set('sphere_scale', 0.2, 'resn HOH or Water')  # Needs to be done here because of the copy made
+    cmd.set('sphere_transparency', 0.4, '!(resn HOH or Water)')
+
     cmd.origin(ligname)
     if 'Centroids*' in cmd.get_names("selections"):
         cmd.color('grey80', 'Centroids*')
@@ -481,11 +496,11 @@ def visualize_in_pymol(plcomplex):
     # Selections for unpaired groups #
     ##################################
     if not len(plcomplex.unpaired_hba_idx) == 0:
-        cmd.select('Unpaired-HBA', 'Unpaired-HBA or id %s' % '+'.join(str(idx) for idx in plcomplex.unpaired_hba_idx))
+        select_by_ids('Unpaired-HBA', plcomplex.unpaired_hba_idx, selection_exists=True)
     if not len(plcomplex.unpaired_hbd_idx) == 0:
-        cmd.select('Unpaired-HBD', 'Unpaired-HBD or id %s' % '+'.join(str(idx) for idx in plcomplex.unpaired_hbd_idx))
+        select_by_ids('Unpaired-HBD', plcomplex.unpaired_hbd_idx, selection_exists=True)
     if not len(plcomplex.unpaired_hal_idx) == 0:
-        cmd.select('Unpaired-HAL', 'Unpaired-HAL or id %s' % '+'.join(str(idx) for idx in plcomplex.unpaired_hal_idx))
+        select_by_ids('Unpaired-HAL', plcomplex.unpaired_hal_idx, selection_exists=True)
 
     ##############################
     # Organization of selections #

diff --git a/plip/plipcmd b/plip/plipcmd
@@ -45,7 +45,7 @@ import multiprocessing
 # External libraries
 import lxml.etree as et
 
-__version__ = '1.2.2'
+__version__ = '1.2.3'
 descript = "Protein-Ligand Interaction Profiler (PLIP) v%s " \
            "is a command-line based tool to analyze interactions in a protein-ligand complex. " \
            "If you are using PLIP in your work, please cite: " \
@@ -89,11 +89,11 @@ def fetch_pdb(pdbid):
         sysexit(3, 'Invalid PDB ID (Entry does not exist on PDB server)')
     message('Downloading file from PDB ... ')
     pdburl = 'http://www.rcsb.org/pdb/files/%s.pdb' % current_entry  # Get URL for current entry
-    pdbfile = None
-    try:
-        pdbfile = urllib2.urlopen(pdburl).read()
-    except urllib2.HTTPError:
-        sysexit(5, "Error: No file in PDB format available from wwPDB for the given PDB ID.")
+    pdbfile = urllib2.urlopen(pdburl).read()
+    # If no PDB file is available, the server now returns a page with the text "We're sorry, but ..."
+    # Previously, this case could be distinguished by an HTTP error
+    if 'sorry' in pdbfile:
+        sysexit(5, "Error: No file in PDB format available from wwPDB for the given PDB ID.\n")
     return [pdbfile, current_entry]
 
 

diff --git a/setup.py b/setup.py
@@ -19,7 +19,7 @@
 from setuptools import setup
 
 setup(name='plip',
-      version='1.2.2a',
+      version='1.2.3',
       description='PLIP - Fully automated protein-ligand interaction profiler',
       classifiers=[
           'Development Status :: 5 - Production/Stable',