Skip to content

Commit

Permalink
Merge pull request #251 from cokelaer/fix_uniprot_failedids
Browse files Browse the repository at this point in the history
Fix uniprot failedids
  • Loading branch information
cokelaer authored Mar 8, 2023
2 parents 5c5739c + 0567295 commit ea6c74b
Show file tree
Hide file tree
Showing 14 changed files with 251 additions and 104 deletions.
15 changes: 13 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
.. image:: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png
:target: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png


#################################################################################
BIOSERVICES: access to biological web services programmatically
#################################################################################
Expand All @@ -16,8 +20,8 @@ BIOSERVICES: access to biological web services programmatically
.. image:: https://static.pepy.tech/personalized-badge/bioservices?period=month&units=international_system&left_color=black&right_color=orange&left_text=Downloads
:target: https://pepy.tech/project/bioservices

.. image:: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png
:target: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png
|Codacy-Grade|



:Python_version_available: BioServices is tested for Python 3.7, 3.8, 3.9, 3.10
Expand Down Expand Up @@ -189,6 +193,11 @@ Changelog
========= ====================================================================
Version Description
========= ====================================================================
1.11.2 * Update COG service to be more user-friendly and return all pages
by default
* uniprot set progress to False in the search method
* Merged #250 and #249 user PRs (compress option in uniprot module
and logging issue in biodbnet)
1.11.1 * Fix regression in uniprot.mapping
(https://github.com/cokelaer/bioservices/issues/245)
1.11.0 * Fix uniprot limitation of 25 results only (
Expand Down Expand Up @@ -240,3 +249,5 @@ Version Description
========= ====================================================================


.. |Codacy-Grade| image:: https://app.codacy.com/project/badge/Grade/9b8355ff642f4de9acd4b270f8d14d10
:target: https://app.codacy.com/gh/cokelaer/bioservices/dashboard
2 changes: 0 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
if sphinx.version_info[:2] >= (1, 4)
else 'sphinx.ext.pngmath'),
'sphinx.ext.coverage',
'sphinx_copybutton',
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
'sphinx.ext.todo',
Expand All @@ -72,7 +71,6 @@


todo_include_todos=True
jscopybutton_path = "copybutton.js"
autoclass_content = 'both'

# Add any paths that contain templates here, relative to this directory.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

_MAJOR = 1
_MINOR = 11
_MICRO = 1
_MICRO = 2
version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO)
release = '%d.%d' % (_MAJOR, _MINOR)

Expand Down
2 changes: 1 addition & 1 deletion src/bioservices/biodbnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def _interpret_output_db(self, input_db, output_db):
# remove spaces so as to compare the input/output databases with the
# list of databases returned by getInputs
outputs = self._list_to_string(output_db)
#inputResult = self.getInputs()
# inputResult = self.getInputs()
# getOutputsForInput method
outputResult = self.getOutputsForInput(input_db)
outputResult = [this.lower().replace(" ", "") for this in outputResult]
Expand Down
2 changes: 0 additions & 2 deletions src/bioservices/biomodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
logger.name = __name__



__all__ = ["BioModels"]


Expand Down Expand Up @@ -87,7 +86,6 @@ def __init__(self, verbose=True):
"""
self.services = REST(name="BioModels", url=BioModels._url, verbose=verbose)


def _check_format(self, frmt, supported=["json", "xml", "html"]):
if frmt not in supported:
raise ValueError("Supported format for this function are {}. You provided {}".format(supported, frmt))
Expand Down
189 changes: 131 additions & 58 deletions src/bioservices/cog.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# This file is part of bioservices software
#
# Copyright (c) 2013-2014 - EBI-EMBL
# Copyright (c) 2013-2023 - EBI-EMBL
#
# File author(s):
# Thomas Cokelaer <cokelaer@ebi.ac.uk>
Expand Down Expand Up @@ -34,120 +34,193 @@
import io
import sys

from tqdm import tqdm

import pandas as pd

from bioservices.services import REST
from bioservices import logger

logger.name = __name__

try:
import pandas as pd
except:
pass


__all__ = ["COG"]


class COG:
"""Interface to the COG service
Note that in addition to the original COG service from NCBI, this interface also
helps you in searching for organisms, and retrieves all pages in a single command
(rather than scanning all pages yourself).
Here is an example of getting the COGs for E. coli. You first need the exact matching name.
BioServices provides a function to search for the exact organism name that will be understood
by the COG service (here Escherichia_coli_K-12_sub_MG1655 ... you cannot really guess it)
::
from bioservices import COG
c = COG()
cogs = c.get_all_cogs() # This is a pandas dataframe
from bioservices import COG
c = COG()
c.search_organism('coli')
# the output of the previous command gives you the name
c.get_cogs_by_organism('Escherichia_coli_K-12_sub_MG1655')
"""

_url = "https://www.ncbi.nlm.nih.gov/research/cog/api"

def __init__(self, verbose=False, cache=False):
"""**Constructor**"""
self.services = REST(name="cog", url=COG._url, verbose=verbose, cache=cache)

def get_cogs(self, page=1):
"""Get COGs. Unfortunately, the API sends 10 COGS at a tine given a
self.show_progress = True

def _get_all(self, service_name="cog", params=None):
    """Fetch every page of a paginated endpoint and merge the results.

    The first page is requested, then the following pages are requested
    one after the other for as long as the last answer advertises a
    ``next`` page. The ``results`` of all pages are accumulated in the
    dictionary returned for the first page.

    :param str service_name: endpoint queried through ``self.services.http_get``
        (e.g. "cog", "cogdef", "taxonomy").
    :param dict params: optional query parameters. The dictionary is copied,
        so the caller's object is left untouched; any "page" entry is ignored
        since this method drives the page number itself.
    :return: the first page's dictionary whose ``results`` list has been
        extended with the results of all the following pages.
    :raises RuntimeError: if a page could not be retrieved after several
        attempts.
    """
    # Work on a private copy: writing the "page" key into a shared default
    # argument (or into the caller's dictionary) leaks state between calls.
    query = dict(params) if params else {}
    query["page"] = 1

    # sometimes, a 404 is returned, let us try several times.
    trials = 3

    def fetch_current_page():
        for _ in range(trials):
            answer = self.services.http_get(service_name, frmt="json", params=query)
            # NOTE(review): a failed request is assumed not to come back as a
            # dictionary holding "results" (e.g. a bare status code) -- confirm
            # against REST.http_get.
            if isinstance(answer, dict) and "results" in answer:
                return answer
        raise RuntimeError(
            "Could not retrieve page {} of '{}' after {} attempts".format(query["page"], service_name, trials)
        )

    res = fetch_current_page()

    pbar = tqdm(total=res.get("count"), disable=not self.show_progress, leave=False)
    try:
        # the entries of the first page count towards the total as well
        pbar.update(len(res["results"]))

        current = res
        # Only ask for another page when the service says there is one;
        # otherwise a single-page answer would trigger a request for a
        # non-existing page 2.
        while current.get("next") is not None:
            query["page"] += 1
            current = fetch_current_page()
            res["results"].extend(current["results"])
            pbar.update(len(current["results"]))
    finally:
        # release the progress bar even if a page could not be fetched
        pbar.close()

    return res

def get_cogs(self, **kwargs):
"""Get COGs. Unfortunately, the API sends 10 COGS at a time given a
specific page.
The dictionary returned contains the results, count, previous and next
page.
"""
res = self.services.http_get("cog", frmt="json", params={"page": page})
if kwargs.get("page") is None:
res = self._get_all("cog", params=kwargs)
else:
res = self.services.http_get("cog", frmt="json", params=kwargs)
return res

def get_cogs_by_gene(self, gene):
def get_cogs_by_gene(self, gene, page=None):
"""Filter COGs by gene tag: MK0280"""
res = self.services.http_get("cog", frmt="json", params={"gene": gene})
return res
return self.get_cogs(**{"gene": gene, "page": page})

def get_cogs_by_id(self, cog_id):
def get_cogs_by_id(self, cog_id, page=None):
"""Filter COGs by COG ID tag: COG0003"""
res = self.services.http_get("cog", frmt="json", params={"cog": cog_id})
return res
return self.get_cogs(**{"cog": cog_id, "page": page})

def get_cogs_by_assembly_id(self, assembly_id):
def get_cogs_by_assembly_id(self, assembly_id, page=None):
"""Filter COGs by assembly ID: GCA_000007185.1"""
res = self.services.http_get("cog", frmt="json", params={"assembly": assembly_id})
return res
return self.get_cogs(**{"assembly": assembly_id, "page": page})

def get_cogs_by_orgnanism(self, name):
def get_cogs_by_organism(self, name, page=None):
"""Filter COGs by organism name: Nitrosopumilus_maritimus_SCM1"""
res = self.services.http_get("cog", frmt="json", params={"organism": name})
return res
return self.get_cogs(**{"organism": name, "page": page})

def get_cogs_by_taxon_id(self, taxon_id):
def get_cogs_by_taxon_id(self, taxon_id, page=None):
"""Filter COGs by taxid: 1229908"""
res = self.services.http_get("cog", frmt="json", params={"taxid": taxon_id})
return res
return self.get_cogs(**{"taxid": taxon_id, "page": page})

def get_cogs_by_category(self, category):
def get_cogs_by_category(self, category, page=None):
"""Filter COGs by Taxonomic Category: ACTINOBACTERIA"""
res = self.services.http_get("cog", frmt="json", params={"category": category})
return res
return self.get_cogs(**{"category": category, "page": page})

def get_cogs_by_category_id(self, category):
def get_cogs_by_category_id(self, category, page=None):
"""Filter COGs by Taxonomic Category taxid: 651137"""
res = self.services.http_get("cog", frmt="json", params={"cat_taxid": category})
return res
return self.get_cogs(**{"cat_taxid": category, "page": page})

def get_cogs_by_category_(self, protein):
def get_cogs_by_protein_name(self, protein, page=None):
"""Filter COGs by Protein name: AJP49128.1"""
res = self.services.http_get("cog", frmt="json", params={"protein": protein})
return res

# The search keywords (cog, assembly, organism, taxid, category, cat_taxid and protein)
# can be combined to filter the COG lists.
return self.get_cogs(**{"protein": protein, "page": page})

def get_cogs_by_id_and_category(self, cog_id, category):
def get_cogs_by_id_and_category(self, cog_id, category, page=None):
"""Filter COGs by COG id and Taxonomy Categories: COG0004 and CYANOBACTERIA"""
res = self.services.http_get("cog", frmt="json", params={"cog": cog_id, "category": category})
return res
return self.get_cogs(**{"cog": cog_id, "category": category, "page": page})

def get_cogs_by_id_and_organism(self, cog_id, organism):
def get_cogs_by_id_and_organism(self, cog_id, organism, page=None):
"""Filter COGs by COG id and organism: COG0004 and Escherichia_coli_K-12_sub_MG1655"""
res = self.services.http_get("cog", frmt="json", params={"cog": cog_id, "organism,": organism})
return res
return self.get_cogs(**{"cog": cog_id, "organism,": organism, "page": page})

def get_all_cogs_definition(self):
def get_all_cogs_definition(self, page=None):
"""Get all COG Definitions:"""
res = self.services.http_get("cogdef", frmt="json")
if page is None:
self._get_all("cogdef")
else:
res = self.services.http_get("cogdef", frmt="json", params={"page": page})
return res

def get_cog_definition_by_cog_id(self, cog_id):
"""Get specific COG Definitions by COG: COG0003"""
res = self.services.http_get("cogdef", frmt="json", params={"cog": cog_id})
return res
return self.services.http_get("cogdef", frmt="json", params={"cog": cog_id})

def get_cog_definition_by_name(self, cog):
def get_cog_definition_by_name(self, cog, page=None):
"""Get specific COG Definitions by name: Thiamin-binding stress-response protein YqgV, UPF0045 family"""
res = self.services.http_get("cogdef", frmt="json", params={"name": cog})

if page is None:
res = self._get_all("cogdef", params={"name": cog})
else:
res = self.services.http_get("cogdef", frmt="json", params={"name": cog})
return res

def get_taxonomic_categories(self):
"""Get all Taxonomic Categories:"""
res = self.services.http_get("taxonomy", frmt="json")
def get_taxonomic_categories(self, page=None):
"""Get all Taxonomic Categories.
If page is set, only that page is returned. There are 10 entries per page.
If page is unset (default), all results are returned.
::
from bioservices import COG
c = COG()
names = [x['name'] for x in c.get_taxonomic_categories()['results']]
"""
if page is None:
res = self._get_all("taxonomy", params={})
else:
res = self.services.http_get("taxonomy", frmt="json", params={"page": page})

return res

def get_taxonomic_category_by_name(self, name):
"""Get specific Taxonomic Category by name: ALPHAPROTEOBACTERIA"""
res = self.services.http_get("taxonomy", frmt="json", params={"name": name})
def get_taxonomic_category_by_name(self, name, page=None):
"""Get specific Taxonomic Category by name
c.get_taxonomic_category_by_name("ALPHAPROTEOBACTERIA")
"""
if page is None:
res = self._get_all("taxonomy", params={"name": name})
else:
res = self.services.http_get("taxonomy", frmt="json", params={"name": name, "page": page})
return res

def search_organism(self, name):
    """Return candidates that match the input name.

    All taxonomic categories are retrieved with
    ``self.get_taxonomic_categories()`` and every organism whose
    ``genome_name`` contains *name* (case-insensitive substring match)
    is kept.

    :param str name: text to look for in the genome names (e.g. "coli").
    :return: list of items. Each item is a dictionary with genome name,
        assembly identifier and taxon identifier.
    """
    # Compare in lower case on both sides; the pattern used to be
    # hard-coded to "coli", which ignored the *name* argument entirely.
    pattern = name.lower()
    categories = self.get_taxonomic_categories()
    return [
        organism
        for category in categories["results"]
        for organism in category["organisms"]
        if pattern in organism["genome_name"].lower()
    ]
2 changes: 0 additions & 2 deletions src/bioservices/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,3 @@ def search(self, database_or_query=None, query=None, frmt="json"):
headers = self.services.get_headers(content=frmt)
res = self.services.http_get(url, frmt=frmt, headers=headers)
return res


4 changes: 3 additions & 1 deletion src/bioservices/kegg.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,9 @@ def list(self, query, organism=None):

if organism:
if organism not in self.organismIds:
self.services.logging.error("""Invalid organism provided (%s). See the organismIds attribute""" % organism)
self.services.logging.error(
"""Invalid organism provided (%s). See the organismIds attribute""" % organism
)
raise BioServicesError("Not a valid organism")
if query not in ["pathway", "module"]:
self.services.logging.error(
Expand Down
6 changes: 3 additions & 3 deletions src/bioservices/pdbe.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class PDBe:
>>> from bioservices import PDBe
>>> s = PDBe()
>>> res = s.get_file("1FBV", "pdb")
>>> res = s.get_files("1FBV")
"""

Expand All @@ -57,8 +57,8 @@ def __init__(self, verbose=False, cache=False):
:param bool verbose: prints informative messages (default is off)
"""
url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/"
self.services = REST(name="PDBe", url=url, verbose=verbose, cache=cache)
url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry"
self.services = REST(name="PDBe", url=url, verbose=verbose, cache=cache, url_defined_later=True)

def _check_id(self, pdbid):
if isinstance(pdbid, list):
Expand Down
6 changes: 1 addition & 5 deletions src/bioservices/quickgo.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,7 @@ def __init__(self, verbose=False, cache=False):
"""
# super(QuickGO, self).__init__(url="http://www.ebi.ac.uk/QuickGO-Old",
self.services = REST(url="https://www.ebi.ac.uk/QuickGO",
name="quickGO",
verbose=verbose,
cache=cache
)
self.services = REST(url="https://www.ebi.ac.uk/QuickGO", name="quickGO", verbose=verbose, cache=cache)

def go_search(self, query, limit=600, page=1):
"""Searches a simple user query, e.g., query=apopto
Expand Down
Loading

0 comments on commit ea6c74b

Please sign in to comment.