diff --git a/README.rst b/README.rst index a6637d0..9465e01 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,7 @@ +.. image:: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png + :target: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png + + ################################################################################# BIOSERVICES: access to biological web services programmatically ################################################################################# @@ -16,8 +20,8 @@ BIOSERVICES: access to biological web services programmatically .. image:: https://static.pepy.tech/personalized-badge/bioservices?period=month&units=international_system&left_color=black&right_color=orange&left_text=Downloads :target: https://pepy.tech/project/bioservices -.. image:: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png - :target: https://raw.githubusercontent.com/cokelaer/bioservices/main/doc/_static/bioservices2_logo_256.png +|Codacy-Grade| + :Python_version_available: BioServices is tested for Python 3.7, 3.8, 3.9, 3.10 @@ -189,6 +193,11 @@ Changelog ========= ==================================================================== Version Description ========= ==================================================================== +1.11.2 * Update COG service to be more user-friendly and return all pages + by default + * uniprot set progress to False in the search method + * Merged #250 and #249 user PRs (compress option in uniprot module + and logging issue in biodbnet) 1.11.1 * Fix regression i uniprot.mapping (https://github.com/cokelaer/bioservices/issues/245) 1.11.0 * Fix uniprot limitation of 25 results only ( @@ -240,3 +249,5 @@ Version Description ========= ==================================================================== +.. 
|Codacy-Grade| image:: https://app.codacy.com/project/badge/Grade/9b8355ff642f4de9acd4b270f8d14d10 + :target: https://app.codacy.com/gh/cokelaer/bioservices/dashboard diff --git a/doc/conf.py b/doc/conf.py index 0f14d28..756de22 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -57,7 +57,6 @@ if sphinx.version_info[:2] >= (1, 4) else 'sphinx.ext.pngmath'), 'sphinx.ext.coverage', - 'sphinx_copybutton', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', @@ -72,7 +71,6 @@ todo_include_todos=True -jscopybutton_path = "copybutton.js" autoclass_content = 'both' # Add any paths that contain templates here, relative to this directory. diff --git a/setup.py b/setup.py index 50def8f..71a2063 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ _MAJOR = 1 _MINOR = 11 -_MICRO = 1 +_MICRO = 2 version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) release = '%d.%d' % (_MAJOR, _MINOR) diff --git a/src/bioservices/biodbnet.py b/src/bioservices/biodbnet.py index c950925..22c265e 100644 --- a/src/bioservices/biodbnet.py +++ b/src/bioservices/biodbnet.py @@ -89,7 +89,7 @@ def _interpret_output_db(self, input_db, output_db): # remove spaces so as to compare the input/output databases with the # list of databases returned by getInputs outputs = self._list_to_string(output_db) - #inputResult = self.getInputs() + # inputResult = self.getInputs() # getOutputsForInput method outputResult = self.getOutputsForInput(input_db) outputResult = [this.lower().replace(" ", "") for this in outputResult] diff --git a/src/bioservices/biomodels.py b/src/bioservices/biomodels.py index 4684f8a..3d8007e 100644 --- a/src/bioservices/biomodels.py +++ b/src/bioservices/biomodels.py @@ -40,7 +40,6 @@ logger.name = __name__ - __all__ = ["BioModels"] @@ -87,7 +86,6 @@ def __init__(self, verbose=True): """ self.services = REST(name="BioModels", url=BioModels._url, verbose=verbose) - def _check_format(self, frmt, supported=["json", "xml", "html"]): if frmt not in supported: raise ValueError("Supported format 
for this function are {}. You provided {}".format(supported, frmt)) diff --git a/src/bioservices/cog.py b/src/bioservices/cog.py index 3a8156c..f1d66ba 100644 --- a/src/bioservices/cog.py +++ b/src/bioservices/cog.py @@ -1,7 +1,7 @@ # # This file is part of bioservices software # -# Copyright (c) 2013-2014 - EBI-EMBL +# Copyright (c) 2013-2023 - EBI-EMBL # # File author(s): # Thomas Cokelaer @@ -34,16 +34,15 @@ import io import sys +from tqdm import tqdm + +import pandas as pd + from bioservices.services import REST from bioservices import logger logger.name = __name__ -try: - import pandas as pd -except: - pass - __all__ = ["COG"] @@ -51,11 +50,21 @@ class COG: """Interface to the COG service + Note that in addition to the original COG service from NCBI, this interface also + helps you search for an organism and retrieve all pages in a single command + (rather than scanning all pages yourself). + + Here is an example of getting the COGs for E. coli. You first need the exact matching name. + BioServices provides a function to search for the exact organism name that will be understood + by the COG service (here Escherichia_coli_K-12_sub_MG1655, which you cannot really guess) + :: - from bioservices import COG - c = COG() - cogs = c.get_all_cogs() # This is a pandas dataframe + from bioservices import COG + c = COG() + c.search_organism('coli') + # the output of the previous command gives you the name + c.get_cogs_by_organism('Escherichia_coli_K-12_sub_MG1655') """ _url = "https://www.ncbi.nlm.nih.gov/research/cog/api" @@ -63,91 +72,155 @@ class COG: def __init__(self, verbose=False, cache=False): """**Constructor**""" self.services = REST(name="cog", url=COG._url, verbose=verbose, cache=cache) - - def get_cogs(self, page=1): - """Get COGs. 
Unfortunately, the API sends 10 COGS at a tine given a + self.show_progress = True + + def _get_all(self, service_name="cog", params={}): + page = 1 + params["page"] = page + res = self.services.http_get(service_name, frmt="json", params=params) + total = res["count"] + + pbar = tqdm(total=total, disable=not self.show_progress, leave=False) + + # sometimes, a 404 is returned, let us try several times. + trials = 3 + + while True: + params["page"] += 1 + for _ in range(trials): + other = self.services.http_get(service_name, frmt="json", params=params) + try: + res["results"].extend(other["results"]) + break + except TypeError: + pass + except Exception as err: + raise (err) + pbar.update(len(other["results"])) + if other["next"] is None: + break + pbar.close() + + return res + + def get_cogs(self, **kwargs): + """Get COGs. Unfortunately, the API sends 10 COGS at a time given a specific page. The dictionary returned contains the results, count, previous and next page. """ - res = self.services.http_get("cog", frmt="json", params={"page": page}) + if kwargs.get("page") is None: + res = self._get_all("cog", params=kwargs) + else: + res = self.services.http_get("cog", frmt="json", params=kwargs) return res - def get_cogs_by_gene(self, gene): + def get_cogs_by_gene(self, gene, page=None): """Filter COGs by gene tag: MK0280""" - res = self.services.http_get("cog", frmt="json", params={"gene": gene}) - return res + return self.get_cogs(**{"gene": gene, "page": page}) - def get_cogs_by_id(self, cog_id): + def get_cogs_by_id(self, cog_id, page=None): """Filter COGs by COG ID tag: COG0003""" - res = self.services.http_get("cog", frmt="json", params={"cog": cog_id}) - return res + return self.get_cogs(**{"cog": cog_id, "page": page}) - def get_cogs_by_assembly_id(self, assembly_id): + def get_cogs_by_assembly_id(self, assembly_id, page=None): """Filter COGs by assembly ID: GCA_000007185.1""" - res = self.services.http_get("cog", frmt="json", params={"assembly": assembly_id}) - 
return res + return self.get_cogs(**{"assembly": assembly_id, "page": page}) - def get_cogs_by_orgnanism(self, name): + def get_cogs_by_organism(self, name, page=None): """Filter COGs by organism name: Nitrosopumilus_maritimus_SCM1""" - res = self.services.http_get("cog", frmt="json", params={"organism": name}) - return res + return self.get_cogs(**{"organism": name, "page": page}) - def get_cogs_by_taxon_id(self, taxon_id): + def get_cogs_by_taxon_id(self, taxon_id, page=None): """Filter COGs by taxid: 1229908""" - res = self.services.http_get("cog", frmt="json", params={"taxid": taxon_id}) - return res + return self.get_cogs(**{"taxid": taxon_id, "page": page}) - def get_cogs_by_category(self, category): + def get_cogs_by_category(self, category, page=None): """Filter COGs by Taxonomic Category: ACTINOBACTERIA""" - res = self.services.http_get("cog", frmt="json", params={"category": category}) - return res + return self.get_cogs(**{"category": category, "page": page}) - def get_cogs_by_category_id(self, category): + def get_cogs_by_category_id(self, category, page=None): """Filter COGs by Taxonomic Category taxid: 651137""" - res = self.services.http_get("cog", frmt="json", params={"cat_taxid": category}) - return res + return self.get_cogs(**{"cat_taxid": category, "page": page}) - def get_cogs_by_category_(self, protein): + def get_cogs_by_protein_name(self, protein, page=None): """Filter COGs by Protein name: AJP49128.1""" - res = self.services.http_get("cog", frmt="json", params={"protein": protein}) - return res - - # The search keywords (cog, assembly, organism, taxid, category, cat_taxid and protein) - # can be combined to filter the COG lists. 
+ return self.get_cogs(**{"protein": protein, "page": page}) - def get_cogs_by_id_and_category(self, cog_id, category): + def get_cogs_by_id_and_category(self, cog_id, category, page=None): """Filter COGs by COG id and Taxonomy Categories: COG0004 and CYANOBACTERIA""" - res = self.services.http_get("cog", frmt="json", params={"cog": cog_id, "category": category}) - return res + return self.get_cogs(**{"cog": cog_id, "category": category, "page": page}) - def get_cogs_by_id_and_organism(self, cog_id, organism): + def get_cogs_by_id_and_organism(self, cog_id, organism, page=None): """Filter COGs by COG id and organism: COG0004 and Escherichia_coli_K-12_sub_MG1655""" - res = self.services.http_get("cog", frmt="json", params={"cog": cog_id, "organism,": organism}) - return res + return self.get_cogs(**{"cog": cog_id, "organism,": organism, "page": page}) - def get_all_cogs_definition(self): + def get_all_cogs_definition(self, page=None): """Get all COG Definitions:""" - res = self.services.http_get("cogdef", frmt="json") + if page is None: + self._get_all("cogdef") + else: + res = self.services.http_get("cogdef", frmt="json", params={"page": page}) return res def get_cog_definition_by_cog_id(self, cog_id): """Get specific COG Definitions by COG: COG0003""" - res = self.services.http_get("cogdef", frmt="json", params={"cog": cog_id}) - return res + return self.services.http_get("cogdef", frmt="json", params={"cog": cog_id}) - def get_cog_definition_by_name(self, cog): + def get_cog_definition_by_name(self, cog, page=None): """Get specific COG Definitions by name: Thiamin-binding stress-response protein YqgV, UPF0045 family""" - res = self.services.http_get("cogdef", frmt="json", params={"name": cog}) + + if page is None: + res = self._get_all("cogdef", params={"name": cog}) + else: + res = self.services.http_get("cogdef", frmt="json", params={"name": cog}) return res - def get_taxonomic_categories(self): - """Get all Taxonomic Categories:""" - res = 
self.services.http_get("taxonomy", frmt="json") + def get_taxonomic_categories(self, page=None): + """Get all Taxonomic Categories. + + If page is set, only that page is returned. There are 10 entries per page. + If page is unset (default), all results are returned. + + + :: + + from bioservices import COG + c = COG() + names = [x['name'] for x in c.get_taxonomic_categories()['results']] + + """ + if page is None: + res = self._get_all("taxonomy", params={}) + else: + res = self.services.http_get("taxonomy", frmt="json", params={"page": page}) + return res - def get_taxonomic_category_by_name(self, name): - """Get specific Taxonomic Category by name: ALPHAPROTEOBACTERIA""" - res = self.services.http_get("taxonomy", frmt="json", params={"name": name}) + def get_taxonomic_category_by_name(self, name, page=None): + """Get specific Taxonomic Category by name + + + c.get_taxonomic_category_by_name("ALPHAPROTEOBACTERIA") + """ + if page is None: + res = self._get_all("taxonomy", params={"name": name}) + else: + res = self.services.http_get("taxonomy", frmt="json", params={"name": name, "page": page}) return res + + def search_organism(self, name): + """Return candidates that match the input name. + + :param str name: + :return: list of items. Each item is a dictionary with genome name, assembly identifier and taxon identifier. 
+ + """ + results = self.get_taxonomic_categories() + candidates = [] + for x in results["results"]: + for y in x["organisms"]: + if "coli" in y["genome_name"].lower(): + candidates.append(y) + return candidates diff --git a/src/bioservices/hgnc.py b/src/bioservices/hgnc.py index e54d526..3dc8631 100644 --- a/src/bioservices/hgnc.py +++ b/src/bioservices/hgnc.py @@ -155,5 +155,3 @@ def search(self, database_or_query=None, query=None, frmt="json"): headers = self.services.get_headers(content=frmt) res = self.services.http_get(url, frmt=frmt, headers=headers) return res - - diff --git a/src/bioservices/kegg.py b/src/bioservices/kegg.py index 13c7c6e..1f038d0 100644 --- a/src/bioservices/kegg.py +++ b/src/bioservices/kegg.py @@ -434,7 +434,9 @@ def list(self, query, organism=None): if organism: if organism not in self.organismIds: - self.services.logging.error("""Invalid organism provided (%s). See the organismIds attribute""" % organism) + self.services.logging.error( + """Invalid organism provided (%s). 
See the organismIds attribute""" % organism + ) raise BioServicesError("Not a valid organism") if query not in ["pathway", "module"]: self.services.logging.error( diff --git a/src/bioservices/pdbe.py b/src/bioservices/pdbe.py index b51f43a..17749de 100644 --- a/src/bioservices/pdbe.py +++ b/src/bioservices/pdbe.py @@ -47,7 +47,7 @@ class PDBe: >>> from bioservices import PDBe >>> s = PDBe() - >>> res = s.get_file("1FBV", "pdb") + >>> res = s.get_files("1FBV") """ @@ -57,8 +57,8 @@ def __init__(self, verbose=False, cache=False): :param bool verbose: prints informative messages (default is off) """ - url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/" - self.services = REST(name="PDBe", url=url, verbose=verbose, cache=cache) + url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry" + self.services = REST(name="PDBe", url=url, verbose=verbose, cache=cache, url_defined_later=True) def _check_id(self, pdbid): if isinstance(pdbid, list): diff --git a/src/bioservices/quickgo.py b/src/bioservices/quickgo.py index dcb01e7..4952e4b 100644 --- a/src/bioservices/quickgo.py +++ b/src/bioservices/quickgo.py @@ -60,11 +60,7 @@ def __init__(self, verbose=False, cache=False): """ # super(QuickGO, self).__init__(url="http://www.ebi.ac.uk/QuickGO-Old", - self.services = REST(url="https://www.ebi.ac.uk/QuickGO", - name="quickGO", - verbose=verbose, - cache=cache - ) + self.services = REST(url="https://www.ebi.ac.uk/QuickGO", name="quickGO", verbose=verbose, cache=cache) def go_search(self, query, limit=600, page=1): """Searches a simple user query, e.g., query=apopto diff --git a/src/bioservices/reactome.py b/src/bioservices/reactome.py index 3365750..cc970af 100644 --- a/src/bioservices/reactome.py +++ b/src/bioservices/reactome.py @@ -578,5 +578,3 @@ def __str__(self): return txt """ - - diff --git a/src/bioservices/uniprot.py b/src/bioservices/uniprot.py index 5ba45e2..01b59c5 100644 --- a/src/bioservices/uniprot.py +++ b/src/bioservices/uniprot.py @@ -577,7 +577,7 @@ def search( 
limit=None, size=25, database="uniprotkb", - progress=True, + progress=False, ): """Provide some interface to the uniprot search interface. @@ -611,15 +611,16 @@ def search( >>> u.search('zap70+AND+organism_id:9606') >>> u.search("zap70+AND+taxonomy_id:9606", frmt="tsv", limit=3, - ... columns="entry_name,length,id, gene_names") - Entry name Length Entry Gene names - CBLB_HUMAN 982 Q13191 CBLB RNF56 Nbla00127 - CBL_HUMAN 906 P22681 CBL CBL2 RNF55 - CD3Z_HUMAN 164 P20963 CD247 CD3Z T3Z TCRZ + ... columns="accession,length,id, gene_names") + Entry Length Entry Name Gene Names + P43403 619 ZAP70_HUMAN ZAP70 SRK + P22681 906 CBL_HUMAN CBL CBL2 RNF55 + P20963 164 CD3Z_HUMAN CD247 CD3Z T3Z TCRZ + other examples:: - >>> u.search("ZAP70+AND+organism_id:9606", limit=3, columns="id,xref_pdb") + >> u.search("ZAP70+AND+organism_id:9606", limit=3, columns="id,xref_pdb") You can also do a search on several keywords. This is especially useful if you have a list of known entry names.:: diff --git a/test/webservices/test_cog.py b/test/webservices/test_cog.py index 737092e..f570ddf 100644 --- a/test/webservices/test_cog.py +++ b/test/webservices/test_cog.py @@ -1,22 +1,87 @@ from bioservices.cog import COG -import pytest def test_cog(): c = COG() - c.get_cogs() - c.get_cogs_by_gene("MK0280") - c.get_cogs_by_id("COG0003") - c.get_cogs_by_assembly_id("GCA_000007185.1") - c.get_cogs_by_orgnanism("Nitrosopumilus_maritimus_SCM1") - c.get_cogs_by_taxon_id("1229908") - c.get_cogs_by_category("ACTINOBACTERIA") - c.get_cogs_by_category_id("651137") - c.get_cogs_by_category_("AJP49128.1") - c.get_cogs_by_id_and_category("COG0004", "CYANOBACTERIA") - c.get_cogs_by_id_and_organism("COG0004", "Escherichia_coli_K-12_sub_MG1655") - c.get_all_cogs_definition() + c.get_cogs(page=2, organism=' Escherichia_coli_K-12_sub_MG1655') + + +def test_cogs_by_gene(): + c = COG() + c.get_cogs_by_gene("MK0280", page=1) + + +def test_cogs_by_id(): + c = COG() + c.get_cogs_by_id("COG0003", page=1) + + +def 
test_cogs_by_assembly_id(): + c = COG() + c.get_cogs_by_assembly_id("GCA_000007185.1", page=1) + + +def test_cogs_by_orgamism(): + c = COG() + c.get_cogs_by_organism("Nitrosopumilus_maritimus_SCM1", page=1) + + +def test_cogs_by_taxon_id(): + c = COG() + c.get_cogs_by_taxon_id("1229908", page=1) + + +def test_cogs_by_category(): + c = COG() + c.get_cogs_by_category("ACTINOBACTERIA", page=1) + + +def test_cogs_by_category_id(): + c = COG() + c.get_cogs_by_category_id("651137", page=1) + + +def test_cogs_by_protein_name(): + c = COG() + c.get_cogs_by_protein_name("AJP49128.1", page=1) + + +def test_cogs_by_id_and_category(): + c = COG() + c.get_cogs_by_id_and_category("COG0004", "CYANOBACTERIA", page=1) + + +def test_cogs_by_id_and_organism(): + c = COG() + c.get_cogs_by_id_and_organism("COG0004", "Escherichia_coli_K-12_sub_MG1655", page=1) + + +def test_get_all_cogs_definition(): + c = COG() + c.get_all_cogs_definition(page=1) + + +def test_cogs_definition_by_cog_id(): + c = COG() c.get_cog_definition_by_cog_id("COG0003") - c.get_cog_definition_by_name("Thiamin-binding stress-response protein YqgV, UPF0045 family") - c.get_taxonomic_categories() - c.get_taxonomic_category_by_name("ALPHAPROTEOBACTERIA") + + +def test_cogs_definition_by_name(): + c = COG() + c.get_cog_definition_by_name("Thiamin-binding stress-response protein YqgV, UPF0045 family", page=1) + + +def test_taxomomic_categories(): + c = COG() + c.get_taxonomic_categories(page=1) + + +def test_taxonomic_category_by_name(): + c = COG() + c.get_taxonomic_category_by_name("ALPHAPROTEOBACTERIA", page=1) + + +def test_search_organism(): + c = COG() + _ = c.search_organism("coli") + diff --git a/test/webservices/test_uniprot.py b/test/webservices/test_uniprot.py index 5dc09f5..7ff24d3 100644 --- a/test/webservices/test_uniprot.py +++ b/test/webservices/test_uniprot.py @@ -97,7 +97,7 @@ def test_quick_search(uniprot): def test_uniref(uniprot): - assert "goTerms" in uniprot.uniref("Q03063") + assert "entryType" 
in uniprot.uniref("Q03063") def test_get_df(uniprot): @@ -110,4 +110,11 @@ def test_fasta(uniprot): #https://github.com/cokelaer/bioservices/issues/245 def test_mapping_regression(uniprot): - uniprot.mapping("UniProtKB_AC-ID", "KEGG", "P43403,P123456") + # P123456 is a failed ID + res = uniprot.mapping("UniProtKB_AC-ID", "KEGG", "P43403,P123456") + assert res['failedIds'] + # here no failedId but we expect an empty failedIds in the returned dictionary (empty list) + res = uniprot.mapping("UniProtKB_AC-ID", "KEGG", "P43403") + assert res['failedIds'] == [] + +