From f1ae0cd7af2e06c117c5177635f915ef7e6c831a Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Wed, 29 Jun 2022 21:44:12 +0200 Subject: [PATCH 1/5] Fixes #217 (kegg new keywords) --- src/bioservices/kegg.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/bioservices/kegg.py b/src/bioservices/kegg.py index 8d17969..b053a2e 100644 --- a/src/bioservices/kegg.py +++ b/src/bioservices/kegg.py @@ -1354,29 +1354,30 @@ def _parse(self, res): output[key] = data # strip only: expecting a single line (string) elif key in [ - "POSITION", + "ANNOTATION", + "CATEGORY", + "CLASS", + "COMPOSITION", + "CREATED", + "DATA_SOURCE", + "DEFINITION", "DESCRIPTION", "ENTRY", - "ORGANISM", - "CLASS", - "SYMBOL", + "EFFICACY", + "EQUATION", "FORMULA", "KEYWORDS", - "CATEGORY", - "ANNOTATION", - "DATA_SOURCE", + "HISTORY", "MASS", - "COMPOSITION", - "STRUCTURE", + "ORGANISM", "ORG_CODE", - "CREATED", - "DEFINITION", + "POSITION", + "RCLASS", "KO_PATHWAY", - "EQUATION", + "SYMBOL", + "STRUCTURE", "TYPE", - "RCLASS", "SYSNAME", - "HISTORY", "REL_PATHWAY", ]: # get rid of \n From 9079a1ab97ed7f7b3274da537c83247659a4d826 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Fri, 1 Jul 2022 15:09:56 +0200 Subject: [PATCH 2/5] update bioservices uniprot using new API --- src/bioservices/uniprot.py | 978 ++++++++++++++++--------------- test/webservices/test_uniprot.py | 50 +- 2 files changed, 521 insertions(+), 507 deletions(-) diff --git a/src/bioservices/uniprot.py b/src/bioservices/uniprot.py index 1cddbbe..b6797ba 100644 --- a/src/bioservices/uniprot.py +++ b/src/bioservices/uniprot.py @@ -41,410 +41,416 @@ """ import io import sys +import time +import urllib +import json + +import pandas as pd from bioservices.services import REST from bioservices import logger logger.name = __name__ -try: - import pandas as pd -except: - pass -__all__ = ["UniProt"] -# TODO:: falt files to get list of identifiers -# http://www.ebi.ac.uk/uniprot/database/download.html -# grep sp uniprot_sprot.fasta | grep HUMAN | awk '{print substr($1, 12, length($1))}' - -mapping = { - "UniProtKB AC/ID": "ACC+ID", - "UniProtKB": "ACC", - "UniProtKB": "ID", - "UniParc": "UPARC", - "UniRef50": "NF50", - "UniRef90": "NF90", - "UniRef100": "NF100", - "EMBL/GenBank/DDBJ": "EMBL_ID", - "EMBL/GenBank/DDBJ CDS": "EMBL", - "PIR": "PIR", - "UniGene": "UNIGENE_ID", - "Entrez Gene (GeneID)": "P_ENTREZGENEID", - "GI number*": "P_GI", - "IPI": "P_IPI", - "RefSeq Protein": "P_REFSEQ_AC", - "RefSeq Nucleotide": "REFSEQ_NT_ID", - "PDB": "PDB_ID", - "DisProt": "DISPROT_ID", - "HSSP": "HSSP_ID", - "DIP": "DIP_ID", - "MINT": "MINT_ID", - "Allergome": "ALLERGOME_ID", - "MEROPS": "MEROPS_ID", - "mycoCLAP": "MYCOCLAP_ID", - "PeroxiBase": "PEROXIBASE_ID", - "PptaseDB": "PPTASEDB_ID", - "REBASE": "REBASE_ID", - "TCDB": "TCDB_ID", - "PhosSite": "PHOSSITE_ID", - "DMDM": "DMDM_ID", - "Aarhus/Ghent-2DPAGE": "AARHUS_GHENT_2DPAGE_ID", - "World-2DPAGE": "WORLD_2DPAGE_ID", - "DNASU": "DNASU_ID", - "Ensembl": "ENSEMBL_ID", - "Ensembl Protein": "ENSEMBL_PRO_ID", - "Ensembl Transcript": "ENSEMBL_TRS_ID", - "Ensembl Genomes": "ENSEMBLGENOME_ID", - "Ensembl Genomes Protein": "ENSEMBLGENOME_PRO_ID", - "Ensembl Genomes Transcript": "ENSEMBLGENOME_TRS_ID", - "GeneID": "P_ENTREZGENEID", - "GenomeReviews": "GENOMEREVIEWS_ID", - "KEGG": "KEGG_ID", - "PATRIC": "PATRIC_ID", - "UCSC": "UCSC_ID", - "VectorBase": "VECTORBASE_ID", - "AGD": "AGD_ID", - "ArachnoServer": "ARACHNOSERVER_ID", - "CGD": "CGD", - "ConoServer": "CONOSERVER_ID", - 
"CYGD": "CYGD_ID", - "dictyBase": "DICTYBASE_ID", - "EchoBASE": "ECHOBASE_ID", - "EcoGene": "ECOGENE_ID", - "euHCVdb": "EUHCVDB_ID", - "EuPathDB": "EUPATHDB_ID", - "FlyBase": "FLYBASE_ID", - "GeneCards": "GENECARDS_ID", - "GeneFarm": "GENEFARM_ID", - "GenoList": "GENOLIST_ID", - "H-InvDB": "H_INVDB_ID", - "HGNC": "HGNC_ID", - "HPA": "HPA_ID", - "LegioList": "LEGIOLIST_ID", - "Leproma": "LEPROMA_ID", - "MaizeGDB": "MAIZEGDB_ID", - "MIM": "MIM_ID", - "MGI": "MGI_ID", - "neXtProt": "NEXTPROT_ID", - "Orphanet": "ORPHANET_ID", - "PharmGKB": "PHARMGKB_ID", - "PomBase": "POMBASE_ID", - "PseudoCAP": "PSEUDOCAP_ID", - "RGD": "RGD_ID", - "SGD": "SGD_ID", - "TAIR": "TAIR_ID", - "TubercuList": "TUBERCULIST_ID", - "WormBase": "WORMBASE_ID", - "WormBase Transcript": "WORMBASE_TRS_ID", - "WormBase Protein": "WORMBASE_PRO_ID", - "Xenbase": "XENBASE_ID", - "ZFIN": "ZFIN_ID", - "eggNOG": "EGGNOG_ID", - "GeneTree": "GENETREE_ID", - "HOGENOM": "HOGENOM_ID", - "HOVERGEN": "HOVERGEN_ID", - "KO": "KO_ID", - "OMA": "OMA_ID", - "OrthoDB": "ORTHODB_ID", - "ProtClustDB": "PROTCLUSTDB_ID", - "BioCyc": "BIOCYC_ID", - "Reactome": "REACTOME_ID", - "UniPathWay": "UNIPATHWAY_ID", - "CleanEx": "CLEANEX_ID", - "GermOnline": "GERMONLINE_ID", - "ChEMBL": "CHEMBL_ID", - "ChiTaRS": "CHITARS_ID", - "DrugBank": "DRUGBANK_ID", - "GenomeRNAi": "GENOMERNAI_ID", - "NextBio": "NEXTBIO_ID", -} +__all__ = ["UniProt"] class UniProt: """Interface to the `UniProt `_ service - .. rubric:: Identifiers mapping between databases: - :: >>> from bioservices import UniProt >>> u = UniProt(verbose=False) - >>> u.mapping("ACC", "KEGG_ID", query='P43403') + >>> u.mapping("UniProtKB_AC-ID", "KEGG", query='P43403') defaultdict(, {'P43403': ['hsa:7535']}) >>> res = u.search("P43403") # Returns sequence on the ZAP70_HUMAN accession Id >>> sequence = u.search("ZAP70_HUMAN", columns="sequence") + + .. versionchanged:: 1.10 + + Uniprot update its service in June 2022. Changes were made in the bioservices + API with small changes. User API is more or less the same. Main issues that may + be faced are related to change of output column names. Please see the + :attr:`_legacy_names` for corresponding changes. + + Some notes about searches. The *and* and *or* are now upper cases. 
+ The *organism* and *taxonomy* fields are now *organism_id* and *taxonomy_id* + + """ - _mapping = mapping.copy() - _url = "https://www.uniprot.org" - # _valid_columns = ['citation', 'clusters', 'comments', 'database', - # 'domains', 'domain', 'ec', 'id', 'entry name', 'existence', - # 'families', 'feature', 'features', 'genes', 'go', 'go-id', 'interpro', - # 'interactor', 'keywords', 'keyword-id', 'last-modified', - # 'length', 'organism', 'organism-id', 'pathway', 'protein names', - # 'reviewed', 'score', 'sequence', '3d', 'subcellular locations', - # 'taxonomy', 'tools', 'version', 'virus hosts', 'lineage-id', - # 'sequence-modified', 'proteome'] + # June 2022, API changes and these labels changed: + _legacy_names = { + 'id': 'accession', + 'entry name': 'id', + 'genes': 'gene_names', + 'genes(PREFERRED)': 'gene_primary', + 'genes(ALTERNATIVE)': 'gene_synonym', + 'genes(OLN)': 'gene_oln', + 'genes(ORF)': 'gene_orf', + 'organism': 'organism_name', + 'organism-id': 'organism_id', + 'protein names': 'protein_name', + 'proteome': 'xref_proteomes', + 'lineage(ALL)': 'lineage', + 'virus hosts': 'virus_hosts', + + 'comment(ALTERNATIVE PRODUCTS)': 'cc_alternative_products', + 'feature(ALTERNATIVE SEQUENCE)': 'ft_var_seq', + 'comment(ERRONEOUS GENE MODEL PREDICTION)': 'error_gmodel_pred', + 'fragment': 'fragment', + 'encodedon': 'organelle', + 'length': 'length', + 'mass': 'mass', + 'comment(MASS SPECTROMETRY)': 'cc_mass_spectrometry', + 'feature(NATURAL VARIANT)': 'ft_variant', + 'feature(NON ADJACENT RESIDUES)': 'ft_non_cons', + 'feature(NON STANDARD RESIDUE)': 'ft_non_std', + 'feature(NON TERMINAL RESIDUE)': 'ft_non_ter', + 'comment(POLYMORPHISM)': 'cc_polymorphism', + 'comment(RNA EDITING)': 'cc_rna_editing', + 'sequence': 'sequence', + 'comment(SEQUENCE CAUTION)': 'cc_sequence_caution', + 'feature(SEQUENCE CONFLICT)': 'ft_conflict', + 'feature(SEQUENCE UNCERTAINTY)': 'ft_unsure', + 'version(sequence)': 'sequence_version', + + # function + 'comment(ABSORPTION)': 'absorption', + 'feature(ACTIVE SITE)': 'ft_act_site', + 'comment(ACTIVITY REGULATION)': 'cc_activity_regulation', + 'feature(BINDING SITE)': 'ft_binding', + 'chebi': 'ft_ca_bind', + 'chebi(Catalytic activity)': 'cc_catalytic_activity', + 'chebi(Cofactor)': 'cc_cofactor', + 'feature(DNA BINDING)': 'ft_dna_bind', + 'ec': 'ec', + 'comment(FUNCTION)': 'cc_function', + 'comment(KINETICS)': 'kinetics', + 'feature(METAL BINDING)': 'ft_metal', + 'feature(NP BIND)': 'ft_np_bind', + 'comment(PATHWAY)': 'cc_pathway', + 'comment(PH DEPENDENCE)': 'ph_dependence', + 'comment(REDOX POTENTIAL)': 'redox_potential', + 'rhea-id': 'rhea_id', + 'feature(SITE)': 'ft_site', + 'comment(TEMPERATURE DEPENDENCE)': 'temp_dependence', + + # misc + 'annotation score': 'annotation_score', + 'comment(CAUTION)': 'cc_caution', + 'features': 'feature', + 'keyword-id': 'keywordid', + 'keywords': 'keyword', + 'comment(MISCELLANEOUS)': 'cc_miscellaneous', + 'existence': 'protein_existence', + 'reviewed': 'reviewed', + 'tools': 'tools', + 'uniparcid': 'uniparc_id', + + # Interaction ============================= + "interactor": "cc_interaction", + "comment(SUBUNIT)": "cc_subunit", + + # GO + "go": "go", + "go(biological process)": "go_p", + "go(cellular component)": "go_c", + "go(molecular function)": "go_f", + "go-id": "go_id", + + # Date of + "created": "date_created", + "last-modified": "date_modified", + "sequence-modified": "date_sequence_modified", + "version(entry)": "version", + # STRUCTURE + "3d": "structure_3d", + "feature(BETA STRAND)": "ft_strand", + 
"feature(HELIX)": "ft_helix", + "feature(TURN)": "ft_turn", + + # subcellular function + "comment(SUBCELLULAR LOCATION)":"cc_subcellular_location", + "feature(INTRAMEMBRANE)":"ft_intramem", + "feature(TOPOLOGICAL DOMAIN)":"ft_topo_dom", + "feature(TRANSMEMBRANE)": "ft_transmem", + + # Pathology + 'comment(ALLERGEN)': 'cc_allergen', + 'comment(BIOTECHNOLOGY)': 'cc_biotechnology', + 'comment(DISRUPTION PHENOTYPE)': 'cc_disruption_phenotype', + 'comment(DISEASE)': 'cc_disease', + 'feature(MUTAGENESIS)': 'ft_mutagen', + 'comment(PHARMACEUTICAL)': 'cc_pharmaceutical', + 'comment(TOXIC DOSE)': 'cc_toxic_dose', + + # PTM + 'feature(CHAIN)': 'ft_chain', + 'feature(CROSS LINK)': 'ft_crosslnk', + 'feature(DISULFIDE BOND)': 'ft_disulfid', + 'feature(GLYCOSYLATION)': 'ft_carbohyd', + 'feature(INITIATOR METHIONINE)': 'ft_init_met', + 'feature(LIPIDATION)': 'ft_lipid', + 'feature(MODIFIED RESIDUE)': 'ft_mod_res', + 'feature(PEPTIDE)': 'ft_peptide', + 'comment(PTM)': 'cc_ptm', + 'feature(PROPEPTIDE)': 'ft_propep', + 'feature(SIGNAL)': 'ft_signal', + 'feature(TRANSIT)': 'ft_transit', + + # Family domains + 'feature(COILED COIL)': 'ft_coiled', + 'feature(COMPOSITIONAL BIAS)': 'ft_compbias', + 'comment(DOMAIN)': 'cc_domain', + 'feature(DOMAIN EXTENT)': 'ft_domain', + 'feature(MOTIF)': 'ft_motif', + 'families': 'protein_families', + 'feature(REGION)': 'ft_region', + 'feature(REPEAT)': 'ft_repeat', + 'comment(SIMILARITY)': '', + 'feature(ZINC FINGER)': 'ft_zn_fing', + + } + _valid_columns = [ - # Names & Taxonomy + # Names & Taxonomy ================================================ + "accession", "id", - "entry name", - "genes", - "genes(PREFERRED)", - "genes(ALTERNATIVE)", - "genes(OLN)", - "genes(ORF)", - "organism", - "organism-id", - "protein names", - "proteome", - "lineage(ALL)", - "lineage-id", - "virus hosts", - # Sequences + "gene_names", + "gene_primary", + "gene_synonym", + "gene_oln", + "gene_orf", + "organism_name", + "organism_id", + "protein_name", + "xref_proteomes", + "lineage", + "virus_hosts", + + # Sequences ======================================================== "fragment", "sequence", "length", "mass", - "encodedon", - "comment(ALTERNATIVE PRODUCTS)", - "comment(ERRONEOUS GENE MODEL PREDICTION)", - "comment(ERRONEOUS INITIATION)", - "comment(ERRONEOUS TERMINATION)", - "comment(ERRONEOUS TRANSLATION)", - "comment(FRAMESHIFT)", - "comment(MASS SPECTROMETRY)", - "comment(POLYMORPHISM)", - "comment(RNA EDITING)", - "comment(SEQUENCE CAUTION)", - "feature(ALTERNATIVE SEQUENCE)", - "feature(NATURAL VARIANT)", - "feature(NON ADJACENT RESIDUES)", - "feature(NON STANDARD RESIDUE)", - "feature(NON TERMINAL RESIDUE)", - "feature(SEQUENCE CONFLICT)", - "feature(SEQUENCE UNCERTAINTY)", - "version(sequence)", - # Family and Domains - "domains", - "domain", - "comment(DOMAIN)", - "comment(SIMILARITY)", - "feature(COILED COIL)", - "feature(COMPOSITIONAL BIAS)", - "feature(DOMAIN EXTENT)", - "feature(MOTIF)", - "feature(REGION)", - "feature(REPEAT)", - "feature(ZINC FINGER)", - # Function - "ec", - "comment(ABSORPTION)", - "comment(CATALYTIC ACTIVITY)", - "comment(COFACTOR)", - "comment(ENZYME REGULATION)", - "comment(FUNCTION)", - "comment(KINETICS)", - "comment(PATHWAY)", - "comment(REDOX POTENTIAL)", - "comment(TEMPERATURE DEPENDENCE)", - "comment(PH DEPENDENCE)", - "feature(ACTIVE SITE)", - "feature(BINDING SITE)", - "feature(DNA BINDING)", - "feature(METAL BINDING)", - "feature(NP BIND)", - "feature(SITE)", - # Gene Ontologys + "organelle", + "cc_alternative_products", + "error_gmodel_pred", + 
"cc_mass_spectrometry", + "cc_polymorphism", + "cc_rna_editing", + "cc_sequence_caution", + "ft_var_seq", + "ft_variant", + "ft_non_cons", + "ft_non_std", + "ft_non_ter", + "ft_conflict", + "ft_unsure", + "sequence_version", + + # Family and Domains ======================================== + 'ft_coiled', + 'ft_compbias', + 'cc_domain', + 'ft_domain', + 'ft_motif', + 'protein_families', + 'ft_region', + 'ft_repeat', + 'ft_zn_fing', + + # Function =================================================== + 'absorption', + 'ft_act_site', + 'cc_activity_regulation', + 'ft_binding', + 'ft_ca_bind', + 'cc_catalytic_activity', + 'cc_cofactor', + 'ft_dna_bind', + 'ec', + 'cc_function', + 'kinetics', + 'ft_metal', + 'ft_np_bind', + 'cc_pathway', + 'ph_dependence', + 'redox_potential', + #'rhea_id', + 'ft_site', + 'temp_dependence', + + # Gene Ontology ================================== "go", - "go(biological process)", - "go(molecular function)", - "go(cellular component)", - "go-id", - # InterPro - "interpro", - # Interaction - "interactor", - "comment(SUBUNIT)", + "go_p", + "go_f", + "go_c", + "go_id", + + # Interaction ====================================== + "cc_interaction", + "cc_subunit", + + # EXPRESSION ======================================= + "cc_developmental_stage", + "cc_induction", + "cc_tissue_specificity", + # Publications - "citation", - "citationmapping", + "lit_pubmed_id", + # Date of - "created", - "last-modified", - "sequence-modified", - "version(entry)", + "date_created", + "date_modified", + "date_sequence_modified", + "version", + # Structure - "3d", - "feature(BETA STRAND)", - "feature(HELIX)", - "feature(TURN)", + "structure_3d", + "ft_strand", + "ft_helix", + "ft_turn", + # Subcellular location - "comment(SUBCELLULAR LOCATION)", - "feature(INTRAMEMBRANE)", - "feature(TOPOLOGICAL DOMAIN)", - "feature(TRANSMEMBRANE)", - # Miscellaneous - "annotation score", - "score", - "features", - "comment(CAUTION)", - "comment(TISSUE SPECIFICITY)", - "comment(GENERAL)", - "keywords", - "context", - "existence", + "cc_subcellular_location", + "ft_intramem", + "ft_topo_dom", + "ft_transmem", + + # Miscellaneous ========================== + "annotation_score", + "cc_caution", + "comment_count", + #"feature", + "feature_count", + "keyword", + "keywordid", + "cc_miscellaneous", + "protein_existence", "tools", "reviewed", - "feature", - "families", - "subcellular locations", - "taxonomy", - "version", - "clusters", - "comments", - "database", - "keyword-id", - "pathway", - "score", - # Pathology & Biotech - "comment(ALLERGEN)", - "comment(BIOTECHNOLOGY)", - "comment(DISRUPTION PHENOTYPE)", - "comment(DISEASE)", - "comment(PHARMACEUTICAL)", - "comment(TOXIC DOSE)", + "uniparc_id", + + # Pathology + 'cc_allergen', + 'cc_biotechnology', + 'cc_disruption_phenotype', + 'cc_disease', + 'ft_mutagen', + 'cc_pharmaceutical', + 'cc_toxic_dose', + # PTM / Processsing - "comment(PTM)", - "feature(CHAIN)", - "feature(CROSS LINK)", - "feature(DISULFIDE BOND)", - "feature(GLYCOSYLATION)", - "feature(INITIATOR METHIONINE)", - "feature(LIPIDATION)", - "feature(MODIFIED RESIDUE)", - "feature(PEPTIDE)", - "feature(PROPEPTIDE)", - "feature(SIGNAL)", - "feature(TRANSIT)", - # Taxonomic lineage - "lineage(all)", - "lineage(SUPERKINGDOM)", - "lineage(KINGDOM)", - "lineage(SUBKINGDOM)", - "lineage(SUPERPHYLUM)", - "lineage(PHYLUM)", - "lineage(SUBPHYLUM)", - "lineage(SUPERCLASS)", - "lineage(CLASS)", - "lineage(SUBCLASS)", - "lineage(INFRACLASS)", - "lineage(SUPERORDER)", - "lineage(ORDER)", - "lineage(SUBORDER)", - 
"lineage(INFRAORDER)", - "lineage(PARVORDER)", - "lineage(SUPERFAMILY)", - "lineage(FAMILY)", - "lineage(SUBFAMILY)", - "lineage(TRIBE)", - "lineage(SUBTRIBE)", - "lineage(GENUS)", - "lineage(SUBGENUS)", - "lineage(SPECIES GROUP)", - "lineage(SPECIES SUBGROUP)", - "lineage(SPECIES)", - "lineage(SUBSPECIES)", - "lineage(VARIETAS)", - "lineage(FORMA)", - # Taxonomic identifier - "lineage-id(all)", - "lineage-id(SUPERKINGDOM)", - "lineage-id(KINGDOM)", - "lineage-id(SUBKINGDOM)", - "lineage-id(SUPERPHYLUM)", - "lineage-id(PHYLUM)", - "lineage-id(SUBPHYLUM)", - "lineage-id(SUPERCLASS)", - "lineage-id(CLASS)", - "lineage-id(SUBCLASS)", - "lineage-id(INFRACLASS)", - "lineage-id(SUPERORDER)", - "lineage-id(ORDER)", - "lineage-id(SUBORDER)", - "lineage-id(INFRAORDER)", - "lineage-id(PARVORDER)", - "lineage-id(SUPERFAMILY)", - "lineage-id(FAMILY)", - "lineage-id(SUBFAMILY)", - "lineage-id(TRIBE)", - "lineage-id(SUBTRIBE)", - "lineage-id(GENUS)", - "lineage-id(SUBGENUS)", - "lineage-id(SPECIES GROUP)", - "lineage-id(SPECIES SUBGROUP)", - "lineage-id(SPECIES)", - "lineage-id(SUBSPECIES)", - "lineage-id(VARIETAS)", - "lineage-id(FORMA)", - # Cross-references - "database(db_abbrev)", - "database(EMBL)", + 'ft_chain', + 'ft_crosslnk', + 'ft_disulfid', + 'ft_carbohyd', + 'ft_init_met', + 'ft_lipid', + 'ft_mod_res', + 'ft_peptide', + 'cc_ptm', + 'ft_propep', + 'ft_signal', + 'ft_transit', + + # not documented + 'xref_pdb' ] + _url = "https://rest.uniprot.org" def __init__(self, verbose=False, cache=False): """**Constructor** :param verbose: set to False to prevent informative messages + :param cache: set to True to cache request """ - self.services = REST(name="UniProt", url=UniProt._url, verbose=verbose, cache=cache) + self.services = REST(name="UniProt", url=UniProt._url, verbose=verbose, cache=cache, url_defined_later=True) self.TIMEOUT = 100 - + self._valid_mapping = None self._database = "uniprot" - def _download_flat_files(self): + def _download_flat_files(self, output="uniprot_sprot.dat.gz"): #pragma: no cover """could be used to get all data in flat files (about compressed 500Mb )""" + # deprecated in v1.10 due to API change in uniprot url = "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_sprot.dat.gz" self.services.logging.info("Downloading uniprot file from the web. May take some time.:") - import urllib - - urllib.urlretrieve(url, "uniprot_sprot.dat.gz") - - def mapping(self, fr="ID", to="KEGG_ID", query="P13368"): + urllib.request.urlretrieve(url, output) + + def _get_valid_mapping(self): + if not self._valid_mapping: + self._set_valid_mapping() + return self._valid_mapping + + def _set_valid_mapping(self): + fields = self.services.http_get("configure/idmapping/fields") + groups = fields["groups"] + rules = {} + for item in fields["rules"]: + ID = item['ruleId'] + rules[ID] = item + + # This is suppose to be a set of database name available in Uniprot + from_to = {} + for item in [x for group in groups for x in group['items']]: + # should be name, not DisplayName + name = item['name'] + if item['from']: + tos = rules[item['ruleId']]['tos'] + from_to[name] = tos + + self._valid_mapping = from_to + + valid_mapping = property(_get_valid_mapping, _set_valid_mapping) + + def mapping(self, fr="UniProtKB_AC-ID", to="KEGG", query="P13368", polling_interval_seconds=3, max_waiting_time=100): """This is an interface to the UniProt mapping service - :param fr: the source database identifier. See :attr:`_mapping`. - :param to: the targetted database identifier. See :attr:`_mapping`. 
+ :param fr: the source database identifier. See :attr:`valid_mapping`. + :param to: the targetted database identifier. See :attr:`valid_mapping`. :param query: a string containing one or more IDs separated by a space It can also be a list of strings. - :param format: The output being a dictionary, this parameter is - deprecated and not used anymore - :return: a list. The first element is the source database Id. The second - is the targetted source identifier. Following elements are alternate - of one the entry and its mapped Id. If a query has several mapped - Ids, the query is repeated (see example with PDB mapping here below) - e.g., ["From:ID", "to:PDB_ID", "P43403"] + :param polling_interval_seconds: the number of seconds between each status check of the current job + :param max_waiting_time: the maximum number of seconds to wait for the final answer. + :return: a dictionary with two possible keys. The first one is 'results' + with the from / to answers and the second one 'failedIds' with Ids that were not found :: - >>> u.mapping("ACC", "KEGG_ID", 'P43403') - defaultdict(, {'P43403': ['hsa:7535']}) - >>> u.mapping("ACC", "KEGG_ID", 'P43403 P00958') - defaultdict(, {'P00958': ['sce:YGR264C'], 'P43403': ['hsa:7535']}) - >>> u.mapping("ID", "PDB_ID", "P43403") - defaultdict(, {'P43403': ['1FBV', '1M61', '1U59', - '2CBL', '2OQ1', '2OZO', '2Y1N', '3ZNI', '4A4B', '4A4C', '4K2R']}) + >>> u.mapping("UniProtKB_AC-ID", "KEGG", 'P43403') + {'results': [{'from': 'P43403', 'to': 'hsa:7535'}]} + + The output is a dictionary. Identifiers that were not found are stored in the keys + 'failedIds'. Succesful queries are stored in the 'results' key that is a list + of dictionaries with two keys set to 'from' and 'to'. The 'from' key should be in your input list. + The 'to' key is the result. Here we have the KEGG identifier recognised by its prefix 'hsa:', which is for human. + Sometimes the output ('to') it is more complicated. Consider the following example:: + + u.mapping("UniParc", "UniProtKB", 'UPI0000000001,UPI0000000002') - There is a web page that gives the list of correct `database identifiers - `_. You can also look at the - :attr:`_mapping` attribute. + You will see that the UniParc results is more complex than just an identifier. - :URL: http://www.uniprot.org/mapping/ + See :attr:`valid_mapping` attribut for list of valid mapping identifiers. + + Note that according to Uniprot (June 2022), there are various limits on ID Mapping Job Submission: + + ========= ===================================================================================== + Limit Details + ========= ===================================================================================== + 100,000 Total number of ids allowed in comma separated param ids in /idmapping/run api + 500,000 Total number of "mapped to" ids allowed + 100,000 Total number of "mapped to" ids allowed to be enriched by UniProt data + 10,000 Total number of "mapped to" ids allowed with filtering + ========= ===================================================================================== .. versionchanged:: 1.1.1 to return a dictionary instaed of a list .. versionchanged:: 1.1.2 the values for each key is now made of a list @@ -453,130 +459,137 @@ def mapping(self, fr="ID", to="KEGG_ID", query="P13368"): instead of just a string .. versionchanged:: 1.3.1:: use http_post instead of http_get. This is 3 times faster and allows queries with more than 600 entries in one go. + .. 
version 1.10.0:: new API due to uniprot website update """ - url = "mapping/" # the slash matters - - query = self.services.devtools.list2string(query, sep=" ", space=False) - # if isinstance(query, list): - # query = " ".join(query) - params = {"from": fr, "to": to, "format": "tab", "query": query} - result = self.services.http_post(url, frmt="txt", data=params) - - # changes in version 1.1.1 returns a dictionary instead of list - try: - result = result.split() - del result[0] - del result[0] - except: - self.services.logging.warning("Results seems empty...returning empty dictionary.") - return {} - - if len(result) == 0: - return {} - else: - # bug fix based on ticket #19 version 1.1.2 - # the default dict set empty list for all keys by default - from collections import defaultdict - - result_dict = defaultdict(list) - keys = result[0::2] - values = result[1::2] - for i, key in enumerate(keys): - result_dict[key].append(values[i]) - return result_dict + if isinstance(query, (list, tuple)): + query = ",".join(query) + elif isinstance(query,str): + pass - def searchUniProtId(self, uniprot_id, frmt="xml"): - self.services.logging.warning("DEPRECATED SINCE VERSION 1.3.1. use retrieve instead") + # First, we call the real mapping request + params = {"from": fr, "to": to, "ids": query} - def retrieve(self, uniprot_id, frmt="xml", database="uniprot"): + job = self.services.http_post("idmapping/run", frmt="json", data=params) + try: + job_id = job['jobId'] + except TypeError: + logger.error(self.services.last_response.content.decode()) + return + + # the job id will tell us about the job status + results = None + waiting_time = 0 + while not results and waiting_time < max_waiting_time: + logger.info("Waiting for {job_id} to complete") + results = self.services.http_get(f"idmapping/status/{job_id}", frmt="json") + if results != 500 and 'results' in results: + return results + else: #pragma: no cover + time.sleep(polling_interval_seconds) + results = None + waiting_time += polling_interval_seconds + + def retrieve(self, uniprot_id, frmt="json", database="uniprot", include=False): """Search for a uniprot ID in UniProtKB database - :param str uniprot: a valid UniProtKB ID or a list of identifiers. + :param str uniprot: a valid UniProtKB ID, or uniref, uniparc or taxonomy. :param str frmt: expected output format amongst xml, txt, fasta, gff, rdf - :return: is a list of identifiers is provided, the output is also a list + :param str database: database name in (uniprot, uniparc, uniref, taxonomy) + :param bool include: include data with RDF format. + :return: if the parameter uniprot_id is string, the output will be a a list of identifiers is provided, the output is also a list otherwise, a string. The content of the string of items in the list depends on the value of **frmt**. :: >>> u = UniProt() - >>> res = u.retrieve("P09958", frmt="xml") - >>> fasta = u.retrieve([u'P29317', u'Q5BKX8', u'Q8TCD6'], frmt='fasta') + >>> res = u.retrieve("P09958", frmt="txt") + >>> fasta = u.retrieve(['P29317', 'Q5BKX8', 'Q8TCD6'], frmt='fasta') >>> print(fasta[0]) - """ - _valid_formats = ["txt", "xml", "rdf", "gff", "fasta"] - self.services.devtools.check_param_in_list(frmt, _valid_formats) - queries = self.services.devtools.to_list(uniprot_id) + .. versionchanged:: 1.10 the xml format is now returned as raw XML. It is not + interpreted anymore. The RDF has now an additional option to include data + from referenced data sets directly in the returned data (set include=True parameter). 
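        For example (an illustrative sketch based on the examples above; a single
        identifier returns one entry, a list of identifiers returns a list)::

            u = UniProt()
            entry = u.retrieve("P09958")               # JSON decoded into a dict
            raw = u.retrieve("P09958", frmt="xml")     # raw XML string, no longer parsed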
+ Default output format is now set to json. + """ + if database == 'uniprot': + if frmt not in ("txt", "xml", "rdf", "gff", "fasta", "json"):#pragma: no cover + self.services.logging.warning("frmt must be set to one of: txt, xml, rdf, gff, fasta, json.") + elif database == 'uniparc': + if frmt not in ( "xml", "rdf", "fasta", 'tsv', 'json'): #pragma: no cover + raise ValueError("frmt must be set to one of: tsv, xml, rdf, gff, fasta, json") + self.services.logging.warning("frmt must be set to one of: txt, xml, rdf, gff, fasta.") + elif database == 'uniref': + if frmt not in ("xml", "rdf", "fasta", 'tsv', 'json'): #pragma: no cover + self.services.logging.warning("frmt must be set to one of: xml, rdf, gff, fasta, json.") + elif database == "taxonomy": + pass + else: #pragma: no cover + self.services.logging.warning("database must be set to uniref, uniparc, uniprot or taxonomy") + + + if isinstance(uniprot_id, str): + queries = uniprot_id.split(",") + else: + queries = uniprot_id + #queries = self.services.devtools.to_list(uniprot_id) + # some magic here not documented on uniprot website...but multiple queries are possible url = [database + "/" + query + "." + frmt for query in queries] - res = self.services.http_get(url, frmt="txt") - if frmt == "xml": - res = [self.services.easyXML(x) for x in res] + + # the frmt=txt here is for the requests, nothing related to the uniprot format + res = self.services.http_get(url, frmt="txt", params={'include':include}) + if frmt == 'json': + for i, x in enumerate(res): + try: + res[i] = json.loads(x) + except: + pass + if isinstance(res, list) and len(res) == 1: res = res[0] return res - def get_fasta(self, id_): + def get_fasta(self, uniprot_id): """Returns FASTA string given a valid identifier + :param str uniprot_id: a valid identifier (e.g. P12345) - .. seealso:: :mod:`bioservices.apps.fasta` for dedicated tools to - manipulate FASTA - """ - from bioservices.apps.fasta import FASTA - - f = FASTA() - f.load_fasta(id_) - return f.fasta - - def get_fasta_sequence(self, id_): - """Returns FASTA sequence (Not FASTA) - - :param str id_: Should be the entry name - :return: returns fasta sequence (string) - - .. warning:: this is the sequence found in a fasta file, not the fasta - content itself. The difference is that the header is removed and the - formatting of end of lines every 60 characters is removed. + This is just an alias to :meth:`retrieve` when setting the format to 'fasta'. + Method kept for legacy. """ - from bioservices.apps.fasta import FASTA - - f = FASTA() - f.load_fasta(id_) - return f.sequence + res = self.retrieve(uniprot_id, frmt='fasta') + return res def search( self, query, - frmt="tab", + frmt="tsv", columns=None, - include=False, + include_isoforms=False, sort="score", compress=False, limit=None, offset=None, maxTrials=10, - database="uniprot", + database="uniprotkb", ): """Provide some interface to the uniprot search interface. :param str query: query must be a valid uniprot query. - See http://www.uniprot.org/help/text-search, http://www.uniprot.org/help/query-fields - See also example below + See https://www.uniprot.org/help/query-fields and examples below :param str frmt: a valid format amongst html, tab, xls, asta, gff, txt, xml, rdf, list, rss. If tab or xls, you can also provide the columns argument. (default is tab) :param str columns: comma-separated list of values. Works only if fomat - is tab or xls. For UnitProtKB, some possible columns are: - id, entry name, length, organism. 
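        Legacy column names can be translated with the class-level
        :attr:`_legacy_names` mapping (an illustrative sketch, not part of the
        public API)::

            legacy = "entry name,length,genes"
            fields = ",".join(UniProt._legacy_names.get(c.strip(), c.strip())
                              for c in legacy.split(","))
            # fields == "id,length,gene_names"
            u.search("ZAP70_HUMAN", frmt="tsv", limit=1, columns=fields)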
Some column name must be followed by - database name (e.g., "database(PDB)"). Again, see uniprot website - for more details. See also :attr:`~bioservices.uniprot.UniProt._valid_columns` - for the full list of column keyword. - :param bool include: include isoform sequences when the frmt + is tsv or xls. For UnitProtKB, some possible columns are: + id, entry name, length, organism. + See also :attr:`~bioservices.uniprot.UniProt.valid_mapping` + for the full list of column keywords. + :param bool include_isoform: include isoform sequences when the frmt parameter is fasta. Include description when frmt is rdf. :param str sort: by score by default. Set to None to bypass this behaviour :param bool compress: gzip the results @@ -589,9 +602,9 @@ def search( To obtain the list of uniprot ID returned by the search of zap70 can be retrieved as follows:: - >>> u.search('zap70+AND+organism:9606', frmt='list') - >>> u.search("zap70+and+taxonomy:9606", frmt="tab", limit=3, - ... columns="entry name,length,id, genes") + >>> u.search('zap70+AND+organism_id:9606') + >>> u.search("zap70+AND+taxonomy_id:9606", frmt="tsv", limit=3, + ... columns="entry_name,length,id, gene_names") Entry name Length Entry Gene names CBLB_HUMAN 982 Q13191 CBLB RNF56 Nbla00127 CBL_HUMAN 906 P22681 CBL CBL2 RNF55 @@ -599,24 +612,24 @@ def search( other examples:: - >>> u.search("ZAP70+AND+organism:9606", limit=3, columns="id,database(PDB)") + >>> u.search("ZAP70+AND+organism_id:9606", limit=3, columns="id,xref_pdb") You can also do a search on several keywords. This is especially useful if you have a list of known entry names.:: - >>> u.search("ZAP70_HUMAN+or+CBL_HUMAN", frmt="tab", limit=3, + >>> u.search("ZAP70_HUMAN+OR+CBL_HUMAN", frmt="tsv", limit=3, ... columns="entry name,length,id, genes") Entry name Length Entry Gene names - Finally, note that when search for a query, you may have several hits:: + Finally, note that when you search for a query, you may have several hits:: >>> u.search("P12345) - including the ID P12345 but also related entries. If you need only the - entry that perfectly match the query, use:: + including the ID P12345 but also related entries. If you + need only the entry that perfectly match the query, use:: - >>> u.search("id:P12345") + >>> u.search("accession:P12345") This was provided from a user issue that was solved here: https://github.com/cokelaer/bioservices/issues/122 @@ -625,16 +638,24 @@ def search( .. warning:: some columns although valid may not return anything, not even in the header: 'score', 'taxonomy', 'tools'. this is a uniprot feature, not bioservices. + + .. 
versionchanged:: 1.10 + + Due to uniprot API changes in June 2022: + + * parameter 'include' is not named 'include_isoform + * default parameter 'tab' is now 'tsv' but does not change the results + """ params = {} if frmt is not None: _valid_formats = [ - "tab", "xls", "fasta", "gff", "txt", + "tsv", "xml", "rss", "list", @@ -645,7 +666,7 @@ def search( params["format"] = frmt if columns is not None: - self.services.devtools.check_param_in_list(frmt, ["tab", "xls"]) + self.services.devtools.check_param_in_list(frmt, ["tsv", "xls"]) # remove unneeded spaces before/after commas if any if "," in columns: @@ -653,17 +674,12 @@ def search( else: columns = [columns] - for col in columns: - if col.startswith("database(") is True: - pass - else: - self.services.devtools.check_param_in_list(col, self._valid_columns) # convert back to a string as expected by uniprot - params["columns"] = ",".join([x.strip() for x in columns]) + params["fields"] = ",".join([x.strip() for x in columns]) - if include is True and frmt in ["fasta", "rdf"]: - params["include"] = "yes" + if include_isoforms is True and frmt in ["fasta", "rdf"]: + params["includeIsoform"] = "yes" if compress is True: params["compress"] = "yes" @@ -673,31 +689,35 @@ def search( params["sort"] = sort if offset is not None: - if isinstance(offset, int): - params["offset"] = offset + #if isinstance(offset, int): + params["cursor"] = offset if limit is not None: if isinstance(limit, int): - params["limit"] = limit + params["size"] = limit + + # + are interpreted and have a meaning. See arrayexpress module for details - # + are interpreted and have a meaning. - params["query"] = query.replace("+", " ") - # res = s.request("/uniprot/?query=zap70+AND+organism:9606&format=xml", params) - res = self.services.http_get(database + "/", frmt="txt", params=params) + query = query.replace("+", " ") + params['query'] = query + del params['sort'] + + res = self.services.http_get(f"{database}/search", frmt="txt", params=params) return res - def quick_search(self, query, include=False, sort="score", limit=None): + def quick_search(self, query, include_isoforms=False, sort="score", limit=None): """a specialised version of :meth:`search` This is equivalent to:: u = uniprot.UniProt() - u.search(query, frmt="tab", include=False, sor="score", limit=None) + u.search(query, frmt='tsv', sort="score", limit=None) :returns: a dictionary. """ - res = self.search(query, "tab", include=include, sort=sort, limit=limit) + res = self.search(query, "tsv", include_isoforms=include_isoforms, + sort=sort, limit=limit) # if empty result, nothing to do if res and len(res) == 0: @@ -722,33 +742,23 @@ def quick_search(self, query, include=False, sort="score", limit=None): def uniref(self, query): """Calls UniRef service - :return: if you have Pandas installed, returns a dataframe (see example) - + This is an alias to :meth:`retrieve` :: >>> u = UniProt() - >>> df = u.uniref("member:Q03063") # of just A03063 - >>> df.Size + >>> u.uniref("Q03063") Another example from https://github.com/cokelaer/bioservices/issues/121 is the combination of uniprot and uniref filters:: - u.uniref("uniprot:(ec:1.1.1.282 taxonomy:bacteria reviewed:yes) AND identity:0.5") + u.uniref("uniprot:(ec:1.1.1.282 taxonomy_name:bacteria reviewed:true)") + .. versionchanged:: 1.10 due to uniprot API changes in June 2022, + we now return a json instead of a pandas dataframe. 
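        If a dataframe is still needed, it can be rebuilt from the JSON, for
        instance (an illustrative sketch; key names such as 'goTerms' depend on
        the UniRef entry, see the unit tests)::

            import pandas as pd

            res = u.uniref("Q03063")                   # dict decoded from JSON
            df = pd.json_normalize(res.get("goTerms", []))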
""" - try: - import pandas as pd - except: - print("uniref method requires Pandas", file=sys.stderr) - return res = self.services.http_get( - "uniref/", params={"query": query, "format": "tab"}, frmt="txt" + f"uniref/UniRef90_{query}.json", frmt="json" ) - try: - # python 2.X - res = pd.read_csv(io.StringIO(unicode(res)), sep="\t") - except: - res = pd.read_csv(io.StringIO(str(res.strip())), sep="\t") return res def get_df(self, entries, nChunk=100, organism=None, limit=10): @@ -762,10 +772,6 @@ def get_df(self, entries, nChunk=100, organism=None, limit=10): set it to None to keep all entries but this will be very slow :return: dataframe with indices being the uniprot id (e.g. DIG1_YEAST) - .. todo:: cleanup the content of the data frame to replace strings - separated by ; into a list of strings. e.g. the Gene Ontology IDs - - .. warning:: requires pandas library """ if isinstance(entries, str): entries = [entries] @@ -786,18 +792,19 @@ def get_df(self, entries, nChunk=100, organism=None, limit=10): this_entries = entries[i * nChunk : (i + 1) * nChunk] if len(this_entries): self.services.logging.info("uniprot.get_df {}/{}".format(i + 1, N)) - query = "+or+".join(this_entries) + query = "+OR+".join(this_entries) if organism: - query += "+and+" + organism + query += f"+AND+{organism}" res = self.search( query, - frmt="tab", + frmt="tsv", columns=",".join(self._valid_columns), limit=limit, ) else: break + if len(res) == 0: self.services.logging.warning("some entries %s not found" % entries) else: @@ -806,6 +813,7 @@ def get_df(self, entries, nChunk=100, organism=None, limit=10): df = pd.read_csv(io.StringIO(unicode(res)), sep="\t") except: df = pd.read_csv(io.StringIO(str(res)), sep="\t") + if isinstance(output, type(None)): output = df.copy() else: @@ -813,22 +821,15 @@ def get_df(self, entries, nChunk=100, organism=None, limit=10): # you may end up with duplicated... output.drop_duplicates(inplace=True) - # you may have new entries... - # output = output[output.Entry.apply(lambda x: x in entries)] - # to transform into list: columns = [ - "PubMed ID", - "Comments", - u"Domains", - "Protein families", + "lit_pubmed_id", + "protein_families", "Gene names", - "Gene ontology (GO)", - "Gene ontology IDs", - "InterPro", - "Interacts with", - "Keywords", + "go", + "go_ids", + "interaction", + "keyword", ] - #'Subcellular location'] for col in columns: try: res = output[col].apply( @@ -841,7 +842,8 @@ def get_df(self, entries, nChunk=100, organism=None, limit=10): self.services.logging.warning("column could not be parsed. %s" % col) # Sequences are splitted into chunks of 10 characters. 
let us rmeove # the spaces: - output["Sequence"].fillna("", inplace=True) - output.Sequence = output["Sequence"].apply(lambda x: x.replace(" ", "")) + if "sequence" in output.columns: + output["sequence"].fillna("", inplace=True) + output.Sequence = output["sequence"].apply(lambda x: x.replace(" ", "")) return output diff --git a/test/webservices/test_uniprot.py b/test/webservices/test_uniprot.py index 03e8309..920cb76 100644 --- a/test/webservices/test_uniprot.py +++ b/test/webservices/test_uniprot.py @@ -9,43 +9,55 @@ def uniprot(): def test_mapping(uniprot): - res = uniprot.mapping(fr="ACC+ID", to="KEGG_ID", query='P43403') - assert res['P43403'] == ['hsa:7535'] + + assert "KEGG" in uniprot.valid_mapping + res = uniprot.mapping("UniProtKB_AC-ID", "KEGG", "P43403,P123456") + res = uniprot.mapping("UniProtKB_AC-ID", "KEGG", ["P43403","P123456"]) + assert len(res['results']) == 1 + assert len(res['failedIds']) == 1 + try: - res = uniprot.mapping(fr="AC", to="KEID", query='P434') + res = uniprot.mapping("UniProtKB_AC-ID", "KEGGDUMMY", "P43403,P123456") assert False except: assert True def test_retrieve(uniprot): - uniprot.retrieve("P09958", frmt="rdf") - uniprot.retrieve("P09958", frmt="xml") - uniprot.retrieve("P09958", frmt="txt") - uniprot.retrieve("P09958", frmt="fasta") - uniprot.retrieve("P09958", frmt="gff") - try: - uniprot.retrieve("P09958", frmt="dummy") - assert False - except: - assert True + + for frmt in ['rdf', 'xml', 'txt', 'fasta', 'gff', 'json']: + uniprot.retrieve("P09958", frmt=frmt) + + # test input parameters + assert uniprot.retrieve("P09958", frmt='json') == uniprot.retrieve(['P09958'], frmt='json') + + for frmt in ['rdf', 'xml', 'txt', 'fasta', 'gff', 'json']: + uniprot.retrieve("P09958", frmt=frmt, database='uniref') + + assert uniprot.retrieve("P09958", frmt='json', database='dummy') in [400, 404] + def test_search(uniprot): + # two strings, or list, or a single string + uniprot.search("P43403", columns="id") + + uniprot.search('zap70+AND+organism:9606', frmt='list') - uniprot.search("zap70+and+taxonomy:9606", frmt="tab", limit=3, + uniprot.search("zap70+and+taxonomy:9606", frmt="tsv", limit=3, columns="entry name,length,id, genes, genes(PREFERRED), interpro, interactor") - uniprot.search("zap70+and+taxonomy:9606", frmt="tab", limit=3, + uniprot.search("zap70+and+taxonomy:9606", frmt="tsv", limit=3, columns="entry name, go(biological process), comment(FUNCTION), comment(DOMAIN), lineage(all)") - uniprot.search("ZAP70_HUMAN", frmt="tab", columns="sequence", limit=1) + uniprot.search("ZAP70_HUMAN", frmt="tsv", columns="sequence", limit=1) + +def test_quick_search(uniprot): uniprot.quick_search("ZAP70") + uniprot.quick_search("ZAP70","ZAP70") def test_uniref(uniprot): - df = uniprot.uniref("member:Q03063") - df.Size + assert 'goTerms' in uniprot.uniref("Q03063") def test_get_df(uniprot): df = uniprot.get_df(["P43403"]) def test_fasta(uniprot): "Q9Y617" in uniprot.get_fasta(["Q9Y617-1"]) - "Q9Y617" not in uniprot.get_fasta_sequence(["Q9Y617-1"]) From 7808ab011884e702286dc3d9773d808081a7bae3 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Fri, 1 Jul 2022 15:12:07 +0200 Subject: [PATCH 3/5] Remove py2.X support --- src/bioservices/uniprot.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/bioservices/uniprot.py b/src/bioservices/uniprot.py index b6797ba..c8847d5 100644 --- a/src/bioservices/uniprot.py +++ b/src/bioservices/uniprot.py @@ -40,7 +40,6 @@ """ import io -import sys import time import urllib import json @@ -808,11 +807,7 @@ 
def get_df(self, entries, nChunk=100, organism=None, limit=10): if len(res) == 0: self.services.logging.warning("some entries %s not found" % entries) else: - try: - # python 2.X - df = pd.read_csv(io.StringIO(unicode(res)), sep="\t") - except: - df = pd.read_csv(io.StringIO(str(res)), sep="\t") + df = pd.read_csv(io.StringIO(str(res)), sep="\t") if isinstance(output, type(None)): output = df.copy() From 8236c2a6313a8ec74b3aa7c30e85d884cef46cd4 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Fri, 1 Jul 2022 15:34:23 +0200 Subject: [PATCH 4/5] update metadata --- README.rst | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index b8d43e6..dc8092d 100644 --- a/README.rst +++ b/README.rst @@ -176,6 +176,7 @@ Changelog ========= ==================================================================== Version Description ========= ==================================================================== +1.10.0 * Update uniprot to use the new API (june 2022) 1.9.0 * Update unichem to reflect new API 1.8.4 * biomodels. Fix #208 * KEGG: fixed #204 #202 and #203 diff --git a/setup.py b/setup.py index 822ac66..76d1d25 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ _MAJOR = 1 -_MINOR = 9 +_MINOR = 10 _MICRO = 0 version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) release = '%d.%d' % (_MAJOR, _MINOR) From 5b3160f08d48820fb10c2bb1edac12d82e0d6442 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Fri, 1 Jul 2022 15:54:22 +0200 Subject: [PATCH 5/5] Fix peptides.py to reflect new uniprot api --- src/bioservices/apps/peptides.py | 5 +++-- test/test_apps_peptides.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bioservices/apps/peptides.py b/src/bioservices/apps/peptides.py index 7566a12..3fc6aab 100644 --- a/src/bioservices/apps/peptides.py +++ b/src/bioservices/apps/peptides.py @@ -8,7 +8,6 @@ class Peptides(object): :: >>> p = Peptides() - >>> p.get_fasta_sequence("Q8IYB3") >>> p.get_peptide_position("Q8IYB3", "VPKPEPIPEPKEPSPE") 189 @@ -25,9 +24,11 @@ def __init__(self, verbose=False): self.sequences = {} def get_fasta_sequence(self, uniprot_name): - seq = self.u.get_fasta_sequence(uniprot_name) + seq = self.u.get_fasta(uniprot_name) + seq = "".join(seq.split("\n")[1:]) return seq + def get_phosphosite_position(self, uniprot_name, peptide): if uniprot_name not in self.sequences.keys(): seq = self.get_fasta_sequence(uniprot_name) diff --git a/test/test_apps_peptides.py b/test/test_apps_peptides.py index 447528b..c36071d 100644 --- a/test/test_apps_peptides.py +++ b/test/test_apps_peptides.py @@ -6,5 +6,4 @@ def test_peptides(): p = Peptides() pos = p.get_phosphosite_position("Q8IYB3", "VPKPEPIPEPKEPSPE") assert pos == [740, 901] - p.get_fasta_sequence("Q8IYB3")
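
A quick way to sanity-check the updated Peptides helper against the new UniProt
API is to mirror the unit test above (an illustrative sketch; requires network
access)::

    from bioservices.apps.peptides import Peptides

    p = Peptides()
    # get_fasta_sequence() now strips the FASTA header returned by UniProt.get_fasta()
    print(len(p.get_fasta_sequence("Q8IYB3")) > 0)
    print(p.get_phosphosite_position("Q8IYB3", "VPKPEPIPEPKEPSPE"))   # expected: [740, 901]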