From 708f5857cc8114b98b3b7974a53284e90b1017c5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 27 Nov 2020 13:13:54 +0000 Subject: [PATCH 01/12] split the ida modifier --- webfront/views/modifiers.py | 39 +++++++++++++++++++------------------ webfront/views/protein.py | 4 ++-- webfront/views/taxonomy.py | 9 +++++++++ 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 5b686313..1af75fef 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -384,31 +384,32 @@ def filter_by_latest_entries(value, general_handler): def get_domain_architectures(field, general_handler): - searcher = general_handler.searcher - size = general_handler.pagination["size"] - cursor = general_handler.pagination["cursor"] - if field is None or field.strip() == "": # TODO: is there a better way to get this? accession = general_handler.queryset_manager.filters["entry"][ "accession__iexact" ] return ida_search(accession, general_handler) - else: - query = ( - general_handler.queryset_manager.get_searcher_query() - + " && ida_id:" - + field - ) - res, length, after_key, before_key = searcher.get_list_of_endpoint( - "protein", rows=size, query=query, cursor=cursor - ) - general_handler.modifiers.search_size = length - general_handler.modifiers.after_key = after_key - general_handler.modifiers.before_key = before_key - return filter_queryset_accession_in( - general_handler.queryset_manager.get_base_queryset("protein"), res - ) + + +def filter_by_domain_architectures(field, general_handler): + searcher = general_handler.searcher + size = general_handler.pagination["size"] + cursor = general_handler.pagination["cursor"] + + query = ( + general_handler.queryset_manager.get_searcher_query() + " && ida_id:" + field + ) + endpoint = general_handler.queryset_manager.main_endpoint + res, length, after_key, before_key = searcher.get_list_of_endpoint( + endpoint, rows=size, query=query, cursor=cursor + ) + general_handler.modifiers.search_size = length + general_handler.modifiers.after_key = after_key + general_handler.modifiers.before_key = before_key + return filter_queryset_accession_in( + general_handler.queryset_manager.get_base_queryset(endpoint), res + ) def get_entry_annotation(field, general_handler): diff --git a/webfront/views/protein.py b/webfront/views/protein.py index 98884474..dbe8be87 100644 --- a/webfront/views/protein.py +++ b/webfront/views/protein.py @@ -9,7 +9,7 @@ filter_by_field, filter_by_boolean_field, get_single_value, - get_domain_architectures, + filter_by_domain_architectures, filter_by_contains_field, filter_by_match_presence, add_extra_fields, @@ -164,7 +164,7 @@ def get( general_handler.queryset_manager.add_filter("protein", source_database=ds) general_handler.modifiers.register( "ida", - get_domain_architectures, + filter_by_domain_architectures, use_model_as_payload=True, serializer=SerializerDetail.PROTEIN_HEADERS, many=True, diff --git a/webfront/views/taxonomy.py b/webfront/views/taxonomy.py index 86b6c1a7..b2caebe7 100644 --- a/webfront/views/taxonomy.py +++ b/webfront/views/taxonomy.py @@ -7,6 +7,7 @@ filter_by_key_species, filter_by_entry, filter_by_entry_db, + filter_by_domain_architectures, ) @@ -94,6 +95,14 @@ def get( "extra_fields", add_extra_fields(Taxonomy, "counters") ) general_handler.modifiers.register("key_species", filter_by_key_species) + general_handler.modifiers.register( + "ida", + filter_by_domain_architectures, + use_model_as_payload=True, + serializer=SerializerDetail.TAXONOMY_HEADERS, + many=True, + ) + return super(UniprotHandler, self).get( request._request, endpoint_levels, From 61e2c26e6afa500cb0ca05882869bc07a75a3f05 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 30 Nov 2020 09:18:37 +0000 Subject: [PATCH 02/12] adding a new type of modifier: extender --- webfront/constants.py | 6 ++++++ webfront/pagination.py | 20 +++++++++--------- webfront/views/MODIFIER_README.md | 5 ++++- webfront/views/custom.py | 11 ++++++++-- webfront/views/entry.py | 34 +++++++++++++++++------------- webfront/views/modifier_manager.py | 30 ++++++++++++++++++++------ webfront/views/modifiers.py | 24 +++++++++++++++++++++ webfront/views/protein.py | 21 +++++++++++------- webfront/views/proteome.py | 3 ++- webfront/views/set.py | 3 ++- webfront/views/structure.py | 3 ++- webfront/views/taxonomy.py | 11 +++++++--- 12 files changed, 122 insertions(+), 49 deletions(-) diff --git a/webfront/constants.py b/webfront/constants.py index 66c3093f..c17aaaee 100644 --- a/webfront/constants.py +++ b/webfront/constants.py @@ -64,3 +64,9 @@ class QuerysetType(Enum): ENTRY_PROTEIN = 150 ENTRY_STRUCTURE = 160 STRUCTURE_PROTEIN = 250 + + +class ModifierType(Enum): + FILTER = 1 + REPLACE_PAYLOAD = 2 + EXTEND_PAYLOAD = 3 diff --git a/webfront/pagination.py b/webfront/pagination.py index 348b2a15..c7b7615b 100644 --- a/webfront/pagination.py +++ b/webfront/pagination.py @@ -33,16 +33,16 @@ class CustomPagination(CursorPagination): before_key = None def get_paginated_response(self, data): - return Response( - OrderedDict( - [ - ("count", self.current_size), - ("next", self.get_next_link()), - ("previous", self.get_previous_link()), - ("results", data), - ] - ) - ) + base = [ + ("count", self.current_size), + ("next", self.get_next_link()), + ("previous", self.get_previous_link()), + ("results", data["data"]), + ] + if "extensions" in data and len(data["extensions"]) > 0: + for ext in data["extensions"]: + base.append((ext, data["extensions"][ext])) + return Response(OrderedDict(base)) def _get_position_from_instance(self, instance, ordering): if type(instance) == tuple: diff --git a/webfront/views/MODIFIER_README.md b/webfront/views/MODIFIER_README.md index 1890e2b4..de00bc0c 100644 --- a/webfront/views/MODIFIER_README.md +++ b/webfront/views/MODIFIER_README.md @@ -12,7 +12,10 @@ This are the parameters of the method to register a modifier: * `action`: The modifier function. It should returns a queryset or None. And its parameters are: * `value`: The value given as a URL parameter. * `general_handler`: The handler that is in charge of the current request. -* `use_model_as_payload`: (default: `False`) If the modifier needs to replace the queryset to be serialized. +* `type`: (default: `ModifierType.EXTEND_PAYLOAD`) There are 3 types of modifiers: + * `ModifierType.FILTER` It add new filters into the queryset before it gets executed or serialized. + * `ModifierType.REPLACE_PAYLOAD` Creates a new payload that replaces the one that would normally been used by the `custom view` + * `ModifierType.EXTEND_PAYLOAD` Extend an already calculated payload. The data extending the payload is added as a new key at the level of `"metadata"` or `"results"`. * `serializer`: (default: `None`) In case the modification requires to be serialized in an specific way. * `many`: (default: `False`) This is to explicitely indicate when the modifier queryset has *many* results and needs to be iterated. This is useful to indicate the pagination logic needs to be included. Note that this only makes sense if `use_model_as_payload == True`. * `works_in_single_endpoint`: (default: `True`) It indicates that a given modifier works for single endpoints URLs. If is false it will raise an exception when a single endpoint URL has this modifier diff --git a/webfront/views/custom.py b/webfront/views/custom.py index 3805adf8..9f6e864b 100644 --- a/webfront/views/custom.py +++ b/webfront/views/custom.py @@ -50,7 +50,7 @@ class CustomView(GenericAPIView): serializer_detail_filter = SerializerDetail.ALL after_key = None before_key = None - http_method_names = ['get', 'head'] + http_method_names = ["get", "head"] def get( self, @@ -157,8 +157,15 @@ def get( queryset_manager=general_handler.queryset_manager, ) + payload = {"data": serialized.data} + extensions = general_handler.modifiers.execute_extenders( + drf_request, serialized.data + ) + if extensions != {}: + payload["extensions"] = extensions + if self.many: - return self.get_paginated_response(serialized.data) + return self.get_paginated_response(payload) else: return Response(serialized.data) diff --git a/webfront/views/entry.py b/webfront/views/entry.py index bc98486e..afb882ea 100644 --- a/webfront/views/entry.py +++ b/webfront/views/entry.py @@ -1,5 +1,5 @@ from django.db.models import Count -from webfront.models.interpro_new import Release_Note +from webfront.constants import ModifierType from webfront.exceptions import DeletedEntryError from webfront.models import Entry @@ -52,16 +52,18 @@ def get( general_handler.modifiers.register( "annotation", get_entry_annotation, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.ANNOTATION_BLOB, ) general_handler.modifiers.register( - "annotation:info", get_entry_annotation_info, use_model_as_payload=True + "annotation:info", + get_entry_annotation_info, + type=ModifierType.REPLACE_PAYLOAD, ) general_handler.modifiers.register( "ida", get_domain_architectures, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.IDA_LIST, ) @@ -136,7 +138,7 @@ def get( "tax_id": "tax_id", }, ), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, # serializer=SerializerDetail.GROUP_BY_MEMBER_DATABASES ) general_handler.modifiers.register( @@ -144,7 +146,9 @@ def get( ) general_handler.modifiers.register( - "interpro_status", get_interpro_status_counter, use_model_as_payload=True + "interpro_status", + get_interpro_status_counter, + type=ModifierType.REPLACE_PAYLOAD, ) return super(MemberHandler, self).get( @@ -211,16 +215,18 @@ def get( general_handler.modifiers.register( "ida", get_domain_architectures, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.IDA_LIST, ) general_handler.modifiers.register( "interactions", get_value_for_field("interactions"), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, ) general_handler.modifiers.register( - "pathways", get_value_for_field("pathways"), use_model_as_payload=True + "pathways", + get_value_for_field("pathways"), + type=ModifierType.REPLACE_PAYLOAD, ) return super(AccessionHandler, self).get( @@ -382,7 +388,7 @@ def get( "go_terms": "text", }, ), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, # serializer=SerializerDetail.GROUP_BY_MEMBER_DATABASES ) general_handler.modifiers.register( @@ -525,7 +531,7 @@ def get( "go_terms": "text", }, ), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.GROUP_BY, ) general_handler.modifiers.register( @@ -546,14 +552,12 @@ def get( filter_by_contains_field("entry", "go_terms", '"identifier": "{}"'), ) general_handler.modifiers.register( - "annotation", - filter_by_contains_field("entry", "entryannotation__type"), - use_model_as_payload=False, + "annotation", filter_by_contains_field("entry", "entryannotation__type") ) general_handler.modifiers.register( "ida_search", ida_search, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.IDA_LIST, ) response = super(EntryHandler, self).get( diff --git a/webfront/views/modifier_manager.py b/webfront/views/modifier_manager.py index ce1702e9..f9f6cfb4 100644 --- a/webfront/views/modifier_manager.py +++ b/webfront/views/modifier_manager.py @@ -1,4 +1,5 @@ from webfront.views.custom import is_single_endpoint +from webfront.constants import ModifierType class ModifierManager: @@ -16,7 +17,7 @@ def register( self, parameter, action, - use_model_as_payload=False, + type=ModifierType.FILTER, serializer=None, many=False, works_in_single_endpoint=True, @@ -24,7 +25,7 @@ def register( ): self.modifiers[parameter] = { "action": action, - "use_model_as_payload": use_model_as_payload, + "type": type, "serializer": serializer, "many": many, "works_in_single_endpoint": works_in_single_endpoint, @@ -39,13 +40,13 @@ def _check_modifier(self, modifier): single = is_single_endpoint(self.general_handler) if single and not self.modifiers[modifier]["works_in_single_endpoint"]: raise Exception( - "The modifier '{}' doen't work on URLs of a single endpoint".format( + "The modifier '{}' doesn't work on URLs of a single endpoint".format( modifier ) ) if not single and not self.modifiers[modifier]["works_in_multiple_endpoint"]: raise Exception( - "The modifier '{}' doen't work on URLs of multiple endpoints".format( + "The modifier '{}' doesn't work on URLs of multiple endpoints".format( modifier ) ) @@ -53,12 +54,13 @@ def _check_modifier(self, modifier): def execute(self, request): payload_modifiers = {} queryset_modifiers = {} - for p, m in self.modifiers.items(): - if m["use_model_as_payload"]: + for p, m in self.modifiers.items(): + if m["type"] == ModifierType.REPLACE_PAYLOAD: payload_modifiers[p] = m - else: + elif m["type"] == ModifierType.FILTER: queryset_modifiers[p] = m + for modifier in queryset_modifiers: param = request.query_params.get(modifier) if param is not None: @@ -86,3 +88,17 @@ def execute(self, request): ) use_model_as_payload = True return use_model_as_payload + + def execute_extenders(self, request, current_payload): + extenders = {} + for p, m in self.modifiers.items(): + if m["type"] == ModifierType.EXTEND_PAYLOAD: + extenders[p] = m + extensions = {} + for extender in extenders: + param = request.query_params.get(extender) + if param is not None: + extensions[extender] = self.modifiers[extender]["action"]( + param, current_payload + ) + return extensions diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 1af75fef..67a36238 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -10,6 +10,7 @@ Release_Note, TaxonomyPerEntry, TaxonomyPerEntryDB, + Taxonomy, ) from webfront.views.custom import filter_queryset_accession_in from webfront.exceptions import EmptyQuerysetError, HmmerWebError @@ -709,5 +710,28 @@ def x(value, general_handler): return x +def add_taxonomy_names(value, current_payload): + names = {} + to_query = set() + for taxon in current_payload: + names[taxon["metadata"]["accession"]] = taxon["metadata"]["name"] + if "parent" in taxon["metadata"] and taxon["metadata"]["parent"] is not None: + to_query.add(taxon["metadata"]["parent"]) + if ( + "children" in taxon["metadata"] + and taxon["metadata"]["children"] is not None + ): + for child in taxon["metadata"]["children"]: + to_query.add(child) + if "extra_fields" in taxon and "lineage" in taxon["extra_fields"]: + for tax in taxon["extra_fields"]["lineage"].split(): + to_query.add(tax) + + qs = Taxonomy.objects.filter(accession__in=[q for q in to_query if q not in names]) + for t in qs: + names[t.accession] = t.scientific_name + return names + + def passing(x, y): pass diff --git a/webfront/views/protein.py b/webfront/views/protein.py index dbe8be87..d23ec4b6 100644 --- a/webfront/views/protein.py +++ b/webfront/views/protein.py @@ -17,6 +17,7 @@ calculate_residue_conservation, ) from webfront.models import Protein +from webfront.constants import ModifierType from django.conf import settings entry_db_members = "|".join(settings.DB_MEMBERS) @@ -47,24 +48,28 @@ def get( "protein", accession__iexact=endpoint_levels[level - 1] ) general_handler.modifiers.register( - "residues", get_single_value("residues"), use_model_as_payload=True + "residues", get_single_value("residues"), type=ModifierType.REPLACE_PAYLOAD ) general_handler.modifiers.register( - "structureinfo", get_single_value("structure"), use_model_as_payload=True + "structureinfo", + get_single_value("structure"), + type=ModifierType.REPLACE_PAYLOAD, ) general_handler.modifiers.register( - "ida", get_single_value("ida", True), use_model_as_payload=True + "ida", get_single_value("ida", True), type=ModifierType.REPLACE_PAYLOAD ) general_handler.modifiers.register( "extra_features", get_single_value("extra_features"), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, ) general_handler.modifiers.register( - "isoforms", get_isoforms, use_model_as_payload=True + "isoforms", get_isoforms, type=ModifierType.REPLACE_PAYLOAD ) general_handler.modifiers.register( - "conservation", calculate_residue_conservation, use_model_as_payload=True + "conservation", + calculate_residue_conservation, + type=ModifierType.REPLACE_PAYLOAD, ) return super(UniprotAccessionHandler, self).get( request._request, @@ -165,7 +170,7 @@ def get( general_handler.modifiers.register( "ida", filter_by_domain_architectures, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.PROTEIN_HEADERS, many=True, ) @@ -257,7 +262,7 @@ def get( "is_fragment": "protein_is_fragment", }, ), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.GROUP_BY, ) diff --git a/webfront/views/proteome.py b/webfront/views/proteome.py index 62f802eb..ad8227f5 100644 --- a/webfront/views/proteome.py +++ b/webfront/views/proteome.py @@ -2,6 +2,7 @@ from webfront.serializers.proteome import ProteomeSerializer from webfront.views.custom import CustomView, SerializerDetail from webfront.views.modifiers import group_by, add_extra_fields, filter_by_boolean_field +from webfront.constants import ModifierType class ProteomeAccessionHandler(CustomView): @@ -121,7 +122,7 @@ def get( general_handler.modifiers.register( "group_by", group_by(Proteome, {"proteome_is_reference": "proteome_acc"}), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.GROUP_BY, ) general_handler.modifiers.register( diff --git a/webfront/views/set.py b/webfront/views/set.py index a4b36ee9..5eb58074 100644 --- a/webfront/views/set.py +++ b/webfront/views/set.py @@ -4,6 +4,7 @@ from webfront.serializers.collection import SetSerializer from webfront.views.custom import CustomView, SerializerDetail from django.conf import settings +from webfront.constants import ModifierType from webfront.views.modifiers import add_extra_fields, get_set_alignment @@ -57,7 +58,7 @@ def get( general_handler.modifiers.register( "alignments", get_set_alignment, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, many=True, serializer=SerializerDetail.SET_ALIGNMENT, ) diff --git a/webfront/views/structure.py b/webfront/views/structure.py index 172e3443..b96219ed 100644 --- a/webfront/views/structure.py +++ b/webfront/views/structure.py @@ -9,6 +9,7 @@ filter_by_field_or_field_range, add_extra_fields, ) +from webfront.constants import ModifierType class ChainPDBAccessionHandler(CustomView): @@ -185,7 +186,7 @@ def get( general_handler.modifiers.register( "group_by", group_by(Structure, {"experiment_type": "structure_evidence"}), - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.GROUP_BY, ) general_handler.modifiers.register( diff --git a/webfront/views/taxonomy.py b/webfront/views/taxonomy.py index b2caebe7..75800982 100644 --- a/webfront/views/taxonomy.py +++ b/webfront/views/taxonomy.py @@ -8,7 +8,9 @@ filter_by_entry, filter_by_entry_db, filter_by_domain_architectures, + add_taxonomy_names, ) +from webfront.constants import ModifierType class TaxonomyAccessionHandler(CustomView): @@ -40,13 +42,13 @@ def get( "filter_by_entry", filter_by_entry, serializer=SerializerDetail.TAXONOMY_PER_ENTRY, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, ) general_handler.modifiers.register( "filter_by_entry_db", filter_by_entry_db, serializer=SerializerDetail.TAXONOMY_PER_ENTRY_DB, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, ) return super(TaxonomyAccessionHandler, self).get( @@ -98,10 +100,13 @@ def get( general_handler.modifiers.register( "ida", filter_by_domain_architectures, - use_model_as_payload=True, + type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.TAXONOMY_HEADERS, many=True, ) + general_handler.modifiers.register( + "with_names", add_taxonomy_names, type=ModifierType.EXTEND_PAYLOAD + ) return super(UniprotHandler, self).get( request._request, From 5bc36e9486adb60028f2cc49bad87895425550d4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Tue, 5 Jan 2021 11:35:22 +0000 Subject: [PATCH 03/12] correction in readme --- webfront/views/MODIFIER_README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webfront/views/MODIFIER_README.md b/webfront/views/MODIFIER_README.md index de00bc0c..26fbf876 100644 --- a/webfront/views/MODIFIER_README.md +++ b/webfront/views/MODIFIER_README.md @@ -12,7 +12,7 @@ This are the parameters of the method to register a modifier: * `action`: The modifier function. It should returns a queryset or None. And its parameters are: * `value`: The value given as a URL parameter. * `general_handler`: The handler that is in charge of the current request. -* `type`: (default: `ModifierType.EXTEND_PAYLOAD`) There are 3 types of modifiers: +* `type`: (default: `ModifierType.FILTER`) There are 3 types of modifiers: * `ModifierType.FILTER` It add new filters into the queryset before it gets executed or serialized. * `ModifierType.REPLACE_PAYLOAD` Creates a new payload that replaces the one that would normally been used by the `custom view` * `ModifierType.EXTEND_PAYLOAD` Extend an already calculated payload. The data extending the payload is added as a new key at the level of `"metadata"` or `"results"`. From 0d16463bd468a1b0e2da05fb56b93cf302c5e8e2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 11 Jan 2021 14:57:21 +0000 Subject: [PATCH 04/12] sequence as binaryField --- webfront/migrations/0012_seq_and_seq_raw.py | 19 ++++++++++++++ webfront/models/interpro_new.py | 15 ++++++++--- webfront/tests/fixtures_protein.json | 28 +++------------------ webfront/tests/tests_modifiers.py | 9 ++++--- webfront/views/protein.py | 2 +- 5 files changed, 42 insertions(+), 31 deletions(-) create mode 100644 webfront/migrations/0012_seq_and_seq_raw.py diff --git a/webfront/migrations/0012_seq_and_seq_raw.py b/webfront/migrations/0012_seq_and_seq_raw.py new file mode 100644 index 00000000..c0153707 --- /dev/null +++ b/webfront/migrations/0012_seq_and_seq_raw.py @@ -0,0 +1,19 @@ +# Generated by Django 3.0.7 on 2021-01-11 13:42 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [("webfront", "0011_pfam2interpro")] + + operations = [ + migrations.RemoveField(model_name="protein", name="extra_features"), + migrations.RemoveField(model_name="protein", name="residues"), + migrations.RemoveField(model_name="protein", name="sequence"), + migrations.AddField( + model_name="protein", + name="sequence_bin", + field=models.BinaryField(db_column="sequence", null=True), + ), + ] diff --git a/webfront/models/interpro_new.py b/webfront/models/interpro_new.py index cb500664..6eb0af38 100644 --- a/webfront/models/interpro_new.py +++ b/webfront/models/interpro_new.py @@ -1,3 +1,5 @@ +import gzip + from django.db import models from jsonfield import JSONField @@ -60,7 +62,7 @@ class Protein(models.Model): organism = JSONField(null=True) name = models.CharField(max_length=20) description = JSONField(null=True) - sequence = models.TextField(null=False) + sequence_bin = models.BinaryField(db_column="sequence", null=True) length = models.IntegerField(null=False) proteome = models.CharField(max_length=20, null=True) gene = models.CharField(max_length=70, null=True) @@ -69,8 +71,8 @@ class Protein(models.Model): source_database = models.CharField( max_length=20, default="unreviewed", db_index=True ) - residues = JSONField(null=True) - extra_features = JSONField(null=True) + # residues = JSONField(null=True) + # extra_features = JSONField(null=True) structure = JSONField(default={}, null=True) is_fragment = models.BooleanField(default=False) tax_id = models.CharField(max_length=20, null=False, default="") @@ -78,6 +80,13 @@ class Protein(models.Model): ida = models.TextField(null=True) counts = JSONField(null=True) + @property + def sequence(self): + if self.sequence_bin is not None: + return gzip.decompress(self.sequence_bin) + else: + return None + class Structure(models.Model): accession = models.CharField(max_length=4, primary_key=True) diff --git a/webfront/tests/fixtures_protein.json b/webfront/tests/fixtures_protein.json index 8bca8bf3..c0c1436f 100644 --- a/webfront/tests/fixtures_protein.json +++ b/webfront/tests/fixtures_protein.json @@ -13,7 +13,7 @@ "description": [ "Vacuolar carboxypeptidase involved in degradation of small peptides. Digests preferentially peptides containing an aliphatic or hydrophobic residue in P1' position, as well as methionine, leucine or phenylalanine in P1 position of ester substrate (By similarity)." ], - "sequence": "MRVLPATLLVGAATAAVPPFQQILGLPKKGADTLSKPLHDFQEQLKTLSDDARRLWDEVAKHFPDSMDHNPVFSLPKKHTRRPDSHWDHIVRGADVQSVWVTGANGEKEREVDGKLEAYDLRVKTTDPGALGIDPGVKQYTGYLDDNENDKHLFYWFFESRNDPKNDPVVLWLNGGPGCSSLTGLFLELGPSSIDSKIKPVYNDFAWNSNASVIFLDQPVNVGYSYSGSAVSDTVAAGKDVYALLTLFFKQFPEYAKQDFHIAGESYAGHYIPVFASEILSHKKRNINLKSVLIGNGLTDPLTQYDHYRPMACGDGGYPAVLDEASCQSMDNALPRCKSMIESCYNTESSWVCVPASIYCNNALIGPYQRTGQNVYDVRGKCEDESNLCYKGMGYVSEYLNKREVREAVGAEVDGYDSCNFDINRNFLFHGDWMKPYHRLVPGLLEQIPVLIYAGDADFICNWLGNKAWSEALEWPGQKEYASAELEDLVIEQNEHQGKKIGQIKSHGNFTFMRLYGGGHMVPMDQPEASLEFFNRWIGGEWF", + "sequence_bin": "H4sIAEFb/F8C/xWRbQpFIQhE1yb5UeiVyjDc/0Ze74eIysDM8dtpE45ZCsAByDl5rWFiU1UAj4VO68iLlumbEGFvu0gJ2nlifNh9Jsdf0M/eb9Uv9pH7yXNF3jwCLqS0KVHUCAptp56DU8BkvJa66kgZopOjduO6zBTbceqrTLvmIlNahB0xNjKZEQNDh84sR4br4RA52HDN9JSKCgnIwJMAopgFZseYdfGkAl3IfYBQFEiv8ZJA0LDoqtuHm0baEBd7Zu2swl57ftAERWpCGhJEWw+Dg83dNL5B0coPRdxsOSFGNX/XIbPWPrI8Cx8ebYQUbq1UPqkMKnN9kDbBe8cfVmE0Zxy+nY274P10Vt+WU8xoPb82nnME5NH8mrjCDQKjO2XpSxhARmg5aDn1JapD1tDo4nz421Yi0r+c32P2shgx+75DhC7/APPZcnUfAgAA", "length": "543", "proteome": "UP000006701", "gene": "cpyA", @@ -30,23 +30,6 @@ "evidence_code": 3, "source_database": "reviewed", "is_fragment": false, - "residues": {}, - "extra_features": { - "TMhelix": { - "accession": "TMhelix", - "source_database": "tmhmm", - "locations": [ - { - "fragments": [ - { - "start": 265, - "end": 287 - } - ] - } - ] - } - }, "counts": { "entries": { "total": 5, @@ -72,7 +55,7 @@ }, "name": "Band 7 protein", "description": [], - "sequence": "MESGIIEILIRNGVSHMTEKPVFHINGYLGLILVLVILGLGVYLSVVGWGVLGVILVVLAVLAASSLTIIEPNQSKVLTFFGRYIGTIKESGLYLTVPLTTKTTVSLRVRNFNSAILKVNDLQGNPVEIAAVIVFKVVDTSKALFAVEDYEKFVEIQSESAIRHVASEYAYDNFGDHQALTLRSNPTEVSNHLTEELQARLEVAGVQIIETRLTHLAYATEIASAMLQRQQSQAILSARKIIVEGAVSITEGAIEQLAAETDLHLTDNQKLQLINNMMVSIINERGSQPVINTGKVE", + "sequence_bin": "H4sIAEFc/F8C/w2PAQoEMQgD3ybUutKsbKvk6P8/coJgMDrqq2nu6vATxnze0vVxPh52YXAQ9BbGiyTtZ2zdZUI6JBPV81/sXETNaee6la/m4qL4oWpVMXF4YkaKYzEGtsVHdRE65yJH5RJMoY6ra7a1U7v7PJTUK3fEtPFsQeFkfKXMeFCq2HKgFOPuU+qgHsiVanjKi332zt1rU85yp5owvTq57n5Ba6A5I/bChke8b/seeiz3R4+yRf0D4x3lAykBAAA=", "length": 297, "proteome": "UP000012042", "gene": "LVISKB_0797", @@ -153,7 +136,6 @@ "evidence_code": 1, "source_database": "reviewed", "is_fragment": true, - "residues": {}, "counts": { "entries": { "pfam": 1, @@ -178,7 +160,7 @@ }, "name": "Propeptide, carboxypeptidase Y", "description": [], - "sequence": "MRVLSTTLLVGAASAAAPSFQQVLGAHSEHAENVAQQGADAFKPLQHLQDQFKSLSSEARQLWEEVSNYFPESMGSAPMLSLPKKHTRRPDSHWDYHVSGAKVQDIWVSGAEGTKEREVDGKLEDYALRAKKVDPSALGIDPGVKQYSGYLDDNENDKHLFYWFFESRNDPKNDPVVLWLNGGPGCSSLTGLFMELGPSSIGANIKPIYNDFSWNNNASVIFLDQPINVGYSYSGSSVSDTVAAGKDVYALLTLFFKQFPEYATQDFHIAGESYAGHYIPVMASEILSHKKRNINLKSVLIGNGLTDGLTQYEYYRPMACGEGGYPAVLDESTCQSMDNALSRCQSMIQSCYNSESPWVCVPASIYCNNAMLGPYQRTGQNVYDVRGKCEDESNLCYKGLGYVSEYLGQESVREAVGAEVDGYDSCNFDINRNFLFNGDWFKPYHRLVPGLLEQIPVLIYAGDADFICNWLGNKAWSEALEWPGQKEFASAELEDLKIVQNEHVGKKIGQIKSHGNFTFMRIFGGGHMVPMDQPESGLEFFNRWIGGEWF", + "sequence_bin": "H4sIAMhc/F8C/xWRSw5EIQgEz0YeHw1IVAyG+19knAVJb4BK19hpcY5ZCkAAwAxeK02gBTUgT1hLAIF12mq2cLGGRRDsZZcow4snxZCAOSxsqraz98RoF6tlCGgu7PefSI7SpkRRIyywDaqJM8Ck45TUVSFliE6O2ozrMlNsx6lvMu2ai0z5IuyI8SCTGdEFvOvs5chx3R0iOxuu2T2l4l2NyMCTAKKY77UdY9b14AvOQm4dhKJAWvWZA4K6RVPd3t000rq42ME3q6hqzwGfkEhNSEOK860Y6GCx/6mv+MqDYt78ckL0+h7XeLy19pHlWZhb9KO37PaVikllUJksitwEz8q/rML4nLH7djZ2wft0VNuWU8xoPV7rjxwBuX9+TVzhPkdGd8pS4ueWXuOmPZdTS1HtsrpGE+fDY3cWkTZyjtcZhRgx+75dhC7/APSq380mAgAA", "length": "550", "proteome": "UP000030104", "gene": "PITC_084940", @@ -195,7 +177,6 @@ "evidence_code": 1, "source_database": "unreviewed", "is_fragment": false, - "residues": {}, "counts": { "entries": { "total": 0 @@ -219,7 +200,7 @@ "description": [ "Receptor for lutropin-choriogonadotropic hormone. The activity of this receptor is mediated by G proteins which activate adenylate cyclase." ], - "sequence": "MRRRSLALRLLLALLLLPPPLPQTLLGAPCPEPCSCRPDGALRCPGPRAGLSRLSLTYLPIKVIPSQAFRGLNEVVKIEISQSDSLEKIEANAFDNLLNLSEILIQNTKNLVYIEPGAFTNLPRLKYLSICNTGIRKLPDVTKIFSSEFNFILEICDNLHITTVPANAFQGMNNESITLKLYGNGFEEIQSHAFNGTTLISLELKENAHLKKMHNDAFRGARGPSILDISSTKLQALPSYGLESIQTLIATSSYSLKKLPSREKFTNLLDATLTYPSHCCAFRNLPTKEQNFSFSIFKNFSKQCESTARRPNNETLYSAIFAESELSDWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYDFLRVLIWLINILAIMGNVTVLFVLLTSHYKLTVPRFLMCNLSFADFCMGLYLLLIASVDAQTKGQYYNHAIDWQTGNGCSVAGFFTVFASELSVYTLTVITLERWHTITYAIQLDQKLRLRHAIPIMLGGWLFSTLIAMLPLVGVSSYMKVSICLPMDVETTLSQVYILTILILNVVAFIIICACYIKIYFAVQNPELMATNKDTKIAKKMAVLIFTDFTCMAPISFFAISAALKVPLITVTNSKVLLVLFYPVNSCANPFLYAIFTKAFRRDFFLLLSKSGCCKHQAELYRRKDFSAYCKNGFTGSNKPSRSTLKLTTLQCQYSTVMDKTCYKDC", + "sequence_bin": "H4sIAE1d/F8C/xWRUQ4EIQhDz0ZUHEKHoBA33v8iy/ihhoD2te/eO0DYQO213B2+EpjkzYe3aNv7rI7m0zdNxEYgL1z0iMci3hM2zlEZEit6YNSVjLgbYIghkGWphnNl+CROg2/oRUiznLIV3k+qcMRgY8GQVtOPZB7/nlrzNRshCcWdNnkMWfEQ28yE1J/QYfRA9X2sf6JoTw9Bl4hULILHnag3Ck8oI25Ud1X30E8ROmWBeTyt1XxJTB3LODiEtU5dbUTS3l5SEjdImEYMRP/12+/kFq6J1aicKw3mbXR55+2MfSA/iAmoKnbygA+Q8VxFQW7G28osps7tnbgVhlCcTit1rnvtIem/lQXf4tBkzsP0/X5u6T5lzdi/JyUvyUJfWqnuGnJ5MecPHB/3C8eZp+hfPWU//O1nlIexKh1kZQU7h1hEGrUrKpfpLPOBl9K0V0xULlMBcXbO9pJLMJMEEfQ4JE9aaOEV5PVj0cicUcI4tbzdnbn4QmO2ps+igbu3dg66TSvdnGFaycSXd36Wrht53q7Zrvb2B5BjI824AgAA", "length": 696, "proteome": "UP000030104", "gene": "LHCGR", @@ -236,7 +217,6 @@ "evidence_code": 4, "source_database": "unreviewed", "is_fragment": false, - "residues": {}, "counts": { "entries": { "profile": 1, diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index 5f0c5e27..4f8072c5 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -234,7 +234,7 @@ class ExtraFieldsModifierTest(InterproRESTTestCase): "go_terms", "evidence_code", "source_database", - "residues", + # "residues", "structure", "is_fragment", "tax_id", @@ -417,12 +417,15 @@ def test_no_pathways_modifier(self): response = self.client.get("/api/entry/interpro/IPR003165?pathways") self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) + class TaxonomyScientificNameModifierTest(InterproRESTTestCase): def test_scientific_name_modifier(self): - response = self.client.get("/api/taxonomy/uniprot/?scientific_name=Penicillium+italicum") + response = self.client.get( + "/api/taxonomy/uniprot/?scientific_name=Penicillium+italicum" + ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("metadata", response.data) self.assertIn("accession", response.data["metadata"]) self.assertIn("counters", response.data["metadata"]) self.assertEqual("40296", response.data["metadata"]["accession"]) - self.assertEqual(2, response.data["metadata"]["counters"]["entries"]) \ No newline at end of file + self.assertEqual(2, response.data["metadata"]["counters"]["entries"]) diff --git a/webfront/views/protein.py b/webfront/views/protein.py index d23ec4b6..a96ee20e 100644 --- a/webfront/views/protein.py +++ b/webfront/views/protein.py @@ -182,7 +182,7 @@ def get( filter_by_contains_field("protein", "go_terms", '"identifier": "{}"'), ) general_handler.modifiers.register( - "extra_fields", add_extra_fields(Protein, "counters") + "extra_fields", add_extra_fields(Protein, "counters", "sequence") ) return super(UniprotHandler, self).get( request._request, From 821d96e1dbe4af5e74198418f66fe832efd256b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 11 Jan 2021 17:26:59 +0000 Subject: [PATCH 05/12] extra_features and residues on their own table --- webfront/migrations/0013_protein_extra.py | 38 ++++++++++++++++ webfront/models/interpro_new.py | 26 +++++++++++ webfront/tests/fixtures_protein.json | 26 ++++++++++- webfront/tests/tests_modifiers.py | 16 ++++++- webfront/views/modifiers.py | 53 +++++++++++++++++++++++ webfront/views/protein.py | 8 ++-- 6 files changed, 161 insertions(+), 6 deletions(-) create mode 100644 webfront/migrations/0013_protein_extra.py diff --git a/webfront/migrations/0013_protein_extra.py b/webfront/migrations/0013_protein_extra.py new file mode 100644 index 00000000..8b1457fa --- /dev/null +++ b/webfront/migrations/0013_protein_extra.py @@ -0,0 +1,38 @@ +# Generated by Django 3.0.7 on 2021-01-11 16:29 + +from django.db import migrations, models +import jsonfield.fields + + +class Migration(migrations.Migration): + + dependencies = [("webfront", "0012_seq_and_seq_raw")] + + operations = [ + migrations.CreateModel( + name="ProteinExtraFeatures", + fields=[ + ("feature_id", models.IntegerField(primary_key=True, serialize=False)), + ("protein_acc", models.CharField(max_length=15)), + ("entry_acc", models.CharField(max_length=25)), + ("source_database", models.CharField(max_length=10)), + ("location_start", models.IntegerField()), + ("location_end", models.IntegerField()), + ("sequence_feature", models.CharField(max_length=35)), + ], + options={"db_table": "webfront_proteinfeature"}, + ), + migrations.CreateModel( + name="ProteinResidues", + fields=[ + ("residue_id", models.IntegerField(primary_key=True, serialize=False)), + ("protein_acc", models.CharField(max_length=15)), + ("entry_acc", models.CharField(max_length=25)), + ("entry_name", models.CharField(max_length=100)), + ("source_database", models.CharField(max_length=10)), + ("description", models.CharField(max_length=255)), + ("fragments", jsonfield.fields.JSONField(null=True)), + ], + options={"db_table": "webfront_proteinresidue"}, + ), + ] diff --git a/webfront/models/interpro_new.py b/webfront/models/interpro_new.py index 6eb0af38..00033ec1 100644 --- a/webfront/models/interpro_new.py +++ b/webfront/models/interpro_new.py @@ -88,6 +88,32 @@ def sequence(self): return None +class ProteinExtraFeatures(models.Model): + feature_id = models.IntegerField(primary_key=True) + protein_acc = models.CharField(max_length=15) + entry_acc = models.CharField(max_length=25) + source_database = models.CharField(max_length=10) + location_start = models.IntegerField() + location_end = models.IntegerField() + sequence_feature = models.CharField(max_length=35) + + class Meta: + db_table = "webfront_proteinfeature" + + +class ProteinResidues(models.Model): + residue_id = models.IntegerField(primary_key=True) + protein_acc = models.CharField(max_length=15) + entry_acc = models.CharField(max_length=25) + entry_name = models.CharField(max_length=100) + source_database = models.CharField(max_length=10) + description = models.CharField(max_length=255) + fragments = JSONField(null=True) + + class Meta: + db_table = "webfront_proteinresidue" + + class Structure(models.Model): accession = models.CharField(max_length=4, primary_key=True) name = models.CharField(max_length=512) diff --git a/webfront/tests/fixtures_protein.json b/webfront/tests/fixtures_protein.json index c0c1436f..cbb6f8f0 100644 --- a/webfront/tests/fixtures_protein.json +++ b/webfront/tests/fixtures_protein.json @@ -229,7 +229,7 @@ "ida_id": 590134 } }, - { + { "model": "webfront.Isoforms", "fields": { "accession": "A1CUJ5-2", @@ -269,5 +269,29 @@ } } } + }, + { + "model": "webfront.ProteinExtraFeatures", + "fields": { + "feature_id": 1, + "protein_acc": "A1CUJ5", + "entry_acc": "TMhelix", + "source_database": "tmhmm", + "sequence_feature": "TMhelix", + "location_start": 265, + "location_end": 287 + } + }, + { + "model": "webfront.ProteinResidues", + "fields": { + "residue_id": 1, + "entry_name": "the residue", + "description": "a single residue", + "fragments": [["X",5,5]], + "protein_acc": "A1CUJ5", + "entry_acc": "residue", + "source_database": "cddd" + } } ] diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index 4f8072c5..636129c3 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -234,7 +234,6 @@ class ExtraFieldsModifierTest(InterproRESTTestCase): "go_terms", "evidence_code", "source_database", - # "residues", "structure", "is_fragment", "tax_id", @@ -429,3 +428,18 @@ def test_scientific_name_modifier(self): self.assertIn("counters", response.data["metadata"]) self.assertEqual("40296", response.data["metadata"]["accession"]) self.assertEqual(2, response.data["metadata"]["counters"]["entries"]) + + +class ResidueModifierTest(InterproRESTTestCase): + def test_residue_modifier_is_different_than_acc_protein(self): + response1 = self.client.get("/api/protein/uniprot/a1cuj5") + self.assertEqual(response1.status_code, status.HTTP_200_OK) + response2 = self.client.get("/api/protein/uniprot/a1cuj5?residues") + self.assertEqual(response2.status_code, status.HTTP_200_OK) + self.assertNotEquals(response1.data, response2.data) + + def test_residue_modifier(self): + response2 = self.client.get("/api/protein/uniprot/a1cuj5?residues") + self.assertEqual(response2.status_code, status.HTTP_200_OK) + self.assertIn("residue", response2.data) + self.assertIn("locations", response2.data["residue"]) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 4aa08968..70e23871 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -11,6 +11,8 @@ TaxonomyPerEntry, TaxonomyPerEntryDB, Taxonomy, + ProteinExtraFeatures, + ProteinResidues, ) from webfront.views.custom import filter_queryset_accession_in from webfront.exceptions import EmptyQuerysetError, HmmerWebError, ExpectedUniqueError @@ -751,5 +753,56 @@ def add_taxonomy_names(value, current_payload): return names +def extra_features(value, general_handler): + features = ProteinExtraFeatures.objects.filter( + protein_acc__in=general_handler.queryset_manager.get_queryset() + ) + payload = {} + for feature in features: + if feature.entry_acc not in payload: + payload[feature.entry_acc] = { + "accession": feature.entry_acc, + "source_database": feature.source_database, + "locations": [], + } + payload[feature.entry_acc]["locations"].append( + { + "fragments": [ + { + "start": feature.location_start, + "end": feature.location_end, + "seq_feature": feature.sequence_feature, + } + ] + } + ) + return payload + + +def residues(value, general_handler): + residues = ProteinResidues.objects.filter( + protein_acc__in=general_handler.queryset_manager.get_queryset() + ) + payload = {} + for residue in residues: + if residue.entry_acc not in payload: + payload[residue.entry_acc] = { + "accession": residue.entry_acc, + "source_database": residue.source_database, + "name": residue.entry_name, + "locations": [], + } + payload[residue.entry_acc]["locations"].append( + { + "description": residue.description, + "fragments": [ + {"residues": f[0], "start": f[1], "end": f[2]} + for f in residue.fragments + ], + } + ) + return payload + + def passing(x, y): pass diff --git a/webfront/views/protein.py b/webfront/views/protein.py index a96ee20e..bc176821 100644 --- a/webfront/views/protein.py +++ b/webfront/views/protein.py @@ -15,6 +15,8 @@ add_extra_fields, get_isoforms, calculate_residue_conservation, + extra_features, + residues, ) from webfront.models import Protein from webfront.constants import ModifierType @@ -48,7 +50,7 @@ def get( "protein", accession__iexact=endpoint_levels[level - 1] ) general_handler.modifiers.register( - "residues", get_single_value("residues"), type=ModifierType.REPLACE_PAYLOAD + "residues", residues, type=ModifierType.REPLACE_PAYLOAD ) general_handler.modifiers.register( "structureinfo", @@ -59,9 +61,7 @@ def get( "ida", get_single_value("ida", True), type=ModifierType.REPLACE_PAYLOAD ) general_handler.modifiers.register( - "extra_features", - get_single_value("extra_features"), - type=ModifierType.REPLACE_PAYLOAD, + "extra_features", extra_features, type=ModifierType.REPLACE_PAYLOAD ) general_handler.modifiers.register( "isoforms", get_isoforms, type=ModifierType.REPLACE_PAYLOAD From 15140ae8445f12ab453b7f2fb85a4f405271ca94 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Tue, 12 Jan 2021 09:56:20 +0000 Subject: [PATCH 06/12] group_by is many=False --- webfront/views/entry.py | 1 + webfront/views/protein.py | 1 + webfront/views/structure.py | 1 + 3 files changed, 3 insertions(+) diff --git a/webfront/views/entry.py b/webfront/views/entry.py index 84f27884..4b88432c 100644 --- a/webfront/views/entry.py +++ b/webfront/views/entry.py @@ -534,6 +534,7 @@ def get( ), type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.GROUP_BY, + many=False, ) general_handler.modifiers.register( "sort_by", diff --git a/webfront/views/protein.py b/webfront/views/protein.py index d23ec4b6..9c807fa0 100644 --- a/webfront/views/protein.py +++ b/webfront/views/protein.py @@ -264,6 +264,7 @@ def get( ), type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.GROUP_BY, + many=False, ) general_handler.modifiers.register( diff --git a/webfront/views/structure.py b/webfront/views/structure.py index b96219ed..c9156feb 100644 --- a/webfront/views/structure.py +++ b/webfront/views/structure.py @@ -188,6 +188,7 @@ def get( group_by(Structure, {"experiment_type": "structure_evidence"}), type=ModifierType.REPLACE_PAYLOAD, serializer=SerializerDetail.GROUP_BY, + many=False, ) general_handler.modifiers.register( "sort_by", From ffbb3502dc5151bec82cf170e64de76d2b61acae Mon Sep 17 00:00:00 2001 From: CodeGit Date: Fri, 29 Jan 2021 12:10:34 +0000 Subject: [PATCH 07/12] updated code and test to handle member database filter together with scientific_name search --- webfront/tests/tests_modifiers.py | 37 +++++++++- webfront/views/custom.py | 2 +- webfront/views/modifiers.py | 117 ++++++++++++++++++++++++++++-- 3 files changed, 144 insertions(+), 12 deletions(-) diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index 5f0c5e27..cee62126 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -419,10 +419,41 @@ def test_no_pathways_modifier(self): class TaxonomyScientificNameModifierTest(InterproRESTTestCase): def test_scientific_name_modifier(self): - response = self.client.get("/api/taxonomy/uniprot/?scientific_name=Penicillium+italicum") + response = self.client.get( + "/api/taxonomy/uniprot/?scientific_name=Bacteria" + ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("metadata", response.data) self.assertIn("accession", response.data["metadata"]) self.assertIn("counters", response.data["metadata"]) - self.assertEqual("40296", response.data["metadata"]["accession"]) - self.assertEqual(2, response.data["metadata"]["counters"]["entries"]) \ No newline at end of file + self.assertEqual("2", response.data["metadata"]["accession"]) + self.assertEqual(2, response.data["metadata"]["counters"]["entries"]) + self.assertEqual(2, response.data["metadata"]["counters"]["proteins"]) + + def test_scientific_name_modifier_member_database_filter(self): + response = self.client.get( + "/api/taxonomy/uniprot/entry/interpro?scientific_name=Bacteria" + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertIn("metadata", response.data) + self.assertIn("accession", response.data["metadata"]) + self.assertIn("counters", response.data["metadata"]) + self.assertEqual("2", response.data["metadata"]["accession"]) + self.assertEqual(1, response.data["metadata"]["counters"]["entries"]) + self.assertEqual(1, response.data["metadata"]["counters"]["proteins"]) + + + +class ResidueModifierTest(InterproRESTTestCase): + def test_residue_modifier_is_different_than_acc_protein(self): + response1 = self.client.get("/api/protein/uniprot/a1cuj5") + self.assertEqual(response1.status_code, status.HTTP_200_OK) + response2 = self.client.get("/api/protein/uniprot/a1cuj5?residues") + self.assertEqual(response2.status_code, status.HTTP_200_OK) + self.assertNotEquals(response1.data, response2.data) + + def test_residue_modifier(self): + response2 = self.client.get("/api/protein/uniprot/a1cuj5?residues") + self.assertEqual(response2.status_code, status.HTTP_200_OK) + self.assertIn("residue", response2.data) + self.assertIn("locations", response2.data["residue"]) diff --git a/webfront/views/custom.py b/webfront/views/custom.py index 7f955b73..6763a711 100644 --- a/webfront/views/custom.py +++ b/webfront/views/custom.py @@ -33,7 +33,7 @@ class CustomView(GenericAPIView): # description of the level of the endpoint, for debug purposes level_description = "level" # List of tuples for the handlers for lower levels of the endpoint - # the firt item of each tuple will be regex or string to which the endpoint will be matched + # the first item of each tuple will be regex or string to which the endpoint will be matched # and the second item is the view handler that should proccess it. child_handlers = [] # Default queryset of this view diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index e938378c..439d3338 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -709,16 +709,117 @@ def x(value, general_handler): return x def get_taxonomy_by_scientific_name(scientific_name, general_handler): - general_handler.queryset_manager.filters["taxonomy"] = {"scientific_name": scientific_name} - - queryset = general_handler.queryset_manager.get_queryset() - if (queryset.count() == 1): + filters = general_handler.queryset_manager.filters + filters["taxonomy"] = { + "scientific_name": scientific_name + } + + # Taxonomy has to be fetched before any further filters are applied + queryset = general_handler.queryset_manager.get_queryset(only_main_endpoint=True) + if queryset.count() == 0: + raise EmptyQuerysetError( + f"Failed to find Taxonomy node with scientific name '{scientific_name}'" + ) + if queryset.count() > 1: + raise ExpectedUniqueError( + f"Found more than one Taxonomy node with scientific name '{scientific_name}'" + ) + + # The queryset contains the taxonomy object matching the scientific_name + # the counters apply to the full dataset so we only return this data if + # there are no other endpoints in the request + if general_handler.queryset_manager.is_single_endpoint(): return queryset - elif (queryset.count() == 0): - raise EmptyQuerysetError(f"Failed to find Taxonomy node with scientific name '{scientific_name}'") - elif (queryset.count() > 1): - raise ExpectedUniqueError(f"Found more than one Taxonomy node with scientific name '{scientific_name}'") + + # The counts for a member database can be fetched from TaxonomyPerEntryDB + if 'entry' in filters and bool(filters.get('entry')): + filtered_queryset= TaxonomyPerEntryDB.objects.filter( + taxonomy=queryset.first().accession, + source_database=filters.get('entry')["source_database"] + ) + if len(filtered_queryset) == 0: + raise EmptyQuerysetError( + f"Failed to find Taxonomy count associated with scientific name '{scientific_name}' and filters" + ) + elif len(filtered_queryset) > 1: + raise ExpectedUniqueError( + f"Found more than one Taxonomy count with scientific name '{scientific_name}' and filters" + ) + return filtered_queryset + + +def add_taxonomy_names(value, current_payload): + names = {} + to_query = set() + for taxon in current_payload: + names[taxon["metadata"]["accession"]] = taxon["metadata"]["name"] + if "parent" in taxon["metadata"] and taxon["metadata"]["parent"] is not None: + to_query.add(taxon["metadata"]["parent"]) + if ( + "children" in taxon["metadata"] + and taxon["metadata"]["children"] is not None + ): + for child in taxon["metadata"]["children"]: + to_query.add(child) + if "extra_fields" in taxon and "lineage" in taxon["extra_fields"]: + for tax in taxon["extra_fields"]["lineage"].split(): + to_query.add(tax) + qs = Taxonomy.objects.filter(accession__in=[q for q in to_query if q not in names]) + for t in qs: + names[t.accession] = t.scientific_name + return names + + +def extra_features(value, general_handler): + features = ProteinExtraFeatures.objects.filter( + protein_acc__in=general_handler.queryset_manager.get_queryset() + ) + payload = {} + for feature in features: + if feature.entry_acc not in payload: + payload[feature.entry_acc] = { + "accession": feature.entry_acc, + "source_database": feature.source_database, + "locations": [], + } + payload[feature.entry_acc]["locations"].append( + { + "fragments": [ + { + "start": feature.location_start, + "end": feature.location_end, + "seq_feature": feature.sequence_feature, + } + ] + } + ) + return payload + + +def residues(value, general_handler): + residues = ProteinResidues.objects.filter( + protein_acc__in=general_handler.queryset_manager.get_queryset() + ) + payload = {} + for residue in residues: + if residue.entry_acc not in payload: + payload[residue.entry_acc] = { + "accession": residue.entry_acc, + "source_database": residue.source_database, + "name": residue.entry_name, + "locations": [], + } + payload[residue.entry_acc]["locations"].append( + { + "description": residue.description, + "fragments": [ + {"residues": f[0], "start": f[1], "end": f[2]} + for f in residue.fragments + ], + } + ) + return payload def passing(x, y): pass From ece64cef40f9f5307b6b039046f1e11ab9bcf8e0 Mon Sep 17 00:00:00 2001 From: CodeGit Date: Mon, 1 Feb 2021 15:30:05 +0000 Subject: [PATCH 08/12] added error message for queries that cannot currently be handled --- webfront/views/modifiers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 2c9c4641..5021a847 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -750,6 +750,8 @@ def get_taxonomy_by_scientific_name(scientific_name, general_handler): f"Found more than one Taxonomy count with scientific name '{scientific_name}' and filters" ) return filtered_queryset + else: + raise URLError('scientific_name modifier currently only works with taxonomy endpoint and entry filter') def add_taxonomy_names(value, current_payload): names = {} From 42941ff07c4a288fbdeaf2200ba2fafd19b16d1d Mon Sep 17 00:00:00 2001 From: CodeGit Date: Fri, 5 Feb 2021 11:06:35 +0000 Subject: [PATCH 09/12] removed leftover lines from merge --- webfront/views/modifiers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 5021a847..7bb331b1 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -826,11 +826,6 @@ def residues(value, general_handler): ) return payload - qs = Taxonomy.objects.filter(accession__in=[q for q in to_query if q not in names]) - for t in qs: - names[t.accession] = t.scientific_name - return names - def extra_features(value, general_handler): features = ProteinExtraFeatures.objects.filter( From 438773b465a1ded0e215169146483b4294ee05ef Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 5 Feb 2021 11:46:01 +0000 Subject: [PATCH 10/12] adding structure model and contacts modifiers --- webfront/migrations/0014_structural_model.py | 21 ++++++++ webfront/models/interpro_new.py | 10 ++++ webfront/tests/README.md | 51 ++++++++++++++++++++ webfront/tests/fixtures_structure.json | 10 ++++ webfront/tests/tests_modifiers.py | 22 +++++++++ webfront/views/custom.py | 13 +++-- webfront/views/entry.py | 13 +++++ webfront/views/modifiers.py | 32 ++++++++++++ 8 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 webfront/migrations/0014_structural_model.py create mode 100644 webfront/tests/README.md diff --git a/webfront/migrations/0014_structural_model.py b/webfront/migrations/0014_structural_model.py new file mode 100644 index 00000000..fb95b055 --- /dev/null +++ b/webfront/migrations/0014_structural_model.py @@ -0,0 +1,21 @@ +# Generated by Django 3.0.7 on 2021-02-05 10:34 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [("webfront", "0013_protein_extra")] + + operations = [ + migrations.CreateModel( + name="StructuralModel", + fields=[ + ("model_id", models.IntegerField(primary_key=True, serialize=False)), + ("accession", models.CharField(max_length=25)), + ("contacts", models.BinaryField()), + ("structure", models.BinaryField()), + ], + options={"db_table": "webfront_structuralmodel"}, + ) + ] diff --git a/webfront/models/interpro_new.py b/webfront/models/interpro_new.py index 00033ec1..5764637a 100644 --- a/webfront/models/interpro_new.py +++ b/webfront/models/interpro_new.py @@ -225,3 +225,13 @@ class Isoforms(models.Model): class Meta: db_table = "webfront_varsplic" + + +class StructuralModel(models.Model): + model_id = models.IntegerField(primary_key=True) + accession = models.CharField(max_length=25, null=False) + contacts = models.BinaryField() + structure = models.BinaryField() + + class Meta: + db_table = "webfront_structuralmodel" diff --git a/webfront/tests/README.md b/webfront/tests/README.md new file mode 100644 index 00000000..e7250849 --- /dev/null +++ b/webfront/tests/README.md @@ -0,0 +1,51 @@ +Testing +=== + +Generating fixtures for gzip BinaryFields +--- +This in an example of how to generate the fixtures file for `StructuralModel`. The same approach was used for the sequence in the `Protein` table. + +1. Go the djano shell. Make sure you are using the test DB by setting `use_test_db: true` in `interpro.local.yml`. + ```shell + python3 manage.py shell + ``` + +2. Manually create the fixture, using `gzip` and `bytes` for the binary fields + ```python + import gzip + from webfront.models import StructuralModel + + contacts = "[[1,11,0.5], [2,30,0.8], [3,30,0.8]]" + contacts_gz = gzip.compress(bytes(contacts,'utf-8')) + structure = """ATOM 1 N VAL A 1 -0.701 1.770 1.392 1.00 4.92 N + ATOM 2 CA VAL A 1 0.691 2.052 1.718 1.00 4.92 C + ATOM 3 C VAL A 1 1.384 2.880 0.637 1.00 4.92 C + ATOM 4 O VAL A 1 0.991 2.879 -0.532 1.00 4.92 O + """ + structure_gz = gzip.compress(bytes(structure,'utf-8')) + model = StructuralModel(model_id=1,accession='PF17176',contacts=contacts_gz,structure=structure_gz) + + model.save() + ``` + +3. Generate the fixture using the `dumpdata` tool in django: + ```shell + python manage.py dumpdata webfront --indent 4 + ``` + ```json + [ + { + "model": "webfront.structuralmodel", + "pk": 1, + "fields": { + "accession": "PF17176", + "contacts": "H4sIAOAnHWAC/4uONtQxNNQx0DON1VGINtIxNgCyLUBsYxg7FgCIHLXIJAAAAA==", + "structure": "H4sIAOcnHWAC/42QMQ7DMAhF95yCCxRhOy54RFnbZIly/6MEcBWpSi2V5X9s/Qe27tsbohLAanLoC7S3Xg9CJvcJmSm0tOxC1s3o/irLT3oB7WbRGxAIn819Rqq5g5MMgMsXsMTBDWgbyRxAEeoDCv8FtNQGvzZsnw2FW3xBLaMnW346Aa6DA5lEAQAA" + } + } + ] + + ``` +4. Now you can use the generated JSON to included in one of the fixture files in `webfront/tests/`. + + In this example the generated fixture is included at the end of `webfront/tests/fixtures_structure.json`. diff --git a/webfront/tests/fixtures_structure.json b/webfront/tests/fixtures_structure.json index c3da74b6..390b5d0a 100644 --- a/webfront/tests/fixtures_structure.json +++ b/webfront/tests/fixtures_structure.json @@ -217,5 +217,15 @@ "sets": 0 } } + }, + { + "model": "webfront.structuralmodel", + "pk": 1, + "fields": { + "accession": "PF17176", + "contacts": "H4sIAOAnHWAC/4uONtQxNNQx0DON1VGINtIxNgCyLUBsYxg7FgCIHLXIJAAAAA==", + "structure": "H4sIAOcnHWAC/42QMQ7DMAhF95yCCxRhOy54RFnbZIly/6MEcBWpSi2V5X9s/Qe27tsbohLAanLoC7S3Xg9CJvcJmSm0tOxC1s3o/irLT3oB7WbRGxAIn819Rqq5g5MMgMsXsMTBDWgbyRxAEeoDCv8FtNQGvzZsnw2FW3xBLaMnW346Aa6DA5lEAQAA" + } } + ] diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index 636129c3..1841a176 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -1,4 +1,6 @@ import unittest +import gzip +import json from webfront.tests.InterproRESTTestCase import InterproRESTTestCase from rest_framework import status import ssl @@ -443,3 +445,23 @@ def test_residue_modifier(self): self.assertEqual(response2.status_code, status.HTTP_200_OK) self.assertIn("residue", response2.data) self.assertIn("locations", response2.data["residue"]) + + +class StructuralModelTest(InterproRESTTestCase): + def test_model_structure_modifier(self): + response = self.client.get("/api/entry/pfam/PF17176?model:structure") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.charset, "utf-8") + self.assertEqual(response["content-type"], "chemical/x-pdb") + content = gzip.decompress(response.content) + self.assertIn("ATOM", str(content)) + + def test_model_contacts_modifier(self): + response = self.client.get("/api/entry/pfam/PF17176?model:contacts") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.charset, "utf-8") + self.assertEqual(response["content-type"], "application/json") + content = gzip.decompress(response.content) + data = json.loads(content) + self.assertEqual(3, len(data)) + self.assertEqual(3, len(data[0])) diff --git a/webfront/views/custom.py b/webfront/views/custom.py index 118768c2..4ee6735a 100644 --- a/webfront/views/custom.py +++ b/webfront/views/custom.py @@ -91,10 +91,15 @@ def get( mime_type = annotation.mime_type value = annotation.value response = HttpResponse(value, content_type=mime_type) - if annotation.type.startswith("alignment:"): - response["Content-Encoding"] = "gzip" - if "download" not in request.GET: - response["Content-Type"] = "text/plain" + if annotation.type.startswith( + "alignment:" + ) or annotation.type.startswith("model:"): + if "download" in request.GET: + response["Content-Type"] = "application/gzip" + else: + response["Content-Encoding"] = "gzip" + if "gzip" in mime_type: + response["Content-Type"] = "text/plain" return response general_handler.filter_serializers = [] self.search_size = general_handler.modifiers.search_size diff --git a/webfront/views/entry.py b/webfront/views/entry.py index 4b88432c..b4fbd89d 100644 --- a/webfront/views/entry.py +++ b/webfront/views/entry.py @@ -17,6 +17,7 @@ ida_search, filter_by_latest_entries, get_value_for_field, + get_model, ) from .custom import CustomView, SerializerDetail from django.conf import settings @@ -49,6 +50,18 @@ def get( "entry", accession__iexact=endpoint_levels[level - 1].lower() ) + general_handler.modifiers.register( + "model:structure", + get_model("structure"), + type=ModifierType.REPLACE_PAYLOAD, + serializer=SerializerDetail.ANNOTATION_BLOB, + ) + general_handler.modifiers.register( + "model:contacts", + get_model("contacts"), + type=ModifierType.REPLACE_PAYLOAD, + serializer=SerializerDetail.ANNOTATION_BLOB, + ) general_handler.modifiers.register( "annotation", get_entry_annotation, diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 70e23871..c3560ca6 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -13,6 +13,7 @@ Taxonomy, ProteinExtraFeatures, ProteinResidues, + StructuralModel, ) from webfront.views.custom import filter_queryset_accession_in from webfront.exceptions import EmptyQuerysetError, HmmerWebError, ExpectedUniqueError @@ -804,5 +805,36 @@ def residues(value, general_handler): return payload +def get_model(type): + def get_model_structure(value, general_handler): + entry = general_handler.queryset_manager.get_queryset() + if len(entry) == 0: + raise EmptyQuerysetError( + "There is are not entries with the given accession" + ) + queryset = StructuralModel.objects.filter(accession=entry.first().accession) + if len(queryset) == 0: + raise EmptyQuerysetError("The selected entry doesn't have a linked model") + + annotation = queryset.first() + + payload = lambda: None + payload.accession = annotation.accession + payload.type = "model:pdb" + if type == "structure": + payload.mime_type = "chemical/x-pdb" + payload.value = annotation.structure + elif type == "contacts": + payload.mime_type = "application/json" + payload.value = annotation.contacts + return [payload] + + return get_model_structure + + +def get_model_contacts(value, general_handler): + return payload + + def passing(x, y): pass From 8eb4db372cd2f938f6eab6598c4bc89aaeb5e41e Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 5 Feb 2021 14:01:02 +0000 Subject: [PATCH 11/12] removing duplicated methods --- webfront/views/modifiers.py | 75 +++++++------------------------------ 1 file changed, 13 insertions(+), 62 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 7bb331b1..2f30f37f 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -714,9 +714,7 @@ def x(value, general_handler): def get_taxonomy_by_scientific_name(scientific_name, general_handler): filters = general_handler.queryset_manager.filters - filters["taxonomy"] = { - "scientific_name": scientific_name - } + filters["taxonomy"] = {"scientific_name": scientific_name} # Taxonomy has to be fetched before any further filters are applied queryset = general_handler.queryset_manager.get_queryset(only_main_endpoint=True) @@ -728,18 +726,18 @@ def get_taxonomy_by_scientific_name(scientific_name, general_handler): raise ExpectedUniqueError( f"Found more than one Taxonomy node with scientific name '{scientific_name}'" ) - + # The queryset contains the taxonomy object matching the scientific_name - # the counters apply to the full dataset so we only return this data if + # the counters apply to the full dataset so we only return this data if # there are no other endpoints in the request if general_handler.queryset_manager.is_single_endpoint(): return queryset - + # The counts for a member database can be fetched from TaxonomyPerEntryDB - if 'entry' in filters and bool(filters.get('entry')): - filtered_queryset= TaxonomyPerEntryDB.objects.filter( + if "entry" in filters and bool(filters.get("entry")): + filtered_queryset = TaxonomyPerEntryDB.objects.filter( taxonomy=queryset.first().accession, - source_database=filters.get('entry')["source_database"] + source_database=filters.get("entry")["source_database"], ) if len(filtered_queryset) == 0: raise EmptyQuerysetError( @@ -751,7 +749,10 @@ def get_taxonomy_by_scientific_name(scientific_name, general_handler): ) return filtered_queryset else: - raise URLError('scientific_name modifier currently only works with taxonomy endpoint and entry filter') + raise URLError( + "scientific_name modifier currently only works with taxonomy endpoint and entry filter" + ) + def add_taxonomy_names(value, current_payload): names = {} @@ -803,11 +804,11 @@ def extra_features(value, general_handler): def residues(value, general_handler): - residues = ProteinResidues.objects.filter( + residues_qs = ProteinResidues.objects.filter( protein_acc__in=general_handler.queryset_manager.get_queryset() ) payload = {} - for residue in residues: + for residue in residues_qs: if residue.entry_acc not in payload: payload[residue.entry_acc] = { "accession": residue.entry_acc, @@ -827,55 +828,5 @@ def residues(value, general_handler): return payload -def extra_features(value, general_handler): - features = ProteinExtraFeatures.objects.filter( - protein_acc__in=general_handler.queryset_manager.get_queryset() - ) - payload = {} - for feature in features: - if feature.entry_acc not in payload: - payload[feature.entry_acc] = { - "accession": feature.entry_acc, - "source_database": feature.source_database, - "locations": [], - } - payload[feature.entry_acc]["locations"].append( - { - "fragments": [ - { - "start": feature.location_start, - "end": feature.location_end, - "seq_feature": feature.sequence_feature, - } - ] - } - ) - return payload - - -def residues(value, general_handler): - residues = ProteinResidues.objects.filter( - protein_acc__in=general_handler.queryset_manager.get_queryset() - ) - payload = {} - for residue in residues: - if residue.entry_acc not in payload: - payload[residue.entry_acc] = { - "accession": residue.entry_acc, - "source_database": residue.source_database, - "name": residue.entry_name, - "locations": [], - } - payload[residue.entry_acc]["locations"].append( - { - "description": residue.description, - "fragments": [ - {"residues": f[0], "start": f[1], "end": f[2]} - for f in residue.fragments - ], - } - ) - return payload - def passing(x, y): pass From 28cca3eee858f3b85d17347df29a317bfd052dd8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 8 Feb 2021 16:05:05 +0000 Subject: [PATCH 12/12] removing unused coded left by mistake e --- webfront/views/modifiers.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index c3560ca6..11caf175 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -832,9 +832,5 @@ def get_model_structure(value, general_handler): return get_model_structure -def get_model_contacts(value, general_handler): - return payload - - def passing(x, y): pass