From d5c2f1cf9f57c0c9fb796e26f473e4b551c70292 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Sun, 25 Sep 2022 22:09:06 +0100 Subject: [PATCH 1/7] Use pLDDT score to filter proteins with an AlphaFold model --- webfront/views/modifiers.py | 16 ++++++++++++++++ webfront/views/protein.py | 3 ++- webfront/views/queryset_manager.py | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 568279f7..cd1c66ff 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -276,6 +276,22 @@ def filter_by_entry_db(value, general_handler): return response.first() +def filter_by_min_value(endpoint, field, value, sort_direction=None): + def x(_, general_handler): + general_handler.queryset_manager.add_filter( + endpoint, + **{ + "{}__gte".format(field): value + }, + ) + if sort_direction in ("asc", "desc"): + general_handler.queryset_manager.order_by("{}:{}".format(field, sort_direction)) + elif sort_direction is not None: + raise ValueError("{} is not a valid sorting order".format(sort_direction)) + + return x + + def filter_by_boolean_field(endpoint, field): def x(value, general_handler): if value.lower() == "false": diff --git a/webfront/views/protein.py b/webfront/views/protein.py index b54bd4c6..e08e6528 100644 --- a/webfront/views/protein.py +++ b/webfront/views/protein.py @@ -12,6 +12,7 @@ filter_by_domain_architectures, filter_by_contains_field, filter_by_match_presence, + filter_by_min_value, add_extra_fields, get_isoforms, calculate_residue_conservation, @@ -281,7 +282,7 @@ def get( "is_fragment", filter_by_boolean_field("protein", "is_fragment") ) general_handler.modifiers.register( - "has_model", filter_by_boolean_field("protein", "has_model") + "has_model", filter_by_min_value("protein", "protein_af_score", 0, "desc") ) return super(ProteinHandler, self).get( diff --git a/webfront/views/queryset_manager.py b/webfront/views/queryset_manager.py index ba6da3a9..0e767fba 
100644 --- a/webfront/views/queryset_manager.py +++ b/webfront/views/queryset_manager.py @@ -132,7 +132,7 @@ def get_searcher_query(self, include_search=False, use_lineage=False): ) elif k == "type" or k == "type__iexact" or k == "type__exact": blocks.append("{}_type:{}".format(ep, escape(v))) - elif k in ("is_fragment", "has_model"): + elif k == "is_fragment": blocks.append("{}_{}:{}".format(ep, k, escape(v))) elif k == "tax_id" or k == "tax_id__iexact" or k == "tax_id__contains": blocks.append("tax_id:{}".format(escape(v))) From 7ed9c5a3eeb1bbaa619f13cdcb2deb86643d4e2b Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Mon, 26 Sep 2022 14:12:01 +0100 Subject: [PATCH 2/7] Force sort_direction to be asc or desc --- webfront/views/modifiers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index cd1c66ff..6104ae96 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -276,7 +276,7 @@ def filter_by_entry_db(value, general_handler): return response.first() -def filter_by_min_value(endpoint, field, value, sort_direction=None): +def filter_by_min_value(endpoint, field, value, sort_direction="asc"): def x(_, general_handler): general_handler.queryset_manager.add_filter( endpoint, @@ -286,7 +286,7 @@ def x(_, general_handler): ) if sort_direction in ("asc", "desc"): general_handler.queryset_manager.order_by("{}:{}".format(field, sort_direction)) - elif sort_direction is not None: + else: raise ValueError("{} is not a valid sorting order".format(sort_direction)) return x From b6dabe97b618a657ade2fb53f1751ddcea039ec4 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Mon, 26 Sep 2022 17:36:42 +0100 Subject: [PATCH 3/7] Enable custom ordering of buckets in composite aggregations --- webfront/searcher/elastic_controller.py | 117 +++++++++++++++++------- 1 file changed, 83 insertions(+), 34 deletions(-) diff --git a/webfront/searcher/elastic_controller.py 
b/webfront/searcher/elastic_controller.py index 53b76cfa..dc21d95c 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -12,14 +12,44 @@ es_results = list() +def parseCursor(cursor): + fields = {} + for item in cursor.split(","): + k, t, v = item.split(":") + if t == "f": + fields[k] = float(v) + elif t == "i": + fields[k] = int(v) + else: + fields[k] = v.lower() + + return fields + + +def encodeCursor(keys): + if not keys: + return None + output = [] + for k, v in keys.items(): + if isinstance(v, float): + t = "f" + elif isinstance(v, int): + t = "i" + else: + t = "s" + output.append("{}:{}:{}".format(k, t, v)) + + return ",".join(output) + + def getAfterBeforeFromCursor(cursor): - after = None - before = None + after = {} + before = {} if cursor is not None: if cursor[0] == "-": - before = cursor[1:] + before = parseCursor(cursor[1:]) else: - after = cursor + after = parseCursor(cursor) return after, before @@ -276,7 +306,7 @@ def get_group_obj_copy_of_field_by_query( "size": 0, } after, before = getAfterBeforeFromCursor(cursor) - self.addAfterKeyToQueryComposite( + reset_direction = self.addAfterKeyToQueryComposite( facet["aggs"]["groups"]["composite"], after, before ) if inner_field_to_count is not None: @@ -286,6 +316,8 @@ def get_group_obj_copy_of_field_by_query( if fq is not None: query += " && " + fq response = self._elastic_json_query(query, facet) + if reset_direction: + self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, query) before_key = self.getBeforeKey(response, facet, before, query) buckets = response["aggregations"]["groups"]["buckets"] @@ -325,8 +357,24 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No }, "size": 0, } + + # Sort buckets by custom field + match = re.search(r"&?sort=(\w+):(\w+)", qs) + if match: + field, direction = match.groups() + if field != 
facet["aggs"]["ngroups"]["cardinality"]["field"]: + # Custom field takes priority over default one ('source') + facet["aggs"]["groups"]["composite"]["sources"].insert(0, { + field: { + "terms": { + "field": field, + "order": direction + } + } + }) + after, before = getAfterBeforeFromCursor(cursor) - self.addAfterKeyToQueryComposite( + reset_direction = self.addAfterKeyToQueryComposite( facet["aggs"]["groups"]["composite"], after, before ) if endpoint == "organism" or endpoint == "taxonomy": @@ -345,6 +393,8 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No str(x["key"]["source"]).lower() for x in response["aggregations"]["groups"]["buckets"] ] + if reset_direction: + self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, qs) before_key = self.getBeforeKey(response, facet, before, qs) return accessions, count, after_key, before_key @@ -452,53 +502,52 @@ def _elastic_json_query(self, q, query_obj=None, is_ida=False): return obj def addAfterKeyToQueryComposite(self, composite, after, before): - if after is not None: - composite["after"] = {"source": after.lower()} - elif before is not None: - composite["after"] = {"source": before.lower()} - composite["sources"][0]["source"]["terms"]["order"] = "desc" + if after: + composite["after"] = after + return False + elif before: + composite["after"] = before + self.reverseOrderDirection(composite) + return True def getAfterKey(self, response, facet, before, qs): after_key = None - if before is not None: + if before: try: - after_key = response["aggregations"]["groups"]["buckets"][0]["key"][ - "source" - ] + after_key = response["aggregations"]["groups"]["buckets"][0]["key"] except: pass elif "after_key" in response["aggregations"]["groups"]: - after_key = response["aggregations"]["groups"]["after_key"]["source"] + after_key = response["aggregations"]["groups"]["after_key"] if after_key is not None: - 
facet["aggs"]["groups"]["composite"]["after"] = {"source": after_key} - facet["aggs"]["groups"]["composite"]["sources"][0]["source"]["terms"][ - "order" - ] = "asc" + facet["aggs"]["groups"]["composite"]["after"] = after_key next_response = self._elastic_json_query(qs, facet) if len(next_response["aggregations"]["groups"]["buckets"]) == 0: after_key = None - return after_key + return encodeCursor(after_key) def getBeforeKey(self, response, facet, before, qs): before_key = None try: - if before is not None: - before_key = response["aggregations"]["groups"]["buckets"][-1]["key"][ - "source" - ] + if before: + before_key = response["aggregations"]["groups"]["buckets"][-1]["key"] else: - before_key = response["aggregations"]["groups"]["buckets"][0]["key"][ - "source" - ] + before_key = response["aggregations"]["groups"]["buckets"][0]["key"] except: pass - if before_key is not None: - facet["aggs"]["groups"]["composite"]["after"] = {"source": before_key} - facet["aggs"]["groups"]["composite"]["sources"][0]["source"]["terms"][ - "order" - ] = "desc" + if before_key: + facet["aggs"]["groups"]["composite"]["after"] = before_key + self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) prev_response = self._elastic_json_query(qs, facet) if len(prev_response["aggregations"]["groups"]["buckets"]) == 0: before_key = None - return before_key + return encodeCursor(before_key) + + def reverseOrderDirection(self, composite): + for field in composite["sources"]: + for k, v in field.items(): + if v["terms"].get("order", "asc") == "asc": + v["terms"]["order"] = "desc" + else: + v["terms"]["order"] = "asc" From 17feca7b79630b91a453e7afa69b07454e3b18d6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. 
Salazar" Date: Mon, 3 Oct 2022 11:08:23 +0100 Subject: [PATCH 4/7] sorting the payload at pagination time based on elastic --- webfront/pagination.py | 26 +++++++++++++++++++++++- webfront/views/custom.py | 32 ++++++++++++++++++++---------- webfront/views/queryset_manager.py | 4 ++++ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/webfront/pagination.py b/webfront/pagination.py index c7b7615b..690b94b9 100644 --- a/webfront/pagination.py +++ b/webfront/pagination.py @@ -31,19 +31,39 @@ class CustomPagination(CursorPagination): current_size = None after_key = None before_key = None + elastic_result = None def get_paginated_response(self, data): base = [ ("count", self.current_size), ("next", self.get_next_link()), ("previous", self.get_previous_link()), - ("results", data["data"]), + ("results", self._sortBasedOnElastic(data["data"])), ] if "extensions" in data and len(data["extensions"]) > 0: for ext in data["extensions"]: base.append((ext, data["extensions"][ext])) return Response(OrderedDict(base)) + # If there is data in elastic_result, it implies that the queryset was created by querying elastic first. + # This method uses the list of accessions retrieved via elastic to order the results. 
+ def _sortBasedOnElastic(self, data): + if self.elastic_result is None: + return data + ordered_data = [] + for acc in self.elastic_result: + obj = next( + filter( + lambda item: item.get("metadata", {}).get("accession", "").lower() + == acc.lower(), + data, + ), + None, + ) + if obj is not None: + ordered_data.append(obj) + return ordered_data + def _get_position_from_instance(self, instance, ordering): if type(instance) == tuple: return instance[0] @@ -51,9 +71,11 @@ def _get_position_from_instance(self, instance, ordering): instance, ordering ) + # Extract some values passed as kwargs before invoking the implementation in the super class def paginate_queryset(self, queryset, request, **kwargs): self.current_size = None self.after_key = None + self.elastic_result = None if ( hasattr(queryset, "model") and queryset.model._meta.ordering != [] @@ -69,6 +91,8 @@ def paginate_queryset(self, queryset, request, **kwargs): self.after_key = kwargs["after_key"] if "before_key" in kwargs and kwargs["before_key"] is not None: self.before_key = kwargs["before_key"] + if "elastic_result" in kwargs and kwargs["elastic_result"] is not None: + self.elastic_result = kwargs["elastic_result"] return super(CustomPagination, self).paginate_queryset( queryset, request, kwargs["view"] ) diff --git a/webfront/views/custom.py b/webfront/views/custom.py index fdb2c61c..05e7cf4d 100644 --- a/webfront/views/custom.py +++ b/webfront/views/custom.py @@ -50,6 +50,7 @@ class CustomView(GenericAPIView): serializer_detail_filter = SerializerDetail.ALL after_key = None before_key = None + elastic_result = None http_method_names = ["get", "head"] def get( @@ -68,13 +69,15 @@ def get( # if this is the last level if len(endpoint_levels) == level: searcher = general_handler.searcher + # Executes all the modifiers, some add filters to the query set but others might replace it. 
has_payload = general_handler.modifiers.execute(drf_request) logger.debug(request.get_full_path()) + # If there is a payload from the modifiers, it has its own serializer if has_payload or general_handler.modifiers.serializer is not None: self.serializer_detail = general_handler.modifiers.serializer - # self.many = general_handler.modifiers.many if general_handler.modifiers.many is not None: self.many = general_handler.modifiers.many + # When there is a payload from the modifiers. It should build the response with it if has_payload: if general_handler.modifiers.payload is None: raise EmptyQuerysetError( @@ -91,8 +94,9 @@ def get( mime_type = annotation.mime_type anno_type = annotation.type anno_value = annotation.value - response = HttpResponse(content=anno_value, - content_type=mime_type) + response = HttpResponse( + content=anno_value, content_type=mime_type + ) if anno_type.startswith(("alignment:", "model:")): if "download" in request.GET: @@ -117,6 +121,8 @@ def get( ) elif self.from_model: + # Single endpoints don't require Elasticsearch, so they can be resolved + # by normal Django means(i.e. 
just the MySQL model) if ( is_single_endpoint(general_handler) or not self.expected_response_is_list() @@ -126,13 +132,10 @@ def get( ) self.search_size = self.queryset.count() else: + # It uses multiple endpoints, so we need to use the elastic index self.update_queryset_from_search(searcher, general_handler) if self.queryset.count() == 0: - # if 0 == general_handler.queryset_manager.get_queryset(only_main_endpoint=True).count(): - # raise Exception("The URL requested didn't have any data related.\nList of endpoints: {}" - # .format(endpoint_levels)) - raise EmptyQuerysetError( "There is no data associated with the requested URL.\nList of endpoints: {}".format( endpoint_levels @@ -143,15 +146,17 @@ def get( self.get_queryset(), drf_request, view=self, + # passing data of the elastic result to the pagination instance search_size=self.search_size, after_key=self.after_key, before_key=self.before_key, + elastic_result=self.elastic_result, ) else: self.queryset = self.get_queryset().first() else: if endpoint_levels[0] != "utils": - # if it gets here it is a endpoint request checking for database contributions. + # if it gets here it is an endpoint request checking for database contributions. self.queryset = self.get_counter_response(general_handler, searcher) serialized = self.serializer_class( @@ -321,22 +326,27 @@ def expected_response_is_list(self): search_size = None + # Queries the elastic searcher core to get a list of accessions of the main endpoint, + # then builds a queryset matching those accessions. 
+ # This is the main connection point between elastic and MySQL def update_queryset_from_search(self, searcher, general_handler): ep = general_handler.queryset_manager.main_endpoint s = general_handler.pagination["size"] cursor = general_handler.pagination["cursor"] qs = general_handler.queryset_manager.get_searcher_query(include_search=True) - res, length, after_key, before_key = searcher.get_list_of_endpoint( + elastic_result, length, after_key, before_key = searcher.get_list_of_endpoint( ep, rows=s, query=qs, cursor=cursor ) - self.queryset = general_handler.queryset_manager.get_base_queryset(ep) - self.queryset = filter_queryset_accession_in(self.queryset, res) + self.queryset = general_handler.queryset_manager.get_base_queryset(ep) + self.queryset = filter_queryset_accession_in(self.queryset, elastic_result) + # These values get stored in the instance attributes, so they can be recovered at the pagination stage. self.search_size = length self.after_key = after_key self.before_key = before_key + self.elastic_result = elastic_result def filter_queryset_accession_in(queryset, list): diff --git a/webfront/views/queryset_manager.py b/webfront/views/queryset_manager.py index 0e767fba..d424b01f 100644 --- a/webfront/views/queryset_manager.py +++ b/webfront/views/queryset_manager.py @@ -69,6 +69,8 @@ def remove_filter(self, endpoint, f): def order_by(self, field): self.order_field = field + # Generates a query string for elasticsearch from the registered queryset filters. + # It explicitly goes through all the filters and creates the query string case by case. def get_searcher_query(self, include_search=False, use_lineage=False): blocks = [] for ep in self.filters: @@ -162,9 +164,11 @@ def get_searcher_query(self, include_search=False, use_lineage=False): if k == "source_database" or k == "source_database__iexact": blocks.append("{}_db:{}".format(ep, escape(v))) + # Normalizes the blocks (sorts and lowercases) and joins them with ' && '. 
blocks = list(set(blocks)) blocks.sort() q = " && ".join(blocks).lower() + if self.order_field is not None: q += "&sort=" + self.order_field return q From c323ec910cda48fca3327c7fbed32d003b86e958 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 3 Oct 2022 12:12:36 +0100 Subject: [PATCH 5/7] only applying the elastic order when extra order is imposed in the search --- webfront/searcher/elastic_controller.py | 4 +++- webfront/views/custom.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/webfront/searcher/elastic_controller.py b/webfront/searcher/elastic_controller.py index dc21d95c..d20e701a 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -340,6 +340,7 @@ def get_group_obj_copy_of_field_by_query( return output def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=None): + should_keep_elastic_order = False qs = self.queryset_manager.get_searcher_query() if query is None else query if qs == "": qs = "*:*" @@ -372,6 +373,7 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No } } }) + should_keep_elastic_order = True after, before = getAfterBeforeFromCursor(cursor) reset_direction = self.addAfterKeyToQueryComposite( facet["aggs"]["groups"]["composite"], after, before ) if endpoint == "organism" or endpoint == "taxonomy": @@ -397,7 +399,7 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, qs) before_key = self.getBeforeKey(response, facet, before, qs) - return accessions, count, after_key, before_key + return accessions, count, after_key, before_key,should_keep_elastic_order def get_chain(self): qs = self.queryset_manager.get_searcher_query() diff --git a/webfront/views/custom.py b/webfront/views/custom.py index 05e7cf4d..b94e208f 100644 --- a/webfront/views/custom.py +++ b/webfront/views/custom.py @@ -335,7 +335,7 @@ def update_queryset_from_search(self, searcher, general_handler): 
cursor = general_handler.pagination["cursor"] qs = general_handler.queryset_manager.get_searcher_query(include_search=True) - elastic_result, length, after_key, before_key = searcher.get_list_of_endpoint( + elastic_result, length, after_key, before_key, should_keep_elastic_order = searcher.get_list_of_endpoint( ep, rows=s, query=qs, cursor=cursor ) @@ -346,7 +346,7 @@ def update_queryset_from_search(self, searcher, general_handler): self.search_size = length self.after_key = after_key self.before_key = before_key - self.elastic_result = elastic_result + self.elastic_result = elastic_result if should_keep_elastic_order else None def filter_queryset_accession_in(queryset, list): From 316b8b0fd9b457f5b196509d5b36a951e07382b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 3 Oct 2022 14:46:01 +0100 Subject: [PATCH 6/7] cleaning up my mess --- webfront/searcher/elastic_controller.py | 2 +- webfront/views/modifiers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/webfront/searcher/elastic_controller.py b/webfront/searcher/elastic_controller.py index d20e701a..8eb9b69f 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -399,7 +399,7 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, qs) before_key = self.getBeforeKey(response, facet, before, qs) - return accessions, count, after_key, before_key,should_keep_elastic_order + return accessions, count, after_key, before_key, should_keep_elastic_order def get_chain(self): qs = self.queryset_manager.get_searcher_query() diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index cf74f597..07a59e86 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -420,7 +420,7 @@ def filter_by_domain_architectures(field, general_handler): 
general_handler.queryset_manager.get_searcher_query() + " && ida_id:" + field ) endpoint = general_handler.queryset_manager.main_endpoint - res, length, after_key, before_key = searcher.get_list_of_endpoint( + res, length, after_key, before_key, _ = searcher.get_list_of_endpoint( endpoint, rows=size, query=query, cursor=cursor ) general_handler.modifiers.search_size = length From 9eeeb6a1a9e30b9beaf012f9c95a383c35061b2e Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Tue, 4 Oct 2022 13:04:49 +0100 Subject: [PATCH 7/7] cleanup and running black --- webfront/exceptions.py | 6 +- .../migrations/0010_wiki_field_type_change.py | 10 +-- .../0016_structural_model_algorithm.py | 14 ++- .../migrations/0017_structural_model_plddt.py | 10 +-- webfront/migrations/0018_taxa_modifier.py | 10 +-- webfront/migrations/0019_entrytaxa_table.py | 28 +++--- ...020_alter_entryannotation_num_sequences.py | 10 +-- webfront/pagination.py | 58 ------------ webfront/searcher/elastic_controller.py | 63 +------------ webfront/serializers/interpro.py | 3 +- webfront/templatetags/interpro_tags.py | 5 +- webfront/tests/fixtures_reader.py | 22 +++-- webfront/tests/test_mail.py | 18 ++-- .../tests/tests_3_endpoints_using_searcher.py | 4 +- webfront/tests/tests_entry_endpoint.py | 3 +- webfront/tests/tests_modifiers.py | 25 ++++-- .../tests_protein_endpoint_entry_filter.py | 4 +- .../tests_structure_endpoint_entry_filter.py | 12 ++- ...e_payload_structure_combining_endpoints.py | 8 +- webfront/tests/tests_utils_endpoint.py | 4 +- webfront/views/mail.py | 44 ++++----- webfront/views/modifiers.py | 89 ++++++++++++------- webfront/views/utils.py | 15 ++-- 23 files changed, 193 insertions(+), 272 deletions(-) diff --git a/webfront/exceptions.py b/webfront/exceptions.py index 3d1e93eb..9882cf7d 100644 --- a/webfront/exceptions.py +++ b/webfront/exceptions.py @@ -10,18 +10,22 @@ class EmptyQuerysetError(Exception): def __init__(self, message): self.message = message + class 
ExpectedUniqueError(Exception): def __init__(self, message): self.message = message + class HmmerWebError(Exception): def __init__(self, message): self.message = message + class BadURLParameterError(Exception): def __init__(self, message): self.message = message + class InvalidOperationRequest(Exception): def __init__(self, message): - self.message = message \ No newline at end of file + self.message = message diff --git a/webfront/migrations/0010_wiki_field_type_change.py b/webfront/migrations/0010_wiki_field_type_change.py index 624da118..0e0f02e4 100644 --- a/webfront/migrations/0010_wiki_field_type_change.py +++ b/webfront/migrations/0010_wiki_field_type_change.py @@ -6,14 +6,12 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0009_entry_annotation_changes'), - ] + dependencies = [("webfront", "0009_entry_annotation_changes")] operations = [ migrations.AlterField( - model_name='entry', - name='wikipedia', + model_name="entry", + name="wikipedia", field=jsonfield.fields.JSONField(null=True), - ), + ) ] diff --git a/webfront/migrations/0016_structural_model_algorithm.py b/webfront/migrations/0016_structural_model_algorithm.py index 65dd959f..7191cac6 100644 --- a/webfront/migrations/0016_structural_model_algorithm.py +++ b/webfront/migrations/0016_structural_model_algorithm.py @@ -5,20 +5,16 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0015_structural_model_lddt'), - ] + dependencies = [("webfront", "0015_structural_model_lddt")] operations = [ migrations.AddField( - model_name='structuralmodel', - name='algorithm', - field=models.CharField(default='trRosetta', max_length=20), + model_name="structuralmodel", + name="algorithm", + field=models.CharField(default="trRosetta", max_length=20), preserve_default=False, ), migrations.AlterField( - model_name='structuralmodel', - name='lddt', - field=models.BinaryField(), + model_name="structuralmodel", name="lddt", field=models.BinaryField() ), ] diff --git 
a/webfront/migrations/0017_structural_model_plddt.py b/webfront/migrations/0017_structural_model_plddt.py index d6544ac9..ac98cec9 100644 --- a/webfront/migrations/0017_structural_model_plddt.py +++ b/webfront/migrations/0017_structural_model_plddt.py @@ -5,14 +5,10 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0016_structural_model_algorithm'), - ] + dependencies = [("webfront", "0016_structural_model_algorithm")] operations = [ migrations.RenameField( - model_name='structuralmodel', - old_name='lddt', - new_name='plddt', - ), + model_name="structuralmodel", old_name="lddt", new_name="plddt" + ) ] diff --git a/webfront/migrations/0018_taxa_modifier.py b/webfront/migrations/0018_taxa_modifier.py index 2f91e884..0e10bb2d 100644 --- a/webfront/migrations/0018_taxa_modifier.py +++ b/webfront/migrations/0018_taxa_modifier.py @@ -6,14 +6,10 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0017_structural_model_plddt'), - ] + dependencies = [("webfront", "0017_structural_model_plddt")] operations = [ migrations.AddField( - model_name='entry', - name='taxa', - field=jsonfield.fields.JSONField(null=True), - ), + model_name="entry", name="taxa", field=jsonfield.fields.JSONField(null=True) + ) ] diff --git a/webfront/migrations/0019_entrytaxa_table.py b/webfront/migrations/0019_entrytaxa_table.py index c42ca6bd..6db09203 100644 --- a/webfront/migrations/0019_entrytaxa_table.py +++ b/webfront/migrations/0019_entrytaxa_table.py @@ -7,23 +7,25 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0018_taxa_modifier'), - ] + dependencies = [("webfront", "0018_taxa_modifier")] operations = [ migrations.CreateModel( - name='EntryTaxa', + name="EntryTaxa", fields=[ - ('accession', models.OneToOneField(db_column='accession', on_delete=django.db.models.deletion.CASCADE, primary_key=True, serialize=False, to='webfront.entry')), - ('tree', jsonfield.fields.JSONField(null=True)), + ( + "accession", + 
models.OneToOneField( + db_column="accession", + on_delete=django.db.models.deletion.CASCADE, + primary_key=True, + serialize=False, + to="webfront.entry", + ), + ), + ("tree", jsonfield.fields.JSONField(null=True)), ], - options={ - 'db_table': 'webfront_entrytaxa', - }, - ), - migrations.RemoveField( - model_name='entry', - name='taxa', + options={"db_table": "webfront_entrytaxa"}, ), + migrations.RemoveField(model_name="entry", name="taxa"), ] diff --git a/webfront/migrations/0020_alter_entryannotation_num_sequences.py b/webfront/migrations/0020_alter_entryannotation_num_sequences.py index e318c838..e8f099b7 100644 --- a/webfront/migrations/0020_alter_entryannotation_num_sequences.py +++ b/webfront/migrations/0020_alter_entryannotation_num_sequences.py @@ -5,14 +5,12 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0019_entrytaxa_table'), - ] + dependencies = [("webfront", "0019_entrytaxa_table")] operations = [ migrations.AlterField( - model_name='entryannotation', - name='num_sequences', + model_name="entryannotation", + name="num_sequences", field=models.IntegerField(null=True), - ), + ) ] diff --git a/webfront/pagination.py b/webfront/pagination.py index 690b94b9..bf0a3b54 100644 --- a/webfront/pagination.py +++ b/webfront/pagination.py @@ -129,61 +129,3 @@ def get_previous_link(self): return replace_query_param( self.base_url, "cursor", "-{}".format(self.before_key) ) - - -class CustomPaginationOld(PageNumberPagination): - page_size = settings.INTERPRO_CONFIG.get("default_page_size", 20) - page_size_query_param = "page_size" - max_page_size = 200 - ordering = "-accession" - django_paginator_class = CustomPaginator - current_size = None - - def get_paginated_response(self, data): - self.current_size = ( - self.page.paginator.count - if self.current_size is None - else self.current_size - ) - return Response( - OrderedDict( - [ - ("count", self.current_size), - ("next", self.get_next_link()), - ("previous", 
self.get_previous_link()), - ("results", data), - ] - ) - ) - - def paginate_queryset(self, queryset, request, **kwargs): - self.current_size = None - if "search_size" in kwargs and kwargs["search_size"] is not None: - if not queryset.ordered: - queryset = queryset.order_by("accession") - self.current_size = kwargs["search_size"] - - return super(CustomPagination, self).paginate_queryset( - queryset, request, kwargs["view"] - ) - - def get_next_link(self): - if not self.has_next(): - return None - url = replace_url_host(self.request.build_absolute_uri()) - page_number = self.page.number + 1 - return replace_query_param(url, self.page_query_param, page_number) - - def has_next(self): - if self.current_size is None: - return False - return self.page.number * self.page.paginator.per_page < self.current_size - - def get_previous_link(self): - if not self.page.has_previous(): - return None - url = replace_url_host(self.request.build_absolute_uri()) - page_number = self.page.previous_page_number() - if page_number == 1: - return remove_query_param(url, self.page_query_param) - return replace_query_param(url, self.page_query_param, page_number) diff --git a/webfront/searcher/elastic_controller.py b/webfront/searcher/elastic_controller.py index 8eb9b69f..4dc4872b 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -287,58 +287,6 @@ def get_group_obj_of_field_by_query( return output - def get_group_obj_copy_of_field_by_query( - self, query, field, fq=None, rows=1, cursor=None, inner_field_to_count=None - ): - # TODO: change to new pagination - query = self.queryset_manager.get_searcher_query() if query is None else query - facet = { - "aggs": { - "ngroups": {"cardinality": {"field": field}}, - "groups": { - "composite": { - "size": rows, - "sources": [{"source": {"terms": {"field": field}}}], - }, - "aggs": {"tops": {"top_hits": {"size": 1}}}, - }, - }, - "size": 0, - } - after, before = getAfterBeforeFromCursor(cursor) - 
reset_direction = self.addAfterKeyToQueryComposite( - facet["aggs"]["groups"]["composite"], after, before - ) - if inner_field_to_count is not None: - facet["aggs"]["groups"]["aggs"]["unique"] = { - "cardinality": {"field": inner_field_to_count} - } - if fq is not None: - query += " && " + fq - response = self._elastic_json_query(query, facet) - if reset_direction: - self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) - after_key = self.getAfterKey(response, facet, before, query) - before_key = self.getBeforeKey(response, facet, before, query) - buckets = response["aggregations"]["groups"]["buckets"] - if len(buckets) > 0 and "tops" not in buckets[0]: - buckets = [b for sb in buckets for b in sb["subgroups"]["buckets"]] - output = { - "groups": [ - bucket["tops"]["hits"]["hits"][0]["_source"] for bucket in buckets - ], - "ngroups": response["aggregations"]["ngroups"], - "after_key": after_key, - "before_key": before_key, - } - if inner_field_to_count is not None: - i = 0 - for bucket in response["aggregations"]["groups"]["buckets"]: - output["groups"][i]["unique"] = bucket["unique"] - i += 1 - - return output - def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=None): should_keep_elastic_order = False qs = self.queryset_manager.get_searcher_query() if query is None else query @@ -365,14 +313,9 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No field, direction = match.groups() if field != facet["aggs"]["ngroups"]["cardinality"]["field"]: # Custom field takes priority over default one ('source') - facet["aggs"]["groups"]["composite"]["sources"].insert(0, { - field: { - "terms": { - "field": field, - "order": direction - } - } - }) + facet["aggs"]["groups"]["composite"]["sources"].insert( + 0, {field: {"terms": {"field": field, "order": direction}}} + ) should_keep_elastic_order = True after, before = getAfterBeforeFromCursor(cursor) diff --git a/webfront/serializers/interpro.py 
b/webfront/serializers/interpro.py index 238555e0..77f59429 100644 --- a/webfront/serializers/interpro.py +++ b/webfront/serializers/interpro.py @@ -204,8 +204,7 @@ def reformat_cross_references(cross_references): @staticmethod def to_metadata_representation(instance, searcher, sq, counters=None): results = EntryAnnotation.objects.filter(accession=instance.accession).only( - "type", - "num_sequences" + "type", "num_sequences" ) annotation_types = {x.type: x.num_sequences or 0 for x in results} if counters is None: diff --git a/webfront/templatetags/interpro_tags.py b/webfront/templatetags/interpro_tags.py index 62c7686e..546e6cc8 100644 --- a/webfront/templatetags/interpro_tags.py +++ b/webfront/templatetags/interpro_tags.py @@ -7,8 +7,11 @@ register = template.Library() + @register.simple_tag def get_url_with_prefix(request, key, val): iri = request.get_full_path() uri = iri_to_uri(iri) - return settings.INTERPRO_CONFIG["url_path_prefix"] + escape(replace_query_param(uri, key, val)) + return settings.INTERPRO_CONFIG["url_path_prefix"] + escape( + replace_query_param(uri, key, val) + ) diff --git a/webfront/tests/fixtures_reader.py b/webfront/tests/fixtures_reader.py index 73aa101e..1ca623d7 100644 --- a/webfront/tests/fixtures_reader.py +++ b/webfront/tests/fixtures_reader.py @@ -115,17 +115,17 @@ def get_fixtures(self): "entry_integrated": self.entries[e]["integrated"], "entry_date": self.entries[e]["entry_date"], "text_entry": e - + " " - + self.entries[e]["type"] - + " " - + (" ".join(self.entries[e]["description"])), + + " " + + self.entries[e]["type"] + + " " + + (" ".join(self.entries[e]["description"])), "protein_acc": p, "protein_db": self.proteins[p]["source_database"], "text_protein": p - + " " - + self.proteins[p]["source_database"] - + " " - + (" ".join(self.proteins[p]["description"])), + + " " + + self.proteins[p]["source_database"] + + " " + + (" ".join(self.proteins[p]["description"])), "ida_id": self.proteins[p]["ida_id"], "ida": 
self.proteins[p]["ida"], "tax_id": self.proteins[p]["organism"]["taxId"], @@ -250,16 +250,14 @@ def get_fixtures(self): # Creating obj to add for proteins without entry or structure for p in self.proteins: - p_ocurrences = len([t for t in to_add if t['protein_acc']==p ]) + p_ocurrences = len([t for t in to_add if t["protein_acc"] == p]) if p_ocurrences == 0: to_add.append( { "text": p, "protein_acc": p, "protein_db": self.proteins[p]["source_database"], - "text_protein": p - + " " - + self.proteins[p]["source_database"], + "text_protein": p + " " + self.proteins[p]["source_database"], "tax_id": self.proteins[p]["organism"]["taxId"], "tax_name": self.proteins[p]["organism"]["name"], "tax_rank": self.tax2rank[ diff --git a/webfront/tests/test_mail.py b/webfront/tests/test_mail.py index a388dac9..b758bdfd 100644 --- a/webfront/tests/test_mail.py +++ b/webfront/tests/test_mail.py @@ -5,11 +5,13 @@ class TestMail(TestCase): def test_mail(self): self.client = Client() - response = self.client.post('/api/mail/', - { - 'path': 'echo', - 'subject': 'Add annotation test from API', - 'message': 'Test', - 'from_email': 'swaathik@ebi.ac.uk' - }) - self.assertEqual(response.json()['from'], 'swaathik@ebi.ac.uk') + response = self.client.post( + "/api/mail/", + { + "path": "echo", + "subject": "Add annotation test from API", + "message": "Test", + "from_email": "swaathik@ebi.ac.uk", + }, + ) + self.assertEqual(response.json()["from"], "swaathik@ebi.ac.uk") diff --git a/webfront/tests/tests_3_endpoints_using_searcher.py b/webfront/tests/tests_3_endpoints_using_searcher.py index 9a10522d..2e1f562e 100644 --- a/webfront/tests/tests_3_endpoints_using_searcher.py +++ b/webfront/tests/tests_3_endpoints_using_searcher.py @@ -23,7 +23,9 @@ }, "structure": {"pdb": ["1JM7", "1T2V", "2BKM", "1JZ8"]}, "taxonomy": {"uniprot": ["1", "2", "2579", "40296", "344612", "1001583", "10090"]}, - "proteome": {"uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"]}, + "proteome": { + 
"uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"] + }, "set": {"pfam": ["CL0001", "CL0002"]}, } diff --git a/webfront/tests/tests_entry_endpoint.py b/webfront/tests/tests_entry_endpoint.py index fb728fac..33c834e1 100644 --- a/webfront/tests/tests_entry_endpoint.py +++ b/webfront/tests/tests_entry_endpoint.py @@ -146,8 +146,7 @@ def test_can_read_entry_pfam_id(self): self.assertIn("counters", response.data["metadata"].keys()) self.assertIn("proteins", response.data["metadata"]["counters"].keys()) self.assertIn("entry_annotations", response.data["metadata"].keys()) - self.assertIsInstance(response.data["metadata"]["entry_annotations"], - dict) + self.assertIsInstance(response.data["metadata"]["entry_annotations"], dict) for k, v in response.data["metadata"]["entry_annotations"].items(): self.assertIsInstance(k, str) self.assertIsInstance(v, int) diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index 1afe5325..81c37d18 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -25,7 +25,9 @@ def test_can_get_the_entry_type_groups_proteins_by_tax_id(self): response = self.client.get("/api/protein?group_by=tax_id") self.assertEqual(response.status_code, status.HTTP_200_OK) # Only Mus musculus is a key species - self.assertEqual({'10090': {'value': 1, 'title': 'Mus musculus'}}, response.data) + self.assertEqual( + {"10090": {"value": 1, "title": "Mus musculus"}}, response.data + ) def test_can_group_interpro_entries_with_member_databases(self): response = self.client.get("/api/entry/interpro?group_by=member_databases") @@ -381,7 +383,7 @@ def test_annotation_modifier_hmm(self): def test_annotation_modifier_logo(self): response = self.client.get("/api/entry/pfam/pf02171?annotation=logo") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response['content-type'], "application/json") + self.assertEqual(response["content-type"], "application/json") data = 
json.loads(response.content) self.assertIn("ali_map", data) self.assertEqual(302, len(data["ali_map"])) @@ -392,7 +394,9 @@ def test_annotation_modifier_pfam_alignment(self): self.assertEqual(response["content-type"], "text/plain") def test_annotation_modifier_interpro_alignment(self): - response = self.client.get("/api/entry/interpro/ipr003165?annotation=alignment:seed") + response = self.client.get( + "/api/entry/interpro/ipr003165?annotation=alignment:seed" + ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response["content-type"], "text/plain") @@ -437,7 +441,8 @@ def test_taxa_modifier(self): def test_no_taxa_modifier(self): response = self.client.get("/api/entry/interpro/IPR001165?taxa") self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - + + class TaxonomyScientificNameModifierTest(InterproRESTTestCase): def test_scientific_name_modifier(self): response = self.client.get("/api/taxonomy/uniprot/?scientific_name=Bacteria") @@ -522,11 +527,17 @@ def test_subfamilies_counter(self): def test_panther_subfamilies(self): for entry in self.entries: - response = self.client.get(f"/api/entry/{entry['db']}/{entry['acc']}?subfamilies") + response = self.client.get( + f"/api/entry/{entry['db']}/{entry['acc']}?subfamilies" + ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.data["results"]), 1) - self.assertEqual(response.data["results"][0]["metadata"]["accession"], entry['sf']) - self.assertEqual(response.data["results"][0]["metadata"]["integrated"], entry['acc']) + self.assertEqual( + response.data["results"][0]["metadata"]["accession"], entry["sf"] + ) + self.assertEqual( + response.data["results"][0]["metadata"]["integrated"], entry["acc"] + ) def test_no_subfamilies_in_pfam(self): response = self.client.get(f"/api/entry/pfam/PF02171") diff --git a/webfront/tests/tests_protein_endpoint_entry_filter.py b/webfront/tests/tests_protein_endpoint_entry_filter.py index c787ef3f..17ae4860 
100644 --- a/webfront/tests/tests_protein_endpoint_entry_filter.py +++ b/webfront/tests/tests_protein_endpoint_entry_filter.py @@ -138,7 +138,9 @@ def test_urls_that_return_a_protein_details_with_matches(self): "/api/protein/uniprot/" + sp_2 + "/entry/interpro": ["IPR003165", "IPR001165"], - "/api/protein/uniprot/" + sp_1 + "/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/protein/uniprot/" + + sp_1 + + "/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/protein/uniprot/" + sp_2 + "/entry/pfam": ["PF17176", "PF02171"], "/api/protein/uniprot/" + sp_2 + "/entry/interpro/pfam": ["PF02171"], "/api/protein/uniprot/" + sp_2 + "/entry/interpro/smart": ["SM00950"], diff --git a/webfront/tests/tests_structure_endpoint_entry_filter.py b/webfront/tests/tests_structure_endpoint_entry_filter.py index 42c54287..6fef7606 100644 --- a/webfront/tests/tests_structure_endpoint_entry_filter.py +++ b/webfront/tests/tests_structure_endpoint_entry_filter.py @@ -121,7 +121,9 @@ def test_urls_that_return_a_structure_details_with_matches(self): pdb_2 = "2BKM" acc = "IPR003165" urls = { - "/api/structure/pdb/" + pdb_2 + "/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/structure/pdb/" + + pdb_2 + + "/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/entry/unintegrated": ["PF17180", "PF17176", "PTHR43214"], @@ -165,9 +167,13 @@ def test_urls_that_return_a_structure_details_with_matches_from_chain(self): pdb_2 = "2BKM" acc = "IPR003165" urls = { - "/api/structure/pdb/" + pdb_2 + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/structure/pdb/" + + pdb_2 + + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/A/entry/unintegrated": ["PF17176"], - "/api/structure/pdb/" + pdb_1 + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/structure/pdb/" + + pdb_1 + + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/A/entry/interpro": ["IPR003165", "IPR001165"], diff --git 
a/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py b/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py index b85518d4..f27337ee 100644 --- a/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py +++ b/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py @@ -25,7 +25,9 @@ }, "structure": {"pdb": ["1JM7", "1T2V", "2BKM", "1JZ8"]}, "taxonomy": {"uniprot": ["1", "2", "2579", "40296", "344612", "1001583", "10090"]}, - "proteome": {"uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"]}, + "proteome": { + "uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"] + }, } plurals = ModelContentSerializer.plurals @@ -252,7 +254,9 @@ def test_db_db(self): elif response.status_code != status.HTTP_204_NO_CONTENT: self.assertEqual( - response.status_code, status.HTTP_204_NO_CONTENT, "URL : [{}]".format(current), + response.status_code, + status.HTTP_204_NO_CONTENT, + "URL : [{}]".format(current), ) def test_db_acc(self): diff --git a/webfront/tests/tests_utils_endpoint.py b/webfront/tests/tests_utils_endpoint.py index 2e05fb2b..fb9f4310 100644 --- a/webfront/tests/tests_utils_endpoint.py +++ b/webfront/tests/tests_utils_endpoint.py @@ -68,8 +68,8 @@ def test_accession_endpoint_with_gene_name(self): self.assertEqual(response.data["endpoint"], "protein") self.assertEqual(response.data["source_database"], "unreviewed") self.assertIn("proteins", response.data) - self.assertGreater( len(response.data["proteins"]), 0) - self.assertEqual(response.data["proteins"][0]['accession'], "Q0VDM6") + self.assertGreater(len(response.data["proteins"]), 0) + self.assertEqual(response.data["proteins"][0]["accession"], "Q0VDM6") class UtilsReleaseTest(InterproRESTTestCase): diff --git a/webfront/views/mail.py b/webfront/views/mail.py index 884ca1f6..9a36deca 100644 --- a/webfront/views/mail.py +++ b/webfront/views/mail.py @@ -11,61 +11,51 @@ def mail_interhelp(request): 
ip_address = get_client_ip(request) now = datetime.now() - if not hasattr(settings, 'credentials'): + if not hasattr(settings, "credentials"): return store_credentials_and_mail(request, ip_address, now) else: last_accessed = settings.credentials - if last_accessed['ip'] == ip_address: - then = datetime.strptime(last_accessed['time'], "%Y-%m-%d %H:%M:%S.%f") + if last_accessed["ip"] == ip_address: + then = datetime.strptime(last_accessed["time"], "%Y-%m-%d %H:%M:%S.%f") time_diff = now - then elapsed_min = time_diff / timedelta(minutes=1) if elapsed_min >= 1: return store_credentials_and_mail(request, ip_address, now) else: - data = { - 'error': 'Request Aborted', - } + data = {"error": "Request Aborted"} return JsonResponse(data, status=429) else: return store_credentials_and_mail(request, ip_address, now) def get_client_ip(request): - x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') + x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR") if x_forwarded_for: - ip = x_forwarded_for.split(',')[0] + ip = x_forwarded_for.split(",")[0] else: - ip = request.META.get('REMOTE_ADDR') + ip = request.META.get("REMOTE_ADDR") return ip def store_credentials_and_mail(request, ip, time): - settings.credentials = { - 'ip': ip, - 'time': time.strftime("%Y-%m-%d %H:%M:%S.%f") - } + settings.credentials = {"ip": ip, "time": time.strftime("%Y-%m-%d %H:%M:%S.%f")} return mail(request) def mail(request): - path = request.POST.get('path', INTERPRO_CONFIG.get("sendmail_path")) - subject = request.POST.get('subject', '') - message = request.POST.get('message', '') - from_email = request.POST.get('from_email', '') + path = request.POST.get("path", INTERPRO_CONFIG.get("sendmail_path")) + subject = request.POST.get("subject", "") + message = request.POST.get("message", "") + from_email = request.POST.get("from_email", "") if path and subject and message and from_email: message = MIMEText(message) - message['From'] = from_email - message['To'] = 'interhelp@ebi.ac.uk' - 
message['Subject'] = subject + message["From"] = from_email + message["To"] = "interhelp@ebi.ac.uk" + message["Subject"] = subject p = Popen([path, "-t", "-oi"], stdin=PIPE) p.communicate(message.as_bytes()) - data = { - 'from': from_email, - 'subject': subject, - } + data = {"from": from_email, "subject": subject} return JsonResponse(data) else: - data = { - 'error': 'Make sure all fields are entered and valid', - } + data = {"error": "Make sure all fields are entered and valid"} return JsonResponse(data, status=400) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 07a59e86..0180550e 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -17,7 +17,12 @@ StructuralModel, ) from webfront.views.custom import filter_queryset_accession_in -from webfront.exceptions import EmptyQuerysetError, HmmerWebError, ExpectedUniqueError, InvalidOperationRequest +from webfront.exceptions import ( + EmptyQuerysetError, + HmmerWebError, + ExpectedUniqueError, + InvalidOperationRequest, +) from django.conf import settings from urllib import request, parse @@ -279,13 +284,12 @@ def filter_by_entry_db(value, general_handler): def filter_by_min_value(endpoint, field, value, sort_direction="asc"): def x(_, general_handler): general_handler.queryset_manager.add_filter( - endpoint, - **{ - "{}__gte".format(field): value - }, + endpoint, **{"{}__gte".format(field): value} ) if sort_direction in ("asc", "desc"): - general_handler.queryset_manager.order_by("{}:{}".format(field, sort_direction)) + general_handler.queryset_manager.order_by( + "{}:{}".format(field, sort_direction) + ) else: raise ValueError("{} is not a valid sorting order".format(sort_direction)) @@ -542,22 +546,23 @@ def get_isoforms(value, general_handler): return {"results": [iso.accession for iso in isoforms], "count": len(isoforms)} + def run_hmmsearch(model): """ run hmmsearch using hmm model against reviewed uniprot proteins """ - parameters = { - "seq": model, - "seqdb": 
"swissprot", - } - + parameters = {"seq": model, "seqdb": "swissprot"} + enc_params = parse.urlencode(parameters).encode() url = "https://www.ebi.ac.uk/Tools/hmmer/search/hmmsearch" - req = request.Request(url=url, data=enc_params, headers={"Accept": "application/json"}) + req = request.Request( + url=url, data=enc_params, headers={"Accept": "application/json"} + ) with request.urlopen(req) as response: raw_results = response.read().decode("utf-8") results = loads(raw_results) - return results['results']['hits'] + return results["results"]["hits"] + def calculate_conservation_scores(entry_acc): """ @@ -661,45 +666,65 @@ def calculate_residue_conservation(entry_db, general_handler): # will always have one protein in queryset protein = queryset[0] - if protein.source_database != 'reviewed': + if protein.source_database != "reviewed": raise InvalidOperationRequest( - f"Conservation data can only be calculated for proteins in UniProt reviewed." - ) + f"Conservation data can only be calculated for proteins in UniProt reviewed." 
+ ) # get entries matching the sequence from the selected database - q = "protein_acc:{} && entry_db:{}".format(protein.accession.lower(), entry_db.lower()) + q = "protein_acc:{} && entry_db:{}".format( + protein.accession.lower(), entry_db.lower() + ) searcher = general_handler.searcher results = searcher.execute_query(q, None, None) # process each hit sequence = protein.sequence alignments = {"sequence": sequence, entry_db: {"entries": {}}} - if "hits" in results.keys() and "hits" in results["hits"] and len(results["hits"]["hits"]) > 0 : + if ( + "hits" in results.keys() + and "hits" in results["hits"] + and len(results["hits"]["hits"]) > 0 + ): entries = results["hits"]["hits"] for entry in entries: - entry_annotation = EntryAnnotation.objects.filter(accession_id=entry["_source"]["entry_acc"], type="hmm")[0] + entry_annotation = EntryAnnotation.objects.filter( + accession_id=entry["_source"]["entry_acc"], type="hmm" + )[0] model = gzip.decompress(entry_annotation.value).decode("utf-8") hits = run_hmmsearch(model) - protein_dict = {x['acc']: x for x in hits} - protein_hits = list(filter(lambda x: x['acc'] == protein.identifier, hits)) + protein_dict = {x["acc"]: x for x in hits} + protein_hits = list(filter(lambda x: x["acc"] == protein.identifier, hits)) if len(protein_hits) > 0: alignments[entry_db]["entries"][entry_annotation.accession_id] = [] - logo_score = calculate_conservation_scores(entry_annotation.accession_id) - domains = [hit['domains'] for hit in protein_hits][0] + logo_score = calculate_conservation_scores( + entry_annotation.accession_id + ) + domains = [hit["domains"] for hit in protein_hits][0] for hit in domains: # calculate scores for each domain hit for each entry mappedseq, modelseq, hmmfrom, hmmto, alisqfrom, alisqto = align_seq_to_model( hit, sequence ) matrixseq = get_hmm_matrix( - logo_score, alisqfrom, alisqto, hmmfrom, hmmto, mappedseq, modelseq + logo_score, + alisqfrom, + alisqto, + hmmfrom, + hmmto, + mappedseq, + modelseq, ) 
formatted_matrix = format_logo(matrixseq) - alignments[entry_db]["entries"][entry_annotation.accession_id].append(formatted_matrix) + alignments[entry_db]["entries"][ + entry_annotation.accession_id + ].append(formatted_matrix) else: - if 'warnings' not in alignments[entry_db]: - alignments[entry_db]['warnings'] = [] - alignments[entry_db]['warnings'].append(f"Hmmer did not match Entry {entry_annotation.accession_id} with Protein {protein.identifier}.") + if "warnings" not in alignments[entry_db]: + alignments[entry_db]["warnings"] = [] + alignments[entry_db]["warnings"].append( + f"Hmmer did not match Entry {entry_annotation.accession_id} with Protein {protein.identifier}." + ) return alignments @@ -782,10 +807,11 @@ def get_sunburst_taxa(value, general_handler): taxa = EntryTaxa.objects.filter( accession__in=general_handler.queryset_manager.get_queryset() ) - if taxa.count()==0: + if taxa.count() == 0: raise EmptyQuerysetError("This entry doesn't have taxa") return {"taxa": taxa.first().tree} + def extra_features(value, general_handler): features = ProteinExtraFeatures.objects.filter( protein_acc__in=general_handler.queryset_manager.get_queryset() @@ -873,11 +899,10 @@ def get_subfamilies(value, general_handler): queryset = general_handler.queryset_manager.get_queryset().first() entries = Entry.objects.filter(integrated=queryset.accession, is_alive=False) if len(entries) == 0: - raise EmptyQuerysetError( - "There is are not subfamilies for this entry" - ) + raise EmptyQuerysetError("There is are not subfamilies for this entry") general_handler.modifiers.search_size = len(entries) return entries + def passing(x, y): pass diff --git a/webfront/views/utils.py b/webfront/views/utils.py index 1fcf206f..56805f51 100644 --- a/webfront/views/utils.py +++ b/webfront/views/utils.py @@ -117,7 +117,9 @@ def get( "accession": first.accession, } else: - qs2 = Protein.objects.filter(tax_id__in=list(organisms.keys())).filter(gene__iexact=acc) + qs2 = 
Protein.objects.filter(tax_id__in=list(organisms.keys())).filter( + gene__iexact=acc + ) if qs2.count() > 0: first = qs2.first() @@ -125,10 +127,13 @@ def get( "endpoint": "protein", "source_database": first.source_database, "proteins": [ - {"accession": item.accession, - "organism": item.organism["scientificName"], - "tax_id":item.tax_id - } for item in qs2], + { + "accession": item.accession, + "organism": item.organism["scientificName"], + "tax_id": item.tax_id, + } + for item in qs2 + ], } else: hit = docs["hits"]["hits"][0]["_source"]