Skip to content

Commit

Permalink
Merge pull request #42 from ProteinsWebTeam/dev
Browse files Browse the repository at this point in the history
Last check before moving it to master
  • Loading branch information
gustavo-salazar authored Feb 10, 2021
2 parents 2c4d506 + f0b8814 commit 04d615a
Show file tree
Hide file tree
Showing 20 changed files with 570 additions and 118 deletions.
6 changes: 6 additions & 0 deletions webfront/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,9 @@ class QuerysetType(Enum):
ENTRY_PROTEIN = 150
ENTRY_STRUCTURE = 160
STRUCTURE_PROTEIN = 250


class ModifierType(Enum):
    """Enumerate the kinds of modifier, each identified by an integer code.

    NOTE(review): the member names suggest that a modifier can filter a
    result, replace the response payload, or extend it with extra keys.
    This block does not show how the values are consumed -- confirm
    against the code that registers modifiers.
    """

    FILTER = 1
    REPLACE_PAYLOAD = 2
    EXTEND_PAYLOAD = 3
19 changes: 19 additions & 0 deletions webfront/migrations/0012_seq_and_seq_raw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 3.0.7 on 2021-01-11 13:42

from django.db import migrations, models


class Migration(migrations.Migration):
    """Drop three ``protein`` fields and add a binary ``sequence_bin`` field.

    ``extra_features``, ``residues`` and ``sequence`` are removed from the
    ``protein`` model, and ``sequence_bin`` is added as a nullable
    ``BinaryField`` stored in the ``sequence`` database column.
    """

    # Must be applied after the previous webfront migration.
    dependencies = [("webfront", "0011_pfam2interpro")]

    operations = [
        migrations.RemoveField(model_name="protein", name="extra_features"),
        migrations.RemoveField(model_name="protein", name="residues"),
        # The old ``sequence`` field is removed before ``sequence_bin`` is
        # added under the same column name; no operation in this migration
        # copies existing values across.
        migrations.RemoveField(model_name="protein", name="sequence"),
        migrations.AddField(
            model_name="protein",
            name="sequence_bin",
            field=models.BinaryField(db_column="sequence", null=True),
        ),
    ]
38 changes: 38 additions & 0 deletions webfront/migrations/0013_protein_extra.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 3.0.7 on 2021-01-11 16:29

from django.db import migrations, models
import jsonfield.fields


class Migration(migrations.Migration):
    """Create the ``ProteinExtraFeatures`` and ``ProteinResidues`` models.

    Each model gets an explicit table name (``webfront_proteinfeature`` and
    ``webfront_proteinresidue``) and an integer primary key.
    """

    # Must be applied after the migration that reshaped the protein fields.
    dependencies = [("webfront", "0012_seq_and_seq_raw")]

    operations = [
        migrations.CreateModel(
            name="ProteinExtraFeatures",
            fields=[
                ("feature_id", models.IntegerField(primary_key=True, serialize=False)),
                # Accessions are plain character columns, not foreign keys.
                ("protein_acc", models.CharField(max_length=15)),
                ("entry_acc", models.CharField(max_length=25)),
                ("source_database", models.CharField(max_length=10)),
                ("location_start", models.IntegerField()),
                ("location_end", models.IntegerField()),
                ("sequence_feature", models.CharField(max_length=35)),
            ],
            options={"db_table": "webfront_proteinfeature"},
        ),
        migrations.CreateModel(
            name="ProteinResidues",
            fields=[
                ("residue_id", models.IntegerField(primary_key=True, serialize=False)),
                # Accessions are plain character columns, not foreign keys.
                ("protein_acc", models.CharField(max_length=15)),
                ("entry_acc", models.CharField(max_length=25)),
                ("entry_name", models.CharField(max_length=100)),
                ("source_database", models.CharField(max_length=10)),
                ("description", models.CharField(max_length=255)),
                # The only nullable column of this model.
                ("fragments", jsonfield.fields.JSONField(null=True)),
            ],
            options={"db_table": "webfront_proteinresidue"},
        ),
    ]
21 changes: 21 additions & 0 deletions webfront/migrations/0014_structural_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 3.0.7 on 2021-02-05 10:34

from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``StructuralModel`` model in ``webfront_structuralmodel``."""

    # Must be applied after the migration that added the protein extra tables.
    dependencies = [("webfront", "0013_protein_extra")]

    operations = [
        migrations.CreateModel(
            name="StructuralModel",
            fields=[
                ("model_id", models.IntegerField(primary_key=True, serialize=False)),
                ("accession", models.CharField(max_length=25)),
                # Both payloads are raw binary columns.
                # NOTE(review): presumably gzip-compressed content -- confirm
                # against the code that reads these fields.
                ("contacts", models.BinaryField()),
                ("structure", models.BinaryField()),
            ],
            options={"db_table": "webfront_structuralmodel"},
        )
    ]
51 changes: 48 additions & 3 deletions webfront/models/interpro_new.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import gzip

from django.db import models
from jsonfield import JSONField

Expand Down Expand Up @@ -60,7 +62,7 @@ class Protein(models.Model):
organism = JSONField(null=True)
name = models.CharField(max_length=20)
description = JSONField(null=True)
sequence = models.TextField(null=False)
sequence_bin = models.BinaryField(db_column="sequence", null=True)
length = models.IntegerField(null=False)
proteome = models.CharField(max_length=20, null=True)
gene = models.CharField(max_length=70, null=True)
Expand All @@ -69,15 +71,48 @@ class Protein(models.Model):
source_database = models.CharField(
max_length=20, default="unreviewed", db_index=True
)
residues = JSONField(null=True)
extra_features = JSONField(null=True)
# residues = JSONField(null=True)
# extra_features = JSONField(null=True)
structure = JSONField(default={}, null=True)
is_fragment = models.BooleanField(default=False)
tax_id = models.CharField(max_length=20, null=False, default="")
ida_id = models.CharField(max_length=40, null=True)
ida = models.TextField(null=True)
counts = JSONField(null=True)

@property
def sequence(self):
    """Return the decompressed content of ``sequence_bin``.

    The stored value is passed through ``gzip.decompress``, so the result
    is a ``bytes`` object. ``None`` is returned when ``sequence_bin`` is
    ``None``.
    """
    compressed = self.sequence_bin
    return None if compressed is None else gzip.decompress(compressed)


class ProteinExtraFeatures(models.Model):
    """A sequence feature row stored in the ``webfront_proteinfeature`` table.

    Each row carries a protein accession, an entry accession, the source
    database and a single start/end location.
    """

    feature_id = models.IntegerField(primary_key=True)
    # Accessions are plain character columns rather than foreign keys.
    protein_acc = models.CharField(max_length=15)
    entry_acc = models.CharField(max_length=25)
    source_database = models.CharField(max_length=10)
    # NOTE(review): presumably residue positions on the protein sequence;
    # whether they are 1-based/inclusive is not visible here -- confirm.
    location_start = models.IntegerField()
    location_end = models.IntegerField()
    sequence_feature = models.CharField(max_length=35)

    class Meta:
        db_table = "webfront_proteinfeature"


class ProteinResidues(models.Model):
    """A residue annotation row stored in the ``webfront_proteinresidue`` table.

    Each row carries a protein accession, the annotating entry (accession,
    name and source database), a description and a JSON ``fragments`` value.
    """

    residue_id = models.IntegerField(primary_key=True)
    # Accessions are plain character columns rather than foreign keys.
    protein_acc = models.CharField(max_length=15)
    entry_acc = models.CharField(max_length=25)
    entry_name = models.CharField(max_length=100)
    source_database = models.CharField(max_length=10)
    description = models.CharField(max_length=255)
    # The only nullable column of this model.
    # NOTE(review): the JSON layout is not defined here -- confirm with the
    # code that writes and reads this column.
    fragments = JSONField(null=True)

    class Meta:
        db_table = "webfront_proteinresidue"


class Structure(models.Model):
accession = models.CharField(max_length=4, primary_key=True)
Expand Down Expand Up @@ -190,3 +225,13 @@ class Isoforms(models.Model):

class Meta:
db_table = "webfront_varsplic"


class StructuralModel(models.Model):
    """A structural model row stored in the ``webfront_structuralmodel`` table.

    Holds an accession together with two binary payloads, ``contacts`` and
    ``structure``.
    """

    model_id = models.IntegerField(primary_key=True)
    accession = models.CharField(max_length=25, null=False)
    # NOTE(review): presumably gzip-compressed content, like the protein
    # ``sequence_bin`` column -- confirm against the code that reads them.
    contacts = models.BinaryField()
    structure = models.BinaryField()

    class Meta:
        db_table = "webfront_structuralmodel"
20 changes: 10 additions & 10 deletions webfront/pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ class CustomPagination(CursorPagination):
before_key = None

def get_paginated_response(self, data):
    """Wrap one page of results in the paginated response envelope.

    ``data`` is a mapping whose ``"data"`` item holds the page results and
    whose optional ``"extensions"`` item holds extra top-level entries.

    The response body is an ``OrderedDict`` with ``count``, ``next``,
    ``previous`` and ``results`` in that order, followed by one entry per
    extension. An extension whose key matches one of the four base keys
    replaces that value but keeps the key in its original position.
    """
    payload = OrderedDict()
    payload["count"] = self.current_size
    payload["next"] = self.get_next_link()
    payload["previous"] = self.get_previous_link()
    payload["results"] = data["data"]

    if "extensions" in data:
        extensions = data["extensions"]
        if len(extensions) > 0:
            for key in extensions:
                payload[key] = extensions[key]

    return Response(payload)

def _get_position_from_instance(self, instance, ordering):
if type(instance) == tuple:
Expand Down
51 changes: 51 additions & 0 deletions webfront/tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
Testing
===

Generating fixtures for gzip BinaryFields
---
This is an example of how to generate the fixture file for `StructuralModel`. The same approach was used for the sequence in the `Protein` table.

1. Open the Django shell. Make sure you are using the test DB by setting `use_test_db: true` in `interpro.local.yml`.
```shell
python3 manage.py shell
```

2. Manually create the fixture, using `gzip` and `bytes` for the binary fields
```python
import gzip
from webfront.models import StructuralModel
contacts = "[[1,11,0.5], [2,30,0.8], [3,30,0.8]]"
contacts_gz = gzip.compress(bytes(contacts,'utf-8'))
structure = """ATOM 1 N VAL A 1 -0.701 1.770 1.392 1.00 4.92 N
ATOM 2 CA VAL A 1 0.691 2.052 1.718 1.00 4.92 C
ATOM 3 C VAL A 1 1.384 2.880 0.637 1.00 4.92 C
ATOM 4 O VAL A 1 0.991 2.879 -0.532 1.00 4.92 O
"""
structure_gz = gzip.compress(bytes(structure,'utf-8'))
model = StructuralModel(model_id=1,accession='PF17176',contacts=contacts_gz,structure=structure_gz)
model.save()
```

3. Generate the fixture using Django's `dumpdata` tool:
```shell
python manage.py dumpdata webfront --indent 4
```
```json
[
{
"model": "webfront.structuralmodel",
"pk": 1,
"fields": {
"accession": "PF17176",
"contacts": "H4sIAOAnHWAC/4uONtQxNNQx0DON1VGINtIxNgCyLUBsYxg7FgCIHLXIJAAAAA==",
"structure": "H4sIAOcnHWAC/42QMQ7DMAhF95yCCxRhOy54RFnbZIly/6MEcBWpSi2V5X9s/Qe27tsbohLAanLoC7S3Xg9CJvcJmSm0tOxC1s3o/irLT3oB7WbRGxAIn819Rqq5g5MMgMsXsMTBDWgbyRxAEeoDCv8FtNQGvzZsnw2FW3xBLaMnW346Aa6DA5lEAQAA"
}
}
]
```
4. Now you can include the generated JSON in one of the fixture files in `webfront/tests/`.

In this example the generated fixture is included at the end of `webfront/tests/fixtures_structure.json`.
54 changes: 29 additions & 25 deletions webfront/tests/fixtures_protein.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"description": [
"Vacuolar carboxypeptidase involved in degradation of small peptides. Digests preferentially peptides containing an aliphatic or hydrophobic residue in P1' position, as well as methionine, leucine or phenylalanine in P1 position of ester substrate (By similarity)."
],
"sequence": "MRVLPATLLVGAATAAVPPFQQILGLPKKGADTLSKPLHDFQEQLKTLSDDARRLWDEVAKHFPDSMDHNPVFSLPKKHTRRPDSHWDHIVRGADVQSVWVTGANGEKEREVDGKLEAYDLRVKTTDPGALGIDPGVKQYTGYLDDNENDKHLFYWFFESRNDPKNDPVVLWLNGGPGCSSLTGLFLELGPSSIDSKIKPVYNDFAWNSNASVIFLDQPVNVGYSYSGSAVSDTVAAGKDVYALLTLFFKQFPEYAKQDFHIAGESYAGHYIPVFASEILSHKKRNINLKSVLIGNGLTDPLTQYDHYRPMACGDGGYPAVLDEASCQSMDNALPRCKSMIESCYNTESSWVCVPASIYCNNALIGPYQRTGQNVYDVRGKCEDESNLCYKGMGYVSEYLNKREVREAVGAEVDGYDSCNFDINRNFLFHGDWMKPYHRLVPGLLEQIPVLIYAGDADFICNWLGNKAWSEALEWPGQKEYASAELEDLVIEQNEHQGKKIGQIKSHGNFTFMRLYGGGHMVPMDQPEASLEFFNRWIGGEWF",
"sequence_bin": "H4sIAEFb/F8C/xWRbQpFIQhE1yb5UeiVyjDc/0Ze74eIysDM8dtpE45ZCsAByDl5rWFiU1UAj4VO68iLlumbEGFvu0gJ2nlifNh9Jsdf0M/eb9Uv9pH7yXNF3jwCLqS0KVHUCAptp56DU8BkvJa66kgZopOjduO6zBTbceqrTLvmIlNahB0xNjKZEQNDh84sR4br4RA52HDN9JSKCgnIwJMAopgFZseYdfGkAl3IfYBQFEiv8ZJA0LDoqtuHm0baEBd7Zu2swl57ftAERWpCGhJEWw+Dg83dNL5B0coPRdxsOSFGNX/XIbPWPrI8Cx8ebYQUbq1UPqkMKnN9kDbBe8cfVmE0Zxy+nY274P10Vt+WU8xoPb82nnME5NH8mrjCDQKjO2XpSxhARmg5aDn1JapD1tDo4nz421Yi0r+c32P2shgx+75DhC7/APPZcnUfAgAA",
"length": "543",
"proteome": "UP000006701",
"gene": "cpyA",
Expand All @@ -30,23 +30,6 @@
"evidence_code": 3,
"source_database": "reviewed",
"is_fragment": false,
"residues": {},
"extra_features": {
"TMhelix": {
"accession": "TMhelix",
"source_database": "tmhmm",
"locations": [
{
"fragments": [
{
"start": 265,
"end": 287
}
]
}
]
}
},
"counts": {
"entries": {
"total": 5,
Expand All @@ -72,7 +55,7 @@
},
"name": "Band 7 protein",
"description": [],
"sequence": "MESGIIEILIRNGVSHMTEKPVFHINGYLGLILVLVILGLGVYLSVVGWGVLGVILVVLAVLAASSLTIIEPNQSKVLTFFGRYIGTIKESGLYLTVPLTTKTTVSLRVRNFNSAILKVNDLQGNPVEIAAVIVFKVVDTSKALFAVEDYEKFVEIQSESAIRHVASEYAYDNFGDHQALTLRSNPTEVSNHLTEELQARLEVAGVQIIETRLTHLAYATEIASAMLQRQQSQAILSARKIIVEGAVSITEGAIEQLAAETDLHLTDNQKLQLINNMMVSIINERGSQPVINTGKVE",
"sequence_bin": "H4sIAEFc/F8C/w2PAQoEMQgD3ybUutKsbKvk6P8/coJgMDrqq2nu6vATxnze0vVxPh52YXAQ9BbGiyTtZ2zdZUI6JBPV81/sXETNaee6la/m4qL4oWpVMXF4YkaKYzEGtsVHdRE65yJH5RJMoY6ra7a1U7v7PJTUK3fEtPFsQeFkfKXMeFCq2HKgFOPuU+qgHsiVanjKi332zt1rU85yp5owvTq57n5Ba6A5I/bChke8b/seeiz3R4+yRf0D4x3lAykBAAA=",
"length": 297,
"proteome": "UP000012042",
"gene": "LVISKB_0797",
Expand Down Expand Up @@ -153,7 +136,6 @@
"evidence_code": 1,
"source_database": "reviewed",
"is_fragment": true,
"residues": {},
"counts": {
"entries": {
"pfam": 1,
Expand All @@ -178,7 +160,7 @@
},
"name": "Propeptide, carboxypeptidase Y",
"description": [],
"sequence": "MRVLSTTLLVGAASAAAPSFQQVLGAHSEHAENVAQQGADAFKPLQHLQDQFKSLSSEARQLWEEVSNYFPESMGSAPMLSLPKKHTRRPDSHWDYHVSGAKVQDIWVSGAEGTKEREVDGKLEDYALRAKKVDPSALGIDPGVKQYSGYLDDNENDKHLFYWFFESRNDPKNDPVVLWLNGGPGCSSLTGLFMELGPSSIGANIKPIYNDFSWNNNASVIFLDQPINVGYSYSGSSVSDTVAAGKDVYALLTLFFKQFPEYATQDFHIAGESYAGHYIPVMASEILSHKKRNINLKSVLIGNGLTDGLTQYEYYRPMACGEGGYPAVLDESTCQSMDNALSRCQSMIQSCYNSESPWVCVPASIYCNNAMLGPYQRTGQNVYDVRGKCEDESNLCYKGLGYVSEYLGQESVREAVGAEVDGYDSCNFDINRNFLFNGDWFKPYHRLVPGLLEQIPVLIYAGDADFICNWLGNKAWSEALEWPGQKEFASAELEDLKIVQNEHVGKKIGQIKSHGNFTFMRIFGGGHMVPMDQPESGLEFFNRWIGGEWF",
"sequence_bin": "H4sIAMhc/F8C/xWRSw5EIQgEz0YeHw1IVAyG+19knAVJb4BK19hpcY5ZCkAAwAxeK02gBTUgT1hLAIF12mq2cLGGRRDsZZcow4snxZCAOSxsqraz98RoF6tlCGgu7PefSI7SpkRRIyywDaqJM8Ck45TUVSFliE6O2ozrMlNsx6lvMu2ai0z5IuyI8SCTGdEFvOvs5chx3R0iOxuu2T2l4l2NyMCTAKKY77UdY9b14AvOQm4dhKJAWvWZA4K6RVPd3t000rq42ME3q6hqzwGfkEhNSEOK860Y6GCx/6mv+MqDYt78ckL0+h7XeLy19pHlWZhb9KO37PaVikllUJksitwEz8q/rML4nLH7djZ2wft0VNuWU8xoPV7rjxwBuX9+TVzhPkdGd8pS4ueWXuOmPZdTS1HtsrpGE+fDY3cWkTZyjtcZhRgx+75dhC7/APSq380mAgAA",
"length": "550",
"proteome": "UP000030104",
"gene": "PITC_084940",
Expand All @@ -195,7 +177,6 @@
"evidence_code": 1,
"source_database": "unreviewed",
"is_fragment": false,
"residues": {},
"counts": {
"entries": {
"total": 0
Expand All @@ -219,7 +200,7 @@
"description": [
"Receptor for lutropin-choriogonadotropic hormone. The activity of this receptor is mediated by G proteins which activate adenylate cyclase."
],
"sequence": "MRRRSLALRLLLALLLLPPPLPQTLLGAPCPEPCSCRPDGALRCPGPRAGLSRLSLTYLPIKVIPSQAFRGLNEVVKIEISQSDSLEKIEANAFDNLLNLSEILIQNTKNLVYIEPGAFTNLPRLKYLSICNTGIRKLPDVTKIFSSEFNFILEICDNLHITTVPANAFQGMNNESITLKLYGNGFEEIQSHAFNGTTLISLELKENAHLKKMHNDAFRGARGPSILDISSTKLQALPSYGLESIQTLIATSSYSLKKLPSREKFTNLLDATLTYPSHCCAFRNLPTKEQNFSFSIFKNFSKQCESTARRPNNETLYSAIFAESELSDWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYDFLRVLIWLINILAIMGNVTVLFVLLTSHYKLTVPRFLMCNLSFADFCMGLYLLLIASVDAQTKGQYYNHAIDWQTGNGCSVAGFFTVFASELSVYTLTVITLERWHTITYAIQLDQKLRLRHAIPIMLGGWLFSTLIAMLPLVGVSSYMKVSICLPMDVETTLSQVYILTILILNVVAFIIICACYIKIYFAVQNPELMATNKDTKIAKKMAVLIFTDFTCMAPISFFAISAALKVPLITVTNSKVLLVLFYPVNSCANPFLYAIFTKAFRRDFFLLLSKSGCCKHQAELYRRKDFSAYCKNGFTGSNKPSRSTLKLTTLQCQYSTVMDKTCYKDC",
"sequence_bin": "H4sIAE1d/F8C/xWRUQ4EIQhDz0ZUHEKHoBA33v8iy/ihhoD2te/eO0DYQO213B2+EpjkzYe3aNv7rI7m0zdNxEYgL1z0iMci3hM2zlEZEit6YNSVjLgbYIghkGWphnNl+CROg2/oRUiznLIV3k+qcMRgY8GQVtOPZB7/nlrzNRshCcWdNnkMWfEQ28yE1J/QYfRA9X2sf6JoTw9Bl4hULILHnag3Ck8oI25Ud1X30E8ROmWBeTyt1XxJTB3LODiEtU5dbUTS3l5SEjdImEYMRP/12+/kFq6J1aicKw3mbXR55+2MfSA/iAmoKnbygA+Q8VxFQW7G28osps7tnbgVhlCcTit1rnvtIem/lQXf4tBkzsP0/X5u6T5lzdi/JyUvyUJfWqnuGnJ5MecPHB/3C8eZp+hfPWU//O1nlIexKh1kZQU7h1hEGrUrKpfpLPOBl9K0V0xULlMBcXbO9pJLMJMEEfQ4JE9aaOEV5PVj0cicUcI4tbzdnbn4QmO2ps+igbu3dg66TSvdnGFaycSXd36Wrht53q7Zrvb2B5BjI824AgAA",
"length": 696,
"proteome": "UP000030104",
"gene": "LHCGR",
Expand All @@ -236,7 +217,6 @@
"evidence_code": 4,
"source_database": "unreviewed",
"is_fragment": false,
"residues": {},
"counts": {
"entries": {
"profile": 1,
Expand All @@ -249,7 +229,7 @@
"ida_id": 590134
}
},
{
{
"model": "webfront.Isoforms",
"fields": {
"accession": "A1CUJ5-2",
Expand Down Expand Up @@ -289,5 +269,29 @@
}
}
}
},
{
"model": "webfront.ProteinExtraFeatures",
"fields": {
"feature_id": 1,
"protein_acc": "A1CUJ5",
"entry_acc": "TMhelix",
"source_database": "tmhmm",
"sequence_feature": "TMhelix",
"location_start": 265,
"location_end": 287
}
},
{
"model": "webfront.ProteinResidues",
"fields": {
"residue_id": 1,
"entry_name": "the residue",
"description": "a single residue",
"fragments": [["X",5,5]],
"protein_acc": "A1CUJ5",
"entry_acc": "residue",
"source_database": "cddd"
}
}
]
10 changes: 10 additions & 0 deletions webfront/tests/fixtures_structure.json
Original file line number Diff line number Diff line change
Expand Up @@ -217,5 +217,15 @@
"sets": 0
}
}
},
{
"model": "webfront.structuralmodel",
"pk": 1,
"fields": {
"accession": "PF17176",
"contacts": "H4sIAOAnHWAC/4uONtQxNNQx0DON1VGINtIxNgCyLUBsYxg7FgCIHLXIJAAAAA==",
"structure": "H4sIAOcnHWAC/42QMQ7DMAhF95yCCxRhOy54RFnbZIly/6MEcBWpSi2V5X9s/Qe27tsbohLAanLoC7S3Xg9CJvcJmSm0tOxC1s3o/irLT3oB7WbRGxAIn819Rqq5g5MMgMsXsMTBDWgbyRxAEeoDCv8FtNQGvzZsnw2FW3xBLaMnW346Aa6DA5lEAQAA"
}
}

]
Loading

0 comments on commit 04d615a

Please sign in to comment.