Skip to content

Commit

Permalink
Chore: Refactor Django orcabus_id (#784)
Browse files Browse the repository at this point in the history
  • Loading branch information
williamputraintan authored Dec 19, 2024
1 parent 955689d commit 80fd526
Show file tree
Hide file tree
Showing 76 changed files with 510 additions and 577 deletions.
73 changes: 40 additions & 33 deletions lib/workload/stateless/stacks/metadata-manager/app/fields.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,55 @@
import hashlib

import ulid
from django.core.validators import RegexValidator
from django.db import models

# 26 characters drawn from the digits and the uppercase letters, leaving out I, L, O and U.
ULID_REGEX_STR = r"[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}"

# Validator built from the pattern above; reports code 'invalid_orcabus_id' on failure.
ulid_validator = RegexValidator(
    code='invalid_orcabus_id',
    message='ULID is expected to be 26 characters long',
    regex=ULID_REGEX_STR,
)


def get_ulid() -> str:
    """Return the string form of a newly generated ULID."""
    fresh = ulid.new()
    return fresh.str

class HashField(models.CharField):
# Adjacent string literals are joined as-is, so the first fragment must end
# with a space; otherwise the text reads "...model andstores its...".
description = (
    "HashField is related to some base fields (other columns) in a model and "
    "stores its hashed value for better indexing performance."
)

def __init__(self, base_fields, *args, **kwargs):
    """
    Remember which fields feed the hash and pin the column length to 64.

    :param base_fields: name of fields storing the value to be hashed
    """
    self.base_fields = base_fields
    # Any max_length supplied by the caller is replaced.
    kwargs.update(max_length=64)
    super(HashField, self).__init__(*args, **kwargs)
class UlidField(models.CharField):
description = "An OrcaBus internal ID (ULID)"

def __init__(self, *args, **kwargs):
    """Initialise the field with a fixed length, the ULID validator and a generated default."""
    # These three settings always replace whatever the caller passed in.
    kwargs.update(
        max_length=26,  # ULID length
        validators=[ulid_validator],
        default=get_ulid,
    )
    super().__init__(*args, **kwargs)

def deconstruct(self):
name, path, args, kwargs = super().deconstruct()
del kwargs["max_length"]
if self.base_fields is not None:
kwargs["base_fields"] = self.base_fields
del kwargs['validators']
del kwargs['default']
return name, path, args, kwargs

def pre_save(self, instance, add):
    """Recompute the hash on `instance`, then return the parent's pre_save result."""
    self.calculate_hash(instance)
    parent = super(HashField, self)
    return parent.pre_save(instance, add)

def calculate_hash(self, instance):
    """
    Write the SHA-256 hex digest of the base field values onto `instance`.

    Each name in `self.base_fields` is read from `instance` and UTF-8 encoded;
    the encoded values are hashed in order and the hex digest is stored under
    `self.attname`.
    """
    payload = b"".join(
        getattr(instance, field_name).encode("utf-8")
        for field_name in self.base_fields
    )
    setattr(instance, self.attname, hashlib.sha256(payload).hexdigest())
class OrcaBusIdField(UlidField):
description = "An OrcaBus internal ID (based on ULID)"

def __init__(self, prefix='', *args, **kwargs):
    """
    Store the ID prefix, then initialise the parent field.

    :param prefix: text placed in front of the stored value (joined with a
        dot) when rows are read back; the empty-string default adds nothing
    """
    self.prefix = prefix
    super().__init__(*args, **kwargs)

@property
def non_db_attrs(self):
    """Return the parent's non-database attribute names with "prefix" appended."""
    inherited = super().non_db_attrs
    return inherited + ("prefix",)

class HashFieldHelper(object):
def __init__(self):
    # Running SHA-256 digest; starts empty.
    self.__sha256 = hashlib.sha256()
def from_db_value(self, value, expression, connection):
    """
    Return the database value with "<prefix>." placed in front of it.

    Falsy values, and every value when no prefix is configured, are returned
    unchanged. `expression` and `connection` are not used.
    """
    if not value or self.prefix == '':
        return value
    return f"{self.prefix}.{value}"

def add(self, value):
    """Feed the UTF-8 encoding of `value` into the digest and return self so calls can be chained."""
    hasher = self.__sha256
    hasher.update(value.encode("utf-8"))
    return self
def to_python(self, value):
    """Normalise `value` the same way `get_prep_value` does, by delegating to it."""
    normalised = self.get_prep_value(value)
    return normalised

def calculate_hash(self):
    """Return the hex digest of everything fed into the digest so far."""
    hasher = self.__sha256
    return hasher.hexdigest()
def get_prep_value(self, value):
    """
    Return the bare ULID to use when dealing with the database.

    Only the last 26 characters are kept, so a prefixed value such as
    ``"lib.<ULID>"`` and a bare ULID both map to the same stored ID.

    :param value: prefixed or bare ID, or ``None``
    :return: the trailing 26 characters of ``value``; ``None`` is passed
        through unchanged instead of raising ``TypeError`` on the slice
    """
    if value is None:
        # Nothing to strip; slicing None would raise.
        return None
    # We just want the last 26 characters which is the ULID (ignoring any prefix)
    return value[-26:]
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Generated by Django 5.1.4 on 2024-12-17 01:44

import app.fields
from django.db import migrations


class Migration(migrations.Migration):
    """Alter the `orcabus_id` field of the listed `app` models to `app.fields.OrcaBusIdField`."""

    dependencies = [
        ('app', '0002_remove_historicalcontact_history_user_and_more'),
    ]

    # One AlterField per (model_name, is_primary_key) pair, in the listed order.
    # Primary-key fields are declared with primary_key=True, serialize=False;
    # the historical* fields are declared with db_index=True instead.
    operations = [
        migrations.AlterField(
            model_name=model_name,
            name='orcabus_id',
            field=(
                app.fields.OrcaBusIdField(primary_key=True, serialize=False)
                if is_primary_key
                else app.fields.OrcaBusIdField(db_index=True)
            ),
        )
        for model_name, is_primary_key in (
            ('contact', True),
            ('historicalcontact', False),
            ('historicalindividual', False),
            ('historicallibrary', False),
            ('historicalproject', False),
            ('historicalsample', False),
            ('historicalsubject', False),
            ('individual', True),
            ('library', True),
            ('project', True),
            ('sample', True),
            ('subject', True),
        )
    ]
31 changes: 11 additions & 20 deletions lib/workload/stateless/stacks/metadata-manager/app/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from simple_history.models import HistoricalRecords

from rest_framework.settings import api_settings

from app.pagination import PaginationConstant

logger = logging.getLogger(__name__)
Expand All @@ -42,7 +43,9 @@ def reduce_multi_values_qor(key: str, values: List[str]):
):
values = [values]
return reduce(
operator.or_, (Q(**{"%s__iexact" % key: value})
# Apparently the `get_prep_value` from the custom fields.py is not called prior hitting the Db but,
# the regular `__exact` still execute that function.
operator.or_, (Q(**{"%s__exact" % key: value})
for value in values)
)

Expand Down Expand Up @@ -102,7 +105,6 @@ def update_or_create_if_needed(self, search_key: dict, data: dict, user_id: str
"""
is_created = False
is_updated = False

try:
obj = self.get(**search_key)
for key, value in data.items():
Expand All @@ -125,27 +127,16 @@ class BaseModel(models.Model):
class Meta:
abstract = True

orcabus_id = models.CharField(
primary_key=True,
unique=True,
editable=False,
blank=False,
null=False,
validators=[
RegexValidator(
regex=r'[\w]{26}$',
message='ULID is expected to be 26 characters long',
code='invalid_orcabus_id'
)]

)

def save(self, *args, **kwargs):
if not self.orcabus_id:
self.orcabus_id = ulid.new().str
# To make django validate the constraint before saving it
self.full_clean()

return super(BaseModel, self).save(*args, **kwargs)
super(BaseModel, self).save(*args, **kwargs)

# Reload the object from the database to ensure custom fields like OrcaBusIdField
# invoke the `from_db_value` method (which provides the annotation) after saving.
self.refresh_from_db()


@classmethod
def get_fields(cls):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


class ContactManager(BaseManager):
pass


class Contact(BaseModel):
orcabus_id_prefix = 'ctc.'
objects = ContactManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='ctc')
contact_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


Expand All @@ -8,9 +9,9 @@ class IndividualManager(BaseManager):


class Individual(BaseModel):
orcabus_id_prefix = 'idv.'
objects = IndividualManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='idv')
individual_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseManager, BaseModel, BaseHistoricalRecords
from app.models.subject import Subject
from app.models.sample import Sample
Expand Down Expand Up @@ -64,9 +65,9 @@ class LibraryProjectLink(models.Model):


class Library(BaseModel):
orcabus_id_prefix = 'lib.'
objects = LibraryManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='lib')
library_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.contact import Contact
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords

Expand All @@ -19,9 +20,9 @@ class ProjectContactLink(models.Model):


class Project(BaseModel):
orcabus_id_prefix = 'prj.'
objects = ProjectManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='prj')
project_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import ulid
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


Expand Down Expand Up @@ -31,9 +32,9 @@ class SampleManager(BaseManager):


class Sample(BaseModel):
orcabus_id_prefix = 'smp.'
objects = SampleManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='smp')
sample_id = models.CharField(
unique=True,
blank=True,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models

from app.fields import OrcaBusIdField
from app.models.base import BaseModel, BaseManager, BaseHistoricalRecords


Expand All @@ -17,8 +18,9 @@ class SubjectIndividualLink(models.Model):


class Subject(BaseModel):
orcabus_id_prefix = 'sbj.'
objects = SubjectManager()

orcabus_id = OrcaBusIdField(primary_key=True, prefix='sbj')
subject_id = models.CharField(
unique=True,
blank=True,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,22 +1,14 @@
from abc import ABC

from rest_framework import serializers

from rest_framework.serializers import ModelSerializer
from app.models import Contact
from .base import SerializersBase


class ContactBaseSerializer(SerializersBase):
prefix = Contact.orcabus_id_prefix


class ContactSerializer(ContactBaseSerializer):
class ContactSerializer(ModelSerializer):
class Meta:
model = Contact
fields = "__all__"


class ContactDetailSerializer(ContactBaseSerializer):
class ContactDetailSerializer(ModelSerializer):
from .project import ProjectSerializer

project_set = ProjectSerializer(many=True, read_only=True)
Expand All @@ -26,7 +18,7 @@ class Meta:
fields = "__all__"


class ContactHistorySerializer(ContactBaseSerializer):
class ContactHistorySerializer(ModelSerializer):
class Meta:
model = Contact.history.model
fields = "__all__"
Loading

0 comments on commit 80fd526

Please sign in to comment.