Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jai/restructure #747

Open
wants to merge 5 commits into
base: optimization/code_cleanup
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion accounts/tests/test_urls.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.test import SimpleTestCase
from django.urls import resolve, reverse
from accounts.views import RegisterViewset
from datahub.views import ParticipantViewSet
from participant.views import ParticipantViewSet
from django.test import SimpleTestCase
from django.urls import resolve, reverse

Expand Down
9 changes: 5 additions & 4 deletions accounts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
UserCreateSerializerValidator,
)
from core.utils import Utils
from datahub.models import UserOrganizationMap
from participant.models import UserOrganizationMap
from utils import login_helper, string_functions
from utils.jwt_services import http_request_mutation

Expand All @@ -44,15 +44,16 @@
date_formater,
read_contents_from_csv_or_xlsx_file,
)
from datahub.models import (
from participant.models import (
DatahubDocuments,
Datasets,
DatasetV2,
DatasetV2File,
Organization,
UserOrganizationMap,
)
from datahub.serializers import (

from datasets.models import Datasets
from participant.serializers import (
DatahubDatasetsSerializer,
DatahubDatasetsV2Serializer,
DatahubThemeSerializer,
Expand Down
2 changes: 1 addition & 1 deletion connectors/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from accounts.models import User
from core import settings
from core.base_models import TimeStampMixin
from datahub.models import DatasetV2File
from participant.models import DatasetV2File

# Create your models here.

Expand Down
4 changes: 2 additions & 2 deletions connectors/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from connectors.models import Connectors, ConnectorsMap
from core import settings
from core.constants import Constants
from datahub.models import DatasetV2, DatasetV2File, Organization, UserOrganizationMap
from datahub.serializers import DatasetV2FileSerializer
from participant.models import DatasetV2, DatasetV2File, Organization, UserOrganizationMap
from participant.serializers import DatasetV2FileSerializer
from django.db.models import Subquery, Min, Count
from django.db import models

Expand Down
2 changes: 1 addition & 1 deletion connectors/tests/test_connectors_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from django.test import Client, TestCase
from rest_framework import status
import json
from datahub.models import DatasetV2, Organization, UserOrganizationMap, DatasetV2File
from participant.models import DatasetV2, Organization, UserOrganizationMap, DatasetV2File
from accounts.models import User, UserRole
from connectors.models import Connectors, ConnectorsMap
from participant.tests.test_util import TestUtils
Expand Down
2 changes: 1 addition & 1 deletion connectors/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from core import settings
from core.constants import Constants
from core.utils import CustomPagination
from datahub.models import Datasets
from datasets.models import Datasets
from utils.authentication_services import authenticate_user
from utils.jwt_services import http_request_mutation
from rest_framework.exceptions import ValidationError
Expand Down
1 change: 1 addition & 0 deletions core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
# custom apps
"accounts",
"datahub",
"datasets",
"participant",
"microsite",
"connectors",
Expand Down
2 changes: 1 addition & 1 deletion core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from rest_framework_simplejwt.tokens import AccessToken, RefreshToken

from core.constants import Constants
from datahub.models import DatasetV2File, UsagePolicy, UserOrganizationMap
from participant.models import DatasetV2File, UsagePolicy, UserOrganizationMap
from utils.jwt_services import http_request_mutation

# @http_request_mutation
Expand Down
24 changes: 12 additions & 12 deletions datahub/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class UserOrganizationMap(TimeStampMixin):
"""UserOrganizationMap model for mapping User and Organization model"""

id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
user = models.ForeignKey(User, on_delete=models.CASCADE)
user = models.ForeignKey(User, on_delete=models.CASCADE, related_name="datahub_org_map_user")
organization = models.ForeignKey(Organization, on_delete=models.CASCADE)


Expand Down Expand Up @@ -203,17 +203,17 @@ def __init__(self, dataset_name, source):
# def size(self, name):
# path = self.path(name)
# return os.path.getsize(path)

def exists(self, name):
"""
Check if a file with the given name already exists in the storage.

Returns the result of ``os.path.exists(name)``; ``name`` is tested exactly
as passed in, without being joined to any base directory.
"""
# NOTE(review): _save() joins ``name`` onto settings.DATASET_FILES_URL before
# writing, whereas this check uses ``name`` unchanged -- confirm that callers
# pass a path for which this comparison is meaningful.
return os.path.exists(name)

def url(self, url):
# Returns the argument unchanged; no prefix or base URL is applied here.
return url

def _save(self, name, content):
def _save(self, name, content):
# Save file to a directory outside MEDIA_ROOT
full_path = os.path.join(settings.DATASET_FILES_URL, name)
directory = os.path.dirname(full_path)
Expand Down Expand Up @@ -260,20 +260,20 @@ class DatasetV2File(TimeStampMixin):
def dataset_directory_path(instance, filename):
# Builds "<settings.DATASET_FILES_URL>/<dataset name>/<source>/<filename>"
# from the instance's related dataset name and its source.
return f"{settings.DATASET_FILES_URL}/{instance.dataset.name}/{instance.source}/{filename}"

def get_upload_path(instance, filename):
# Builds "<dataset name>/<source>/<filename>" with no settings-based prefix.
return f"{instance.dataset.name}/{instance.source}/{filename}"

def save(self, *args, **kwargs):
# Attach a CustomStorage built from this row's dataset name and source to the
# file field, then delegate to the parent save() with the caller's arguments.
storage = CustomStorage(self.dataset.name, self.source)
self.file.storage = storage # type: ignore

# Disabled: recording the uploaded file's size on the instance.
# if self.file:
# # Get the file size
# size = self.file.size
# self.file_size = size

super().save(*args, **kwargs)

SOURCES = [
Expand All @@ -291,7 +291,7 @@ def save(self, *args, **kwargs):
standardised_configuration = models.JSONField(default = dict)
accessibility = models.CharField(max_length=255, null=True, choices=USAGE_POLICY_APPROVAL_STATUS, default="public")
connection_details = models.JSONField(default=dict, null=True)

class UsagePolicy(TimeStampMixin):
"""
Policy documentation Model.
Expand Down Expand Up @@ -324,7 +324,7 @@ def __str__(self) -> str:

class ResourceFile(TimeStampMixin):
"""
Resource Files Model -- Has a one to many relation
Resource Files Model -- Has a one to many relation
-- 1 resource can have multiple resource files.
"""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
Expand All @@ -338,7 +338,7 @@ class ResourceFile(TimeStampMixin):
embeddings_status_reason = models.CharField(max_length=1000, null=True)
def __str__(self) -> str:
# Represent the record by the name of its attached file.
return self.file.name

class DatasetV2FileReload(TimeStampMixin):
dataset_file = models.ForeignKey(DatasetV2File, on_delete=models.CASCADE, related_name="dataset_file")

Expand Down Expand Up @@ -385,7 +385,7 @@ class LangchainPgCollection(models.Model):
# resource_file = models.ForeignKey(ResourceFile, on_delete=models.PROTECT, related_name="resource_file_collections")

class Meta:
db_table = 'langchain_pg_collection'
db_table = 'langchain_pg_collection_datahub'


class LangchainPgEmbedding(models.Model):
Expand All @@ -397,7 +397,7 @@ class LangchainPgEmbedding(models.Model):
uuid = models.UUIDField(primary_key=True)

class Meta:
db_table = 'langchain_pg_embedding'
db_table = 'langchain_pg_embedding_datahub'

# def __str__(self):
# return f"LangchainPgEmbedding(uuid={self.uuid}, document={self.document})"
Expand Down
4 changes: 3 additions & 1 deletion datahub/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
StandardisationTemplate,
UserOrganizationMap,
)
from participant.models import Connectors, SupportTicket
# from participant.models import Connectors, SupportTicket
# TODO: the Connectors import was removed here to avoid a circular import; restore it once the import cycle is resolved.
from participant.models import SupportTicket
from utils.custom_exceptions import NotFoundException
from utils.embeddings_creation import VectorDBBuilder
from utils.file_operations import create_directory, move_directory
Expand Down
2 changes: 1 addition & 1 deletion datahub/tests/test_api_builder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from rest_framework.reverse import reverse
from django.test import Client, TestCase
import json
from datahub.models import DatasetV2, Organization, UserOrganizationMap
from participant.models import DatasetV2, Organization, UserOrganizationMap
from accounts.models import User, UserRole
from participant.tests.test_util import TestUtils
from _pytest.monkeypatch import MonkeyPatch
Expand Down
2 changes: 1 addition & 1 deletion datahub/tests/test_datahub_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from _pytest.monkeypatch import MonkeyPatch
from django.db import models
from accounts.models import User, UserManager, UserRole
from datahub.models import Datasets, Organization, UserOrganizationMap
from participant.models import Datasets, Organization, UserOrganizationMap
from datahub.views import ParticipantViewSet
from django.test import Client, TestCase
from django.test.client import encode_multipart
Expand Down
Empty file added datasets/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions datasets/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 changes: 6 additions & 0 deletions datasets/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class DatasetsConfig(AppConfig):
    """Django application configuration for the ``datasets`` app."""

    # Dotted Python path of the application this config belongs to.
    name = "datasets"

    # Field class used for primary keys that models in this app do not declare.
    default_auto_field = "django.db.models.BigAutoField"
Empty file added datasets/migrations/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions datasets/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from django.db import models
import os
import uuid
from datetime import timedelta
from email.mime import application

from django.conf import settings
from django.core.files.storage import Storage
from django.db import models
from django.utils import timezone
from pgvector.django import VectorField

from accounts.models import User
from core.base_models import TimeStampMixin
from core.constants import Constants
from participant.models import UserOrganizationMap
from utils.validators import (
validate_25MB_file_size,
validate_file_size,
validate_image_type,
)


# Choices for an approval-status field: every entry uses the same string for
# the stored value and for the display label.
APPROVAL_STATUS = tuple(
    (state, state) for state in ("approved", "rejected", "for_review")
)

def auto_str(cls):
    """Class decorator that installs a generic ``__str__`` on ``cls``.

    The generated ``__str__`` renders every entry of ``vars(self)`` as
    ``name=value`` and joins the entries with ``", "``, in the order that
    ``vars(self)`` yields them. An instance with no attributes renders as
    an empty string.

    Args:
        cls: The class to decorate. It is modified in place.

    Returns:
        The same ``cls`` object, so the function can be applied as
        ``@auto_str``.
    """

    def __str__(self):
        # ``!s`` keeps the explicit str() conversion that ``%s`` performed.
        return ", ".join(
            f"{attr!s}={value!s}" for attr, value in vars(self).items()
        )

    cls.__str__ = __str__
    return cls

# Create your models here.
@auto_str
class Datasets(TimeStampMixin):
    """A dataset record linked to a user-organization mapping.

    Carries descriptive metadata (name, description, category, geography,
    capture window, size), an optional sample file, and moderation /
    visibility flags. ``@auto_str`` supplies the ``__str__`` implementation.
    """

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )
    # PROTECT: the referenced UserOrganizationMap row cannot be deleted while
    # a dataset still points at it.
    user_map = models.ForeignKey(
        UserOrganizationMap,
        on_delete=models.PROTECT,
    )

    # Descriptive metadata.
    name = models.CharField(max_length=255, unique=True)
    description = models.CharField(max_length=500)
    category = models.JSONField()
    geography = models.CharField(max_length=255, blank=True)
    # NOTE(review): the original inline remark here was "field should update";
    # its intent is not clear from this file.
    crop_detail = models.CharField(max_length=255, null=True, blank=True)
    constantly_update = models.BooleanField(default=False)
    age_of_date = models.CharField(max_length=255, null=True, blank=True)
    data_capture_start = models.DateTimeField(null=True, blank=True)
    data_capture_end = models.DateTimeField(null=True, blank=True)
    dataset_size = models.CharField(max_length=255, null=True, blank=True)
    connector_availability = models.CharField(
        max_length=255,
        null=True,
        blank=True,
    )
    sample_dataset = models.FileField(upload_to=settings.SAMPLE_DATASETS_URL, blank=True)

    # Moderation and visibility state.
    status = models.BooleanField(default=True)
    approval_status = models.CharField(
        max_length=255,
        null=True,
        choices=APPROVAL_STATUS,
        default="for_review",
    )
    is_enabled = models.BooleanField(default=True)
    is_public = models.BooleanField(default=True)
    remarks = models.CharField(max_length=1000, null=True, blank=True)

    class Meta:
        indexes = [models.Index(fields=["name"])]
Loading