Skip to content

Commit

Permalink
Merge branch 'main' into standardize-returned-response
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Oct 30, 2023
2 parents 1ce9127 + cb82171 commit 66fbfd0
Show file tree
Hide file tree
Showing 11 changed files with 226 additions and 17 deletions.
3 changes: 2 additions & 1 deletion carbonplan_offsets_db/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from .app_metadata import metadata
from .logging import get_logger
from .routers import charts, credits, files, health, projects
from .routers import charts, clips, credits, files, health, projects

logger = get_logger()

Expand All @@ -27,6 +27,7 @@ def create_application() -> FastAPI:
application.include_router(projects.router, prefix='/projects', tags=['projects'])
application.include_router(credits.router, prefix='/credits', tags=['credits'])
application.include_router(charts.router, prefix='/charts', tags=['charts'])
application.include_router(clips.router, prefix='/clips', tags=['clips'])
application.include_router(files.router, prefix='/files', tags=['files'])

return application
Expand Down
64 changes: 52 additions & 12 deletions carbonplan_offsets_db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,20 @@
from .schemas import FileCategory, FileStatus, Pagination


class Clip(SQLModel, table=True):
    """A news/article clip associated with an offsets project.

    Backed by the ``clip`` table; rows are loaded from parquet files by the
    ``clips`` branch of the file-processing task.
    """

    # Optional on the Python side so instances can exist before the database
    # assigns the primary key; previously annotated plain `int` with
    # default=None, which contradicted the annotation.
    id: int | None = Field(default=None, primary_key=True)
    project_id: str = Field(description='Project id used by registry system')
    published_at: datetime.datetime = Field(description='Date the clip was published')
    title: str | None = Field(description='Title of the clip')
    url: pydantic.AnyUrl | None = Field(description='URL to the clip')
    # Persisted as a PostgreSQL text[] column.
    tags: list[str] | None = Field(
        description='Tags associated with the clip', sa_column=Column(postgresql.ARRAY(String()))
    )
    notes: str | None = Field(description='Notes associated with the clip')
    is_waybacked: bool = Field(default=False, description='Whether the clip is a waybacked clip')
    article_type: str = Field(description='Type of clip', default='unknown')


class File(SQLModel, table=True):
id: int = Field(default=None, primary_key=True)
url: pydantic.AnyUrl
Expand All @@ -21,7 +35,7 @@ class File(SQLModel, table=True):
category: FileCategory = Field(description='Category of file', default='unknown')


class Project(SQLModel, table=True):
class ProjectBase(SQLModel):
project_id: str = Field(
description='Project id used by registry system', unique=True, primary_key=True
)
Expand All @@ -47,6 +61,23 @@ class Project(SQLModel, table=True):
project_url: pydantic.HttpUrl | None = Field(description='URL to project details')


class Project(ProjectBase, table=True):
    """Database-backed project model (``project`` table); adds no fields beyond ProjectBase."""


class ProjectWithClips(ProjectBase):
    """Read model for API responses: a project together with its associated clips."""

    # default_factory avoids declaring a shared mutable list object as the
    # default (pydantic copies defaults, but the factory is the unambiguous idiom).
    clips: list[Clip] = Field(
        default_factory=list, description='List of clips associated with project'
    )


class Credit(SQLModel, table=True):
    """A credit transaction row (``credit`` table), keyed to a project by project_id."""

    # Optional so unsaved instances can exist before the database assigns the
    # key; previously `int` with default=None, contradicting the annotation.
    id: int | None = Field(default=None, primary_key=True)
    project_id: str = Field(description='Project id used by registry system')
    # BigInteger column — presumably quantities can exceed 32-bit range.
    quantity: int = Field(description='Number of credits', sa_column=Column(BigInteger()))
    vintage: int | None = Field(description='Vintage year of credits')
    transaction_date: datetime.date | None = Field(description='Date of transaction')
    transaction_type: str | None = Field(description='Type of transaction')


# Schema for 'project' table
project_schema = pa.DataFrameSchema(
{
Expand All @@ -70,16 +101,6 @@ class Project(SQLModel, table=True):
}
)


# NOTE(review): this appears to be a duplicate of the Credit definition earlier
# in this view — likely a diff-rendering artifact of the class being relocated.
class Credit(SQLModel, table=True):
    # Surrogate primary key assigned by the database.
    id: int = Field(default=None, primary_key=True)
    project_id: str = Field(description='Project id used by registry system')
    # Stored as a BigInteger column rather than the default integer type.
    quantity: int = Field(description='Number of credits', sa_column=Column(BigInteger()))
    vintage: int | None = Field(description='Vintage year of credits')
    transaction_date: datetime.date | None = Field(description='Date of transaction')
    transaction_type: str | None = Field(description='Type of transaction')


# Schema for 'credit' table
credit_schema = pa.DataFrameSchema(
{
Expand All @@ -94,10 +115,25 @@ class Credit(SQLModel, table=True):
}
)

# Schema for 'clip' table
# Validates the dataframe read from a clips parquet file before it is written
# to the database (see the 'clips' branch of tasks.process_files).
clip_schema = pa.DataFrameSchema(
    {
        'id': pa.Column(pa.Int, nullable=False),
        'project_id': pa.Column(pa.String, nullable=False),
        # NOTE(review): nullable=True here, but the Clip model and the clip
        # migration declare published_at non-nullable — confirm which is intended.
        'published_at': pa.Column(pd.DatetimeTZDtype(tz='UTC'), nullable=True),
        'title': pa.Column(pa.String, nullable=True),
        'url': pa.Column(pa.String, nullable=True),
        # Object dtype — tags arrive as Python lists per row.
        'tags': pa.Column(pa.Object, nullable=True),
        'notes': pa.Column(pa.String, nullable=True),
        'is_waybacked': pa.Column(pa.Bool, nullable=True),
        'article_type': pa.Column(pa.String, nullable=True),
    }
)


class PaginatedProjects(pydantic.BaseModel):
    """Paginated response envelope for project listings; items carry their clips."""

    pagination: Pagination
    data: list[ProjectWithClips]


class PaginatedCredits(pydantic.BaseModel):
Expand Down Expand Up @@ -127,3 +163,7 @@ class ProjectBinnedCreditsTotals(pydantic.BaseModel):
class PaginatedBinnedCreditTotals(pydantic.BaseModel):
    """Paginated response envelope for binned credit totals."""

    pagination: Pagination
    data: list[ProjectBinnedCreditsTotals]

class ClipWithPagination(pydantic.BaseModel):
    """Paginated response envelope for the clips listing endpoint.

    NOTE(review): sibling envelopes here are named ``PaginatedProjects`` /
    ``PaginatedCredits``; consider renaming this to ``PaginatedClips`` for
    consistency (would require updating its importers, e.g. routers/clips.py).
    """

    pagination: Pagination
    data: list[Clip]
83 changes: 83 additions & 0 deletions carbonplan_offsets_db/routers/clips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import datetime

from fastapi import APIRouter, Depends, Query, Request
from sqlmodel import Session, or_

from ..database import get_session
from ..logging import get_logger
from ..models import Clip, ClipWithPagination
from ..query_helpers import apply_filters, apply_sorting, handle_pagination
from ..schemas import Pagination

router = APIRouter()
logger = get_logger()


@router.get('/', response_model=ClipWithPagination)
def get_clips(
    request: Request,
    project_id: list[str] | None = Query(None, description='Project ID'),
    tags: list[str] | None = Query(None, description='Tags'),
    article_type: list[str] | None = Query(None, description='Article type'),
    published_at_from: datetime.date
    | datetime.datetime
    | None = Query(None, description='Published at from'),
    published_at_to: datetime.date
    | datetime.datetime
    | None = Query(None, description='Published at to'),
    search: str
    | None = Query(
        None,
        description='Case insensitive search string. Currently searches on `project_id` and `title` fields only.',
    ),
    current_page: int = Query(1, description='Page number', ge=1),
    per_page: int = Query(100, description='Items per page', le=200, ge=1),
    sort: list[str] = Query(
        default=['project_id'],
        description='List of sorting parameters in the format `field_name` or `+field_name` for ascending order or `-field_name` for descending order.',
    ),
    session: Session = Depends(get_session),
):
    """
    List clips, with optional filtering, free-text search, sorting, and pagination.

    All supplied filters are applied together; `search` additionally restricts
    results to rows whose `project_id` or `title` contains the search string
    (case-insensitive). Returns a ClipWithPagination envelope.
    """
    logger.info(f'Getting clips: {request.url}')

    # (attribute, values, operation, model) tuples consumed by apply_filters.
    # NOTE(review): 'ANY' presumably maps to a PostgreSQL array membership test
    # for the tags column — confirm against apply_filters' implementation.
    filters = [
        ('article_type', article_type, 'ilike', Clip),
        ('tags', tags, 'ANY', Clip),
        ('published_at', published_at_from, '>=', Clip),
        ('published_at', published_at_to, '<=', Clip),
        ('project_id', project_id, 'ilike', Clip),
    ]

    query = session.query(Clip)

    # apply_filters is expected to skip entries whose values are None.
    for attribute, values, operation, model in filters:
        query = apply_filters(
            query=query, model=model, attribute=attribute, values=values, operation=operation
        )

    # Handle 'search' filter separately due to its unique logic
    if search:
        search_pattern = f'%{search}%'
        query = query.filter(
            or_(Clip.project_id.ilike(search_pattern), Clip.title.ilike(search_pattern))
        )

    # 'id' is used as the tiebreaker so pagination ordering is deterministic.
    if sort:
        query = apply_sorting(query=query, sort=sort, model=Clip, primary_key='id')

    total_entries, current_page, total_pages, next_page, results = handle_pagination(
        query=query, current_page=current_page, per_page=per_page, request=request
    )

    return ClipWithPagination(
        pagination=Pagination(
            total_entries=total_entries,
            current_page=current_page,
            total_pages=total_pages,
            next_page=next_page,
        ),
        data=results,
    )
12 changes: 9 additions & 3 deletions carbonplan_offsets_db/routers/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from ..database import get_session
from ..logging import get_logger
from ..models import PaginatedProjects, Project
from ..models import Clip, Project, ProjectWithClips, ProjectWithPagination
from ..query_helpers import apply_filters, apply_sorting, handle_pagination
from ..schemas import Pagination, Registries

Expand Down Expand Up @@ -49,7 +50,7 @@ def get_projects(

logger.info(f'Getting projects: {request.url}')

query = session.query(Project)
query = session.query(Project).join(Clip, Clip.project_id == Project.project_id, isouter=True)

filters = [
('registry', registry, 'ilike', Project),
Expand Down Expand Up @@ -97,7 +98,7 @@ def get_projects(

@router.get(
'/{project_id}',
response_model=Project,
response_model=ProjectWithClips,
summary='Get project details by project_id',
)
def get_project(
Expand All @@ -107,7 +108,12 @@ def get_project(
"""Get a project by registry and project_id"""
logger.info('Getting project %s', project_id)

if project_obj := session.query(Project).filter_by(project_id=project_id).one_or_none():
if (
project_obj := session.query(Project)
.filter_by(project_id=project_id)
.join(Clip, Clip.project_id == Project.project_id, isouter=True)
.one_or_none()
):
return project_obj
else:
raise HTTPException(
Expand Down
1 change: 1 addition & 0 deletions carbonplan_offsets_db/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class FileStatus(str, enum.Enum):
class FileCategory(str, enum.Enum):
    """Kind of data a submitted file contains; drives the branch taken in tasks.process_files."""

    projects = 'projects'
    credits = 'credits'
    clips = 'clips'
    # Fallback for unrecognized files; such files are skipped during processing.
    unknown = 'unknown'


Expand Down
15 changes: 14 additions & 1 deletion carbonplan_offsets_db/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sqlmodel import ARRAY, BigInteger, Boolean, Date, DateTime, String

from .logging import get_logger
from .models import credit_schema, project_schema
from .models import clip_schema, credit_schema, project_schema

logger = get_logger()

Expand Down Expand Up @@ -71,6 +71,19 @@ def process_files(*, engine, session, files: list):
process_dataframe(df, 'project', engine, project_dtype_dict)
update_file_status(file, session, 'success')

elif file.category == 'clips':
logger.info(f'📚 Loading clip file: {file.url}')
data = (
pd.read_parquet(file.url, engine='fastparquet')
.reset_index(drop=True)
.reset_index()
.rename(columns={'index': 'id'})
)
df = clip_schema.validate(data)
clip_dtype_dict = {'tags': ARRAY(String)}
process_dataframe(df, 'clip', engine, clip_dtype_dict)
update_file_status(file, session, 'success')

else:
logger.info(f'❓ Unknown file category: {file.category}. Skipping file {file.url}')

Expand Down
41 changes: 41 additions & 0 deletions migrations/versions/f099826eeacd_add_clips_to_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""add clips to database
Revision ID: f099826eeacd
Revises: cd307c691d94
Create Date: 2023-10-21 19:19:30.263534
"""
import sqlalchemy as sa
import sqlmodel
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'f099826eeacd'
down_revision = 'cd307c691d94'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the ``clip`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'clip',
        sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True),
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('project_id', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        # NOTE(review): non-nullable here, while clip_schema in models.py allows
        # null published_at — confirm upstream data always has a publish date.
        sa.Column('published_at', sa.DateTime(), nullable=False),
        sa.Column('title', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
        sa.Column('url', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
        sa.Column('notes', sqlmodel.sql.sqltypes.AutoString(), nullable=True),
        sa.Column('is_waybacked', sa.Boolean(), nullable=False),
        sa.Column('article_type', sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.PrimaryKeyConstraint('id'),
    )
    # ### end Alembic commands ###
# ### end Alembic commands ###


def downgrade() -> None:
    """Drop the ``clip`` table, reversing :func:`upgrade`."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('clip')
    # ### end Alembic commands ###
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def setup_post(test_app):
'url': 's3://carbonplan-share/offsets-db-testing-data/final/credits-augmented.parquet',
'category': 'credits',
},
{
'url': 's3://carbonplan-share/offsets-db-testing-data/final/clips.parquet',
'category': 'clips',
},
]

# Perform POST request
Expand Down
15 changes: 15 additions & 0 deletions tests/test_clips.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytest


def test_get_clips(test_app):
    """The clips listing endpoint responds 200 with a list payload."""
    resp = test_app.get('/clips/')
    assert resp.status_code == 200
    payload = resp.json()
    assert isinstance(payload['data'], list)


@pytest.mark.parametrize('article_type', ['foo'])
@pytest.mark.parametrize('tags', ['foo'])
def test_get_filtered_clips(test_app, article_type, tags):
    """Filtering by article_type/tags plus a search term still responds 200 with a list."""
    url = f'/clips/?article_type={article_type}&tags={tags}&search=carbon'
    resp = test_app.get(url)
    assert resp.status_code == 200
    payload = resp.json()
    assert isinstance(payload['data'], list)
4 changes: 4 additions & 0 deletions tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ def test_submit_bad_file(test_app):
'url': 's3://carbonplan-share/offsets-db-testing-data/final/credits-augmented.parquet',
'category': 'credits',
},
{
'url': 's3://carbonplan-share/offsets-db-testing-data/final/clips.parquet',
'category': 'clips',
},
],
],
)
Expand Down
1 change: 1 addition & 0 deletions tests/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def test_get_project(test_app):
data = response.json()['data'][0]
project_id = data['project_id']
registry = data['registry']
assert isinstance(data['clips'], list)

response = test_app.get(f'/projects/{project_id}')
assert response.status_code == 200
Expand Down

0 comments on commit 66fbfd0

Please sign in to comment.