diff --git a/alembic/versions/c4db41b6319c_moved_cr.py b/alembic/versions/c4db41b6319c_moved_cr.py new file mode 100644 index 0000000..ed71811 --- /dev/null +++ b/alembic/versions/c4db41b6319c_moved_cr.py @@ -0,0 +1,31 @@ +"""moved_cr + +Revision ID: c4db41b6319c +Revises: 63d295d21692 +Create Date: 2023-06-12 17:52:09.582704 + +""" +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "c4db41b6319c" +down_revision = "63d295d21692" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("cr_diffs", sa.Column("moves", postgresql.ARRAY(sa.Text(), dimensions=2), nullable=True)) + op.add_column("cr_diffs_pending", sa.Column("moves", postgresql.ARRAY(sa.Text(), dimensions=2), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("cr_diffs_pending", "moves") + op.drop_column("cr_diffs", "moves") + # ### end Alembic commands ### diff --git a/app/database/models.py b/app/database/models.py index f003f8a..4da9685 100644 --- a/app/database/models.py +++ b/app/database/models.py @@ -1,5 +1,5 @@ from sqlalchemy import Column, Date, ForeignKey, Integer, String, Text -from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.dialects.postgresql import ARRAY, JSONB from sqlalchemy.orm import declarative_base, relationship Base = declarative_base() @@ -76,6 +76,7 @@ class CrDiff(Base): source_id = Column(ForeignKey("cr.id"), nullable=False) dest_id = Column(ForeignKey("cr.id"), nullable=False) bulletin_url = Column(Text) + moves = Column(ARRAY(Text, dimensions=2)) dest = relationship("Cr", primaryjoin="CrDiff.dest_id == Cr.id") source = relationship("Cr", primaryjoin="CrDiff.source_id == Cr.id") @@ -89,6 +90,7 @@ class PendingCrDiff(Base): source_id = Column(ForeignKey("cr.id"), nullable=False) dest_id = Column(ForeignKey("cr_pending.id", ondelete="CASCADE"), nullable=False) changes = Column(JSONB(astext_type=Text())) + moves = Column(ARRAY(Text, dimensions=2)) dest = relationship("PendingCr") source = relationship("Cr") diff --git a/app/database/operations.py b/app/database/operations.py index 0ab4830..c9f8441 100644 --- a/app/database/operations.py +++ b/app/database/operations.py @@ -1,5 +1,5 @@ import datetime -from typing import Tuple, Type +from typing import Type from sqlalchemy import select from sqlalchemy.orm import Session, aliased @@ -64,17 +64,20 @@ def set_pending(db: Session, resource: str, link: str) -> None: db.add(PendingRedirect(resource=resource, link=link)) -def get_cr_diff(db: Session, old_code: str, new_code: str) -> CrDiff | None: +def get_cr_diff(db: Session, old_code: str | None, new_code: str | None) -> CrDiff | None: src = aliased(Cr) dst = aliased(Cr) - stmt = ( - select(CrDiff) - .join(src, CrDiff.source) - .join(dst, CrDiff.dest) - .where(src.set_code == old_code) - .where(dst.set_code == new_code) - ) + stmt = select(CrDiff).join(src, CrDiff.source).join(dst, CrDiff.dest) + + if not old_code and not new_code: + stmt = stmt.order_by(CrDiff.creation_day.desc()).limit(1) + else: + if old_code: + stmt = stmt.where(src.set_code == old_code) + if new_code: + stmt = stmt.where(dst.set_code == new_code) + return db.execute(stmt).scalar_one_or_none() @@ -104,6 +107,7 @@ def apply_pending_cr_and_diff(db: Session, set_code: str, set_name: str) -> None source_id=pendingDiff.source_id, dest=newCr, changes=pendingDiff.changes, + moves=pendingDiff.moves, ) db.add(newCr) db.add(newDiff) @@ -137,28 +141,6 @@ def get_pending_mtr_diff(db: Session) -> PendingMtrDiff: return db.execute(select(PendingMtrDiff).join(PendingMtrDiff.dest)).scalar_one_or_none() -def get_latest_cr_diff_code(db: Session) -> (str, str): - stmt = select(CrDiff).join(CrDiff.dest).order_by(Cr.creation_day.desc()) - diff: CrDiff = db.execute(stmt).scalars().first() - return diff.source.set_code, diff.dest.set_code - - -def get_cr_diff_codes(db: Session, old_code: str | None, new_code: str | None) -> Tuple[str, str] | None: - stmt = select(CrDiff) - if old_code: - stmt = stmt.join(Cr, CrDiff.source).where(Cr.set_code == old_code) - elif new_code: - stmt = stmt.join(Cr, CrDiff.dest).where(Cr.set_code == new_code) - else: - return None - - diff: CrDiff = db.execute(stmt).scalar_one_or_none() - if not diff: - return None - - return diff.source.set_code, diff.dest.set_code - - def get_pending_cr_diff(db: Session) -> PendingCrDiff | None: return db.execute(select(PendingCrDiff)).scalar_one_or_none() @@ -200,10 +182,12 @@ def get_mtr_diff_metadata(db: Session): return db.execute(stmt).fetchall() -def set_pending_cr_and_diff(db: Session, new_rules: dict, new_diff: list, file_name: str): +def set_pending_cr_and_diff(db: Session, new_rules: dict, new_diff: list, file_name: str, new_moves: list): new_cr = PendingCr(creation_day=datetime.date.today(), data=new_rules, file_name=file_name) curr_cr_id: Cr = db.execute(select(Cr.id).order_by(Cr.creation_day.desc())).scalars().first() - new_diff = PendingCrDiff(creation_day=datetime.date.today(), source_id=curr_cr_id, dest=new_cr, changes=new_diff) + new_diff = PendingCrDiff( + creation_day=datetime.date.today(), source_id=curr_cr_id, dest=new_cr, changes=new_diff, moves=new_moves + ) db.add(new_cr) db.add(new_diff) diff --git a/app/difftool/diffmaker.py b/app/difftool/diffmaker.py index aaac85d..1eb0050 100644 --- a/app/difftool/diffmaker.py +++ b/app/difftool/diffmaker.py @@ -7,8 +7,8 @@ @dataclass class Diff: - diff: list[(any, any)] - matches: list[(str, str)] + diff: list[(any, any)] # list of changed item in a release + moved: list[(str, str)] # list of moved (but identical in content) items in a release class DiffMaker: @@ -27,15 +27,19 @@ def diff(self, old_doc, new_doc) -> Diff: """ matches = self.matcher.align_matches(old_doc, new_doc) diffs = [] + moved = [] for match_old, match_new in matches: old_item = old_doc.get(match_old) new_item = new_doc.get(match_new) diff = self.differ.diff_items(old_item, new_item) if diff: diffs.append({"old": diff[0], "new": diff[1]}) + elif match_old != match_new: + moved.append((match_old, match_new)) - sorted = self.sorter.sort(diffs) - return Diff(sorted, matches) + sorted_diffs = self.sorter.sort_diffs(diffs) + sorted_moves = self.sorter.sort_moved(moved) + return Diff(sorted_diffs, sorted_moves) class CRDiffMaker(DiffMaker): diff --git a/app/difftool/diffsorter.py b/app/difftool/diffsorter.py index 4333580..7ae07e0 100644 --- a/app/difftool/diffsorter.py +++ b/app/difftool/diffsorter.py @@ -8,14 +8,24 @@ class DiffSorter(ABC): """ @abstractmethod - def item_to_key(self, item) -> int: + def diff_to_sort_key(self, item) -> int: """ - Method for calculating the sort key of a given item + Method for calculating the sort key of a given diff item """ pass - def sort(self, diffs: list) -> list: - return sorted(diffs, key=self.item_to_key) + @abstractmethod + def move_to_sort_key(self, key: tuple[str, str]) -> int | str: + """ + Method for calculating the sort key of a given move item + """ + pass + + def sort_diffs(self, diffs: list) -> list: + return sorted(diffs, key=self.diff_to_sort_key) + + def sort_moved(self, moved: list[tuple[str, str]]) -> list[tuple[str, str]]: + return sorted(moved, key=self.move_to_sort_key) class CRDiffSorter(DiffSorter): @@ -24,7 +34,7 @@ class CRDiffSorter(DiffSorter): """ @staticmethod - def num_to_key(num) -> int: + def rule_num_to_sort_key(num: str) -> int: """ Converts rule number to an integer sort key. @@ -42,15 +52,21 @@ def num_to_key(num) -> int: # rules only go to 9xx, 1000 multiplier is enough. letters only go 1-27, so 100 multiplier is enough there too return int(rule) * 100_000 + int(subrule) * 100 + letter_val - def item_to_key(self, item) -> int: + def diff_to_sort_key(self, item) -> int: # the diff rows are sorted primarily by the new rule, the old rule number only being used for deletions sort_by = item["new"] or item["old"] - return CRDiffSorter.num_to_key(sort_by["ruleNum"]) + return CRDiffSorter.rule_num_to_sort_key(sort_by["ruleNum"]) + + def move_to_sort_key(self, item: tuple[str, str]) -> int: + return CRDiffSorter.rule_num_to_sort_key(item[1]) class MtrDiffSorter(DiffSorter): - def item_to_key(self, item: dict) -> int: + def diff_to_sort_key(self, item: dict) -> int: comparison_source = item.get("new") or item.get("old") or {} s = comparison_source.get("section") or 0 ss = comparison_source.get("subsection") or 0 return s * 100 + ss + + def move_to_sort_key(self, key: tuple[str, str]) -> str: + return key[1] diff --git a/app/parsing/cr/refresh_cr.py b/app/parsing/cr/refresh_cr.py index 07211cf..830bc3f 100644 --- a/app/parsing/cr/refresh_cr.py +++ b/app/parsing/cr/refresh_cr.py @@ -85,7 +85,7 @@ async def refresh_cr(link): # TODO add to database instead? KeywordCache().replace(result["keywords"]) GlossaryCache().replace(result["glossary"]) - ops.set_pending_cr_and_diff(session, result["rules"], diff_result.diff, file_name) + ops.set_pending_cr_and_diff(session, result["rules"], diff_result.diff, file_name, diff_result.moved) if __name__ == "__main__": diff --git a/app/routers/diff.py b/app/routers/diff.py index 9eea4da..b570e9f 100644 --- a/app/routers/diff.py +++ b/app/routers/diff.py @@ -1,7 +1,7 @@ from datetime import date from typing import Union -from fastapi import APIRouter, Depends, HTTPException, Path, Response +from fastapi import APIRouter, Depends, HTTPException, Path, Query, Response from fastapi.responses import RedirectResponse from sqlalchemy.orm import Session @@ -22,20 +22,23 @@ class MtrDiffError(Error): @router.get( - "/cr/{old}-{new}", + "/cr", summary="CR diff", response_model=Union[CrDiffError, CRDiff], responses={200: {"model": CRDiff}, 404: {"model": CrDiffError}}, ) async def cr_diff( response: Response, - old: str = Path(description="Set code of the old set.", min_length=3, max_length=5), - new: str = Path(description="Set code of the new set", min_length=3, max_length=5), + old: str | None = Query(None, description="Set code of the old set.", min_length=3, max_length=5), + new: str | None = Query(None, description="Set code of the new set", min_length=3, max_length=5), db: Session = Depends(get_db), ): """ - Returns a diff of the CR between the two specified sets. Diffs only exist for neighboring CR releases. The path - parameters are **not** case sensitive. + Returns a diff of the CR between the two specified sets. Diffs only exist for neighboring CR releases. + + The set code query parameters are **not** case sensitive. If both are supplied, the server attempts to find a + diff between exactly those two sets. If only one is supplied, a diff with that set at the provided end is found. + If neither is supplied, the latest CR diff is returned. The `changes` property is an ordered array of diff items, with each item consisting of the `old` and `new` versions of a rule. If a new rule is added, `old` is `null`. If an old rule is deleted, `new` is `null`. @@ -43,11 +46,17 @@ async def cr_diff( identical text but changed rule number aren't part of this array, and neither are rules whose only change in text was a reference to a renumbered rule. - `source_set` and `dest_set` contain full names of the sets being diffed, and the `nav` property stores set codes - of diffs immediately preceding/following this one + The `moves` property contains an ordered array representing the rules that changed their number but didn't change + their content. This property may be `null` or missing. If present, each element in the array is a two-item tuple + of strings, containing the old and new numbers for the given rule. No items that are part of the `changes` + property are included here. In particular, this means that both elements of each two-tuple are always valid strings. + + `sourceSet` and `destSet` contain full names of the sets being diffed, and `sourceCode` and `destCode` contain + the canonical set codes of those sets. """ - old = old.upper() - new = new.upper() + old = old and old.upper() + new = new and new.upper() + diff = ops.get_cr_diff(db, old, new) if diff is None: response.status_code = 404 @@ -57,34 +66,17 @@ async def cr_diff( "new": new, } - def format_nav(codes): - if not codes: - return None - return {"old": codes[0], "new": codes[1]} - - nav = { - "next": format_nav(ops.get_cr_diff_codes(db, new, None)), - "prev": format_nav(ops.get_cr_diff_codes(db, None, old)), - } - return { "creationDay": diff.creation_day, "changes": diff.changes, "sourceSet": diff.source.set_name, + "sourceCode": diff.source.set_code, "destSet": diff.dest.set_name, - "nav": nav, + "destCode": diff.dest.set_code, + "moves": diff.moves, } -@router.get("/cr/", status_code=307, summary="Latest CR diff", responses={307: {"content": None}}) -async def latest_cr_diff(db: Session = Depends(get_db)): - """ - Redirects to the latest CR diff available - """ - old, new = ops.get_latest_cr_diff_code(db) - return RedirectResponse(f"./{old}-{new}") - - @router.get( "/mtr/{effective_date}", summary="MTR diff", diff --git a/app/utils/models.py b/app/utils/models.py index 59c55ea..49f93ae 100644 --- a/app/utils/models.py +++ b/app/utils/models.py @@ -84,9 +84,11 @@ class CRDiffNav: class CRDiff: creation_day: datetime.date = Field(..., alias="creationDay") source_set: str = Field(..., alias="sourceSet") + source_code: str = Field(..., alias="sourceCode") dest_set: str = Field(..., alias="destSet") + dest_code: str = Field(..., alias="destCode") changes: list[CRDiffItem] = Field(...) - nav: CRDiffNav = Field(..., description="Connections to neighboring diffs") + moves: list[tuple[str, str]] = Field(description="List of moved rules") @dataclass(config=Config) diff --git a/create_cr_and_diff.py b/create_cr_and_diff.py index 7283b72..c128d5d 100644 --- a/create_cr_and_diff.py +++ b/create_cr_and_diff.py @@ -1,7 +1,6 @@ import asyncio import json import os -import sys from app.difftool.diffmaker import CRDiffMaker from app.parsing.cr import extract_cr @@ -53,7 +52,7 @@ async def diff_save(old, new, forced_matches=None): with open(os.path.join(diff_dir, diff_code + ".json"), "w") as file: json.dump(dff.diff, file) with open(os.path.join(maps_dir, diff_code + ".json"), "w") as file: - json.dump(dff.matches, file) + json.dump(dff.moved, file) print(o_code, n_code) @@ -67,14 +66,14 @@ async def diffall(): cr_in_dir = "app/static/raw_docs/cr" cr_out_dir = "./gen/cr" -diff_dir = "./gen/diff" +diff_dir = "./gen/diff_unchecked" maps_dir = "./gen/map" gloss_dir = "./gen/gloss" key_dir = "./gen/keywords" if __name__ == "__main__": - # asyncio.run(diffall()) - old = sys.argv[1] - new = sys.argv[2] - forced = [] - asyncio.run(diff_save(old, new, forced)) + asyncio.run(diffall()) + # old = sys.argv[1] + # new = sys.argv[2] + # forced = [] + # asyncio.run(diff_save(old, new, forced))