Skip to content

Commit

Permalink
Merge pull request #46 from lunakv/moved_cr
Browse files Browse the repository at this point in the history
Track moved rules
  • Loading branch information
lunakv authored Jun 12, 2023
2 parents 0f2646f + eaab767 commit 3eac2ac
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 86 deletions.
31 changes: 31 additions & 0 deletions alembic/versions/c4db41b6319c_moved_cr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""moved_cr
Revision ID: c4db41b6319c
Revises: 63d295d21692
Create Date: 2023-06-12 17:52:09.582704
"""
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op

# revision identifiers, used by Alembic.
revision = "c4db41b6319c"
down_revision = "63d295d21692"
branch_labels = None
depends_on = None


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("cr_diffs", sa.Column("moves", postgresql.ARRAY(sa.Text(), dimensions=2), nullable=True))
op.add_column("cr_diffs_pending", sa.Column("moves", postgresql.ARRAY(sa.Text(), dimensions=2), nullable=True))
# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column("cr_diffs_pending", "moves")
op.drop_column("cr_diffs", "moves")
# ### end Alembic commands ###
4 changes: 3 additions & 1 deletion app/database/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from sqlalchemy import Column, Date, ForeignKey, Integer, String, Text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import ARRAY, JSONB
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()
Expand Down Expand Up @@ -76,6 +76,7 @@ class CrDiff(Base):
source_id = Column(ForeignKey("cr.id"), nullable=False)
dest_id = Column(ForeignKey("cr.id"), nullable=False)
bulletin_url = Column(Text)
moves = Column(ARRAY(Text, dimensions=2))

dest = relationship("Cr", primaryjoin="CrDiff.dest_id == Cr.id")
source = relationship("Cr", primaryjoin="CrDiff.source_id == Cr.id")
Expand All @@ -89,6 +90,7 @@ class PendingCrDiff(Base):
source_id = Column(ForeignKey("cr.id"), nullable=False)
dest_id = Column(ForeignKey("cr_pending.id", ondelete="CASCADE"), nullable=False)
changes = Column(JSONB(astext_type=Text()))
moves = Column(ARRAY(Text, dimensions=2))

dest = relationship("PendingCr")
source = relationship("Cr")
Expand Down
50 changes: 17 additions & 33 deletions app/database/operations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import datetime
from typing import Tuple, Type
from typing import Type

from sqlalchemy import select
from sqlalchemy.orm import Session, aliased
Expand Down Expand Up @@ -64,17 +64,20 @@ def set_pending(db: Session, resource: str, link: str) -> None:
db.add(PendingRedirect(resource=resource, link=link))


def get_cr_diff(db: Session, old_code: str, new_code: str) -> CrDiff | None:
def get_cr_diff(db: Session, old_code: str | None, new_code: str | None) -> CrDiff | None:
src = aliased(Cr)
dst = aliased(Cr)

stmt = (
select(CrDiff)
.join(src, CrDiff.source)
.join(dst, CrDiff.dest)
.where(src.set_code == old_code)
.where(dst.set_code == new_code)
)
stmt = select(CrDiff).join(src, CrDiff.source).join(dst, CrDiff.dest)

if not old_code and not new_code:
stmt = stmt.order_by(CrDiff.creation_day.desc()).limit(1)
else:
if old_code:
stmt = stmt.where(src.set_code == old_code)
if new_code:
stmt = stmt.where(dst.set_code == new_code)

return db.execute(stmt).scalar_one_or_none()


Expand Down Expand Up @@ -104,6 +107,7 @@ def apply_pending_cr_and_diff(db: Session, set_code: str, set_name: str) -> None
source_id=pendingDiff.source_id,
dest=newCr,
changes=pendingDiff.changes,
moves=pendingDiff.moves,
)
db.add(newCr)
db.add(newDiff)
Expand Down Expand Up @@ -137,28 +141,6 @@ def get_pending_mtr_diff(db: Session) -> PendingMtrDiff:
return db.execute(select(PendingMtrDiff).join(PendingMtrDiff.dest)).scalar_one_or_none()


def get_latest_cr_diff_code(db: Session) -> (str, str):
stmt = select(CrDiff).join(CrDiff.dest).order_by(Cr.creation_day.desc())
diff: CrDiff = db.execute(stmt).scalars().first()
return diff.source.set_code, diff.dest.set_code


def get_cr_diff_codes(db: Session, old_code: str | None, new_code: str | None) -> Tuple[str, str] | None:
stmt = select(CrDiff)
if old_code:
stmt = stmt.join(Cr, CrDiff.source).where(Cr.set_code == old_code)
elif new_code:
stmt = stmt.join(Cr, CrDiff.dest).where(Cr.set_code == new_code)
else:
return None

diff: CrDiff = db.execute(stmt).scalar_one_or_none()
if not diff:
return None

return diff.source.set_code, diff.dest.set_code


def get_pending_cr_diff(db: Session) -> PendingCrDiff | None:
return db.execute(select(PendingCrDiff)).scalar_one_or_none()

Expand Down Expand Up @@ -200,10 +182,12 @@ def get_mtr_diff_metadata(db: Session):
return db.execute(stmt).fetchall()


def set_pending_cr_and_diff(db: Session, new_rules: dict, new_diff: list, file_name: str):
def set_pending_cr_and_diff(db: Session, new_rules: dict, new_diff: list, file_name: str, new_moves: list):
new_cr = PendingCr(creation_day=datetime.date.today(), data=new_rules, file_name=file_name)
curr_cr_id: Cr = db.execute(select(Cr.id).order_by(Cr.creation_day.desc())).scalars().first()
new_diff = PendingCrDiff(creation_day=datetime.date.today(), source_id=curr_cr_id, dest=new_cr, changes=new_diff)
new_diff = PendingCrDiff(
creation_day=datetime.date.today(), source_id=curr_cr_id, dest=new_cr, changes=new_diff, moves=new_moves
)
db.add(new_cr)
db.add(new_diff)

Expand Down
12 changes: 8 additions & 4 deletions app/difftool/diffmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

@dataclass
class Diff:
diff: list[(any, any)]
matches: list[(str, str)]
diff: list[(any, any)] # list of changed item in a release
moved: list[(str, str)] # list of moved (but identical in content) items in a release


class DiffMaker:
Expand All @@ -27,15 +27,19 @@ def diff(self, old_doc, new_doc) -> Diff:
"""
matches = self.matcher.align_matches(old_doc, new_doc)
diffs = []
moved = []
for match_old, match_new in matches:
old_item = old_doc.get(match_old)
new_item = new_doc.get(match_new)
diff = self.differ.diff_items(old_item, new_item)
if diff:
diffs.append({"old": diff[0], "new": diff[1]})
elif match_old != match_new:
moved.append((match_old, match_new))

sorted = self.sorter.sort(diffs)
return Diff(sorted, matches)
sorted_diffs = self.sorter.sort_diffs(diffs)
sorted_moves = self.sorter.sort_moved(moved)
return Diff(sorted_diffs, sorted_moves)


class CRDiffMaker(DiffMaker):
Expand Down
32 changes: 24 additions & 8 deletions app/difftool/diffsorter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,24 @@ class DiffSorter(ABC):
"""

@abstractmethod
def item_to_key(self, item) -> int:
def diff_to_sort_key(self, item) -> int:
"""
Method for calculating the sort key of a given item
Method for calculating the sort key of a given diff item
"""
pass

def sort(self, diffs: list) -> list:
return sorted(diffs, key=self.item_to_key)
@abstractmethod
def move_to_sort_key(self, key: tuple[str, str]) -> int | str:
"""
Method for calculating the sort key of a given move item
"""
pass

def sort_diffs(self, diffs: list) -> list:
return sorted(diffs, key=self.diff_to_sort_key)

def sort_moved(self, moved: list[tuple[str, str]]) -> list[tuple[str, str]]:
return sorted(moved, key=self.move_to_sort_key)


class CRDiffSorter(DiffSorter):
Expand All @@ -24,7 +34,7 @@ class CRDiffSorter(DiffSorter):
"""

@staticmethod
def num_to_key(num) -> int:
def rule_num_to_sort_key(num: str) -> int:
"""
Converts rule number to an integer sort key.
Expand All @@ -42,15 +52,21 @@ def num_to_key(num) -> int:
# rules only go to 9xx, 1000 multiplier is enough. letters only go 1-27, so 100 multiplier is enough there too
return int(rule) * 100_000 + int(subrule) * 100 + letter_val

def item_to_key(self, item) -> int:
def diff_to_sort_key(self, item) -> int:
# the diff rows are sorted primarily by the new rule, the old rule number only being used for deletions
sort_by = item["new"] or item["old"]
return CRDiffSorter.num_to_key(sort_by["ruleNum"])
return CRDiffSorter.rule_num_to_sort_key(sort_by["ruleNum"])

def move_to_sort_key(self, item: tuple[str, str]) -> int:
return CRDiffSorter.rule_num_to_sort_key(item[1])


class MtrDiffSorter(DiffSorter):
def item_to_key(self, item: dict) -> int:
def diff_to_sort_key(self, item: dict) -> int:
comparison_source = item.get("new") or item.get("old") or {}
s = comparison_source.get("section") or 0
ss = comparison_source.get("subsection") or 0
return s * 100 + ss

def move_to_sort_key(self, key: tuple[str, str]) -> str:
return key[1]
2 changes: 1 addition & 1 deletion app/parsing/cr/refresh_cr.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ async def refresh_cr(link):
# TODO add to database instead?
KeywordCache().replace(result["keywords"])
GlossaryCache().replace(result["glossary"])
ops.set_pending_cr_and_diff(session, result["rules"], diff_result.diff, file_name)
ops.set_pending_cr_and_diff(session, result["rules"], diff_result.diff, file_name, diff_result.moved)


if __name__ == "__main__":
Expand Down
52 changes: 22 additions & 30 deletions app/routers/diff.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import date
from typing import Union

from fastapi import APIRouter, Depends, HTTPException, Path, Response
from fastapi import APIRouter, Depends, HTTPException, Path, Query, Response
from fastapi.responses import RedirectResponse
from sqlalchemy.orm import Session

Expand All @@ -22,32 +22,41 @@ class MtrDiffError(Error):


@router.get(
"/cr/{old}-{new}",
"/cr",
summary="CR diff",
response_model=Union[CrDiffError, CRDiff],
responses={200: {"model": CRDiff}, 404: {"model": CrDiffError}},
)
async def cr_diff(
response: Response,
old: str = Path(description="Set code of the old set.", min_length=3, max_length=5),
new: str = Path(description="Set code of the new set", min_length=3, max_length=5),
old: str | None = Query(None, description="Set code of the old set.", min_length=3, max_length=5),
new: str | None = Query(None, description="Set code of the new set", min_length=3, max_length=5),
db: Session = Depends(get_db),
):
"""
Returns a diff of the CR between the two specified sets. Diffs only exist for neighboring CR releases. The path
parameters are **not** case sensitive.
Returns a diff of the CR between the two specified sets. Diffs only exist for neighboring CR releases.
The set code query parameters are **not** case sensitive. If both are supplied, the server attempts to find a
diff between exactly those two sets. If only one is supplied, a diff with that set at the provided end is found.
If neither is supplied, the latest CR diff is returned.
The `changes` property is an ordered array of diff items, with each item consisting of the `old` and `new`
versions of a rule. If a new rule is added, `old` is `null`. If an old rule is deleted, `new` is `null`.
Otherwise, the differing parts of the `ruleText` string are wrapped between "<<<<" and ">>>>". Rules with
identical text but changed rule number aren't part of this array, and neither are rules whose only change in text
was a reference to a renumbered rule.
`source_set` and `dest_set` contain full names of the sets being diffed, and the `nav` property stores set codes
of diffs immediately preceding/following this one
The `moves` property contains an ordered array representing the rules that changed their number but didn't change
their content. This property may be `null` or missing. If present, each element in the array is a two-item tuple
of strings, containing the old and new numbers for the given rule. No items that are part of the `changes`
property are included here. In particular, this means that both elements of each two-tuple are always valid strings.
`sourceSet` and `destSet` contain full names of the sets being diffed, and `sourceCode` and `destCode` contain
the canonical set codes of those sets.
"""
old = old.upper()
new = new.upper()
old = old and old.upper()
new = new and new.upper()

diff = ops.get_cr_diff(db, old, new)
if diff is None:
response.status_code = 404
Expand All @@ -57,34 +66,17 @@ async def cr_diff(
"new": new,
}

def format_nav(codes):
if not codes:
return None
return {"old": codes[0], "new": codes[1]}

nav = {
"next": format_nav(ops.get_cr_diff_codes(db, new, None)),
"prev": format_nav(ops.get_cr_diff_codes(db, None, old)),
}

return {
"creationDay": diff.creation_day,
"changes": diff.changes,
"sourceSet": diff.source.set_name,
"sourceCode": diff.source.set_code,
"destSet": diff.dest.set_name,
"nav": nav,
"destCode": diff.dest.set_code,
"moves": diff.moves,
}


@router.get("/cr/", status_code=307, summary="Latest CR diff", responses={307: {"content": None}})
async def latest_cr_diff(db: Session = Depends(get_db)):
"""
Redirects to the latest CR diff available
"""
old, new = ops.get_latest_cr_diff_code(db)
return RedirectResponse(f"./{old}-{new}")


@router.get(
"/mtr/{effective_date}",
summary="MTR diff",
Expand Down
4 changes: 3 additions & 1 deletion app/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,11 @@ class CRDiffNav:
class CRDiff:
creation_day: datetime.date = Field(..., alias="creationDay")
source_set: str = Field(..., alias="sourceSet")
source_code: str = Field(..., alias="sourceCode")
dest_set: str = Field(..., alias="destSet")
dest_code: str = Field(..., alias="destCode")
changes: list[CRDiffItem] = Field(...)
nav: CRDiffNav = Field(..., description="Connections to neighboring diffs")
moves: list[tuple[str, str]] = Field(description="List of moved rules")


@dataclass(config=Config)
Expand Down
15 changes: 7 additions & 8 deletions create_cr_and_diff.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import asyncio
import json
import os
import sys

from app.difftool.diffmaker import CRDiffMaker
from app.parsing.cr import extract_cr
Expand Down Expand Up @@ -53,7 +52,7 @@ async def diff_save(old, new, forced_matches=None):
with open(os.path.join(diff_dir, diff_code + ".json"), "w") as file:
json.dump(dff.diff, file)
with open(os.path.join(maps_dir, diff_code + ".json"), "w") as file:
json.dump(dff.matches, file)
json.dump(dff.moved, file)
print(o_code, n_code)


Expand All @@ -67,14 +66,14 @@ async def diffall():

cr_in_dir = "app/static/raw_docs/cr"
cr_out_dir = "./gen/cr"
diff_dir = "./gen/diff"
diff_dir = "./gen/diff_unchecked"
maps_dir = "./gen/map"
gloss_dir = "./gen/gloss"
key_dir = "./gen/keywords"

if __name__ == "__main__":
# asyncio.run(diffall())
old = sys.argv[1]
new = sys.argv[2]
forced = []
asyncio.run(diff_save(old, new, forced))
asyncio.run(diffall())
# old = sys.argv[1]
# new = sys.argv[2]
# forced = []
# asyncio.run(diff_save(old, new, forced))

0 comments on commit 3eac2ac

Please sign in to comment.