diff --git a/.github/workflows/test-branches.yml b/.github/workflows/test-branches.yml
index 4450ddb6c..211611ce8 100644
--- a/.github/workflows/test-branches.yml
+++ b/.github/workflows/test-branches.yml
@@ -42,6 +42,9 @@ jobs:
       - name: Typecheck with mypy
         run: |
           make mypy
+      - name: Validate no pending migrations
+        run: |
+          make check-no-pending-migrations
       - name: Test with pytest
         run: |
           make test
diff --git a/.github/workflows/test-prs.yml b/.github/workflows/test-prs.yml
index 3b7c270fe..7fa13b5fc 100644
--- a/.github/workflows/test-prs.yml
+++ b/.github/workflows/test-prs.yml
@@ -22,6 +22,9 @@ jobs:
       - name: Typecheck with mypy
         run: |
           make mypy
+      - name: Validate no pending migrations
+        run: |
+          make check-no-pending-migrations
       - name: Test with pytest
         run: |
           make test
diff --git a/Makefile b/Makefile
index 885b847bb..fcea2a58e 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,10 @@ schemas: # Generates json files
 migration: .env
 	docker compose run app flask db migrate -m 'Migration'

+.PHONY: check-no-pending-migrations
+check-no-pending-migrations: .env
+	docker compose run app flask db check
+
 .PHONY: merge-migrations
 merge-migrations: .env
 	docker compose run app flask db merge heads
diff --git a/codecov.yml b/codecov.yml
index b5d971b2d..e9c19b276 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -1,22 +1,2 @@
 coverage:
-  status:
-    project:
-      default: false
-    patch:
-      default: false
-      grouping:
-        paths:
-          - 'src/seer/grouping/'
-      automation:
-        paths:
-          - 'src/seer/automation/'
-      severity:
-        paths:
-          - 'src/seer/severity/'
-      trend_detection:
-        paths:
-          - 'src/seer/trend_detection/'
-      app:
-        paths:
-          - 'src/seer/app.py'
-comment: false
+comment: true
diff --git a/src/migrations/alembic.ini b/src/migrations/alembic.ini
index ec9d45c26..502a22590 100644
--- a/src/migrations/alembic.ini
+++ b/src/migrations/alembic.ini
@@ -48,3 +48,6 @@ formatter = generic
 [formatter_generic]
 format = %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %H:%M:%S
+
+[exclude]
+matches = grouping_records_p.+,grouping_records_new_p.+
diff --git a/src/migrations/env.py b/src/migrations/env.py
index 54179952a..b4a7a4a36 100644
--- a/src/migrations/env.py
+++ b/src/migrations/env.py
@@ -1,8 +1,10 @@
 import logging
+import re
 from logging.config import fileConfig

 from alembic import context
 from flask import current_app
+from sqlalchemy.sql.schema import SchemaItem

 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
     connectable = get_engine()

     with connectable.connect() as connection:
-        context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
+        context.configure(
+            connection=connection,
+            target_metadata=get_metadata(),
+            include_object=include_object,
+            **conf_args,
+        )

         with context.begin_transaction():
             context.run_migrations()


+def get_excludes_from_config(config_, type_="tables"):
+    excludes = config_.get(type_, None)
+    if excludes is not None:
+        excludes = excludes.split(",")
+    return excludes
+
+
+excluded_matches = (config.get_section("exclude") or {}).get("matches", "").split(",")
+
+
+def include_object(
+    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem | None
+):
+    if name:
+        for pattern in excluded_matches:
+            if re.match(pattern, name):
+                return False
+
+    return True
+
+
 if context.is_offline_mode():
     run_migrations_offline()
 else:
diff --git a/src/migrations/versions/95b4ba4f731d_migration.py b/src/migrations/versions/95b4ba4f731d_migration.py
new file mode 100644
index 000000000..366c90b07
--- /dev/null
+++ b/src/migrations/versions/95b4ba4f731d_migration.py
@@ -0,0 +1,33 @@
+"""Migration
+
+Revision ID: 95b4ba4f731d
+Revises: 2597db647e9a
+Create Date: 2024-07-17 23:35:18.871569
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "95b4ba4f731d"
+down_revision = "2597db647e9a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE BIGINT")
+    op.execute("CREATE SEQUENCE IF NOT EXISTS grouping_records_id_seq")
+    op.execute(
+        "ALTER TABLE grouping_records ALTER COLUMN id SET DEFAULT nextval('grouping_records_id_seq')"
+    )
+    op.execute("ALTER SEQUENCE grouping_records_id_seq OWNED BY grouping_records.id")
+    op.execute(
+        "SELECT setval('grouping_records_id_seq', COALESCE((SELECT MAX(id) FROM grouping_records), 1), true)"
+    )
+
+
+def downgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id DROP DEFAULT")
+    op.execute("DROP SEQUENCE grouping_records_id_seq")
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE INTEGER")
diff --git a/src/seer/db.py b/src/seer/db.py
index 2d1c5e3f3..516b2eba4 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -1,6 +1,7 @@
 import contextlib
 import datetime
 import json
+from typing import Any

 import sqlalchemy
 from flask_migrate import Migrate
@@ -10,15 +11,18 @@ from sqlalchemy import (
     JSON,
     BigInteger,
+    Connection,
     DateTime,
     ForeignKey,
     Index,
     Integer,
+    Sequence,
     String,
     UniqueConstraint,
     delete,
     func,
     select,
+    text,
 )
 from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker

@@ -231,28 +235,47 @@ class DbPrIdToAutofixRunIdMapping(Base):
     )


+def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
+    for i in range(100):
+        connection.execute(
+            text(
+                f"""
+                CREATE TABLE grouping_records_p{i} PARTITION OF grouping_records
+                FOR VALUES WITH (MODULUS 100, REMAINDER {i});
+                """
+            )
+        )
+
+
 class DbGroupingRecord(Base):
     __tablename__ = "grouping_records"
-    id: Mapped[int] = mapped_column(Integer, primary_key=True)
-    project_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
-    message: Mapped[str] = mapped_column(String, nullable=False)
-    error_type: Mapped[str] = mapped_column(String, nullable=True)
-    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
-    hash: Mapped[str] = mapped_column(
-        String(32), nullable=False, default="00000000000000000000000000000000"
-    )
-
     __table_args__ = (
         Index(
-            "ix_grouping_records_stacktrace_embedding_hnsw",
+            "ix_grouping_records_new_stacktrace_embedding_hnsw",
             "stacktrace_embedding",
             postgresql_using="hnsw",
             postgresql_with={"m": 16, "ef_construction": 200},
             postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
         ),
         Index(
-            "ix_grouping_records_project_id",
+            "ix_grouping_records_new_project_id",
             "project_id",
         ),
-        UniqueConstraint("project_id", "hash", name="u_project_id_hash"),
+        UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
+        {
+            "postgresql_partition_by": "HASH (project_id)",
+            "listeners": [("after_create", create_grouping_partition)],
+        },
+    )
+
+    id: Mapped[int] = mapped_column(
+        BigInteger,
+        Sequence("grouping_records_id_seq"),
+        primary_key=True,
+        server_default=text("nextval('grouping_records_id_seq')"),
     )
+    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
+    message: Mapped[str] = mapped_column(String, nullable=False)
+    error_type: Mapped[str] = mapped_column(String, nullable=True)
+    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
+    hash: Mapped[str] = mapped_column(String(32), nullable=False)
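A minimal standalone sketch (not part of the patch) of how the exclusion added in src/migrations/alembic.ini and src/migrations/env.py is intended to behave: table names matching the [exclude] patterns, i.e. the hash partitions created by create_grouping_partition, are skipped during Alembic autogenerate, so the new `flask db check` step does not flag them as pending changes. The patterns are hard-coded below and the table names are hypothetical, purely for illustration.

import re

# Patterns copied from the new [exclude] section of alembic.ini;
# env.py loads them at runtime via config.get_section("exclude").
excluded_matches = "grouping_records_p.+,grouping_records_new_p.+".split(",")


def include_object(obj, name, type_, reflected, compare_to):
    # Mirror of the hook registered via context.configure(include_object=...):
    # any object whose name matches an excluded pattern is ignored by
    # autogenerate, so the 100 hash partitions never show up as schema drift.
    if name:
        for pattern in excluded_matches:
            if re.match(pattern, name):
                return False
    return True


# Illustrative checks with hypothetical names:
assert include_object(None, "grouping_records_p42", "table", True, None) is False
assert include_object(None, "grouping_records_new_p7", "table", True, None) is False
assert include_object(None, "grouping_records", "table", True, None) is True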