From 97c67a1b09b375d2d28945c454caacd8fb358ddf Mon Sep 17 00:00:00 2001
From: Tillman Elser
Date: Wed, 17 Jul 2024 16:54:31 -0700
Subject: [PATCH 1/8] make id bigint sequence

---
Notes:
    Adds Alembic revision 95b4ba4f731d (revises 2597db647e9a). upgrade()
    widens grouping_records.id to BIGINT, creates grouping_records_id_seq,
    makes it the column default, marks it OWNED BY the column and seeds it
    from MAX(id). downgrade() drops the default and the sequence and
    narrows id back to INTEGER. The ORM model switches id to BigInteger
    and adds project_id to the primary key.

    NOTE(review): the migration does not touch the table's primary key,
    while the model now declares (id, project_id) -- confirm a follow-up
    migration covers that.
    NOTE(review): line breaks and indentation throughout this series were
    restored from a whitespace-collapsed copy. Hunk line counts were used
    to place blank context lines; leading whitespace of context lines
    (YAML in particular) and author e-mail addresses could not be
    recovered and should be confirmed against the tree before applying.

 .../versions/95b4ba4f731d_migration.py        | 33 +++++++++++++++++++
 src/seer/db.py                                |  4 +--
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 src/migrations/versions/95b4ba4f731d_migration.py

diff --git a/src/migrations/versions/95b4ba4f731d_migration.py b/src/migrations/versions/95b4ba4f731d_migration.py
new file mode 100644
index 000000000..366c90b07
--- /dev/null
+++ b/src/migrations/versions/95b4ba4f731d_migration.py
@@ -0,0 +1,33 @@
+"""Migration
+
+Revision ID: 95b4ba4f731d
+Revises: 2597db647e9a
+Create Date: 2024-07-17 23:35:18.871569
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "95b4ba4f731d"
+down_revision = "2597db647e9a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE BIGINT")
+    op.execute("CREATE SEQUENCE IF NOT EXISTS grouping_records_id_seq")
+    op.execute(
+        "ALTER TABLE grouping_records ALTER COLUMN id SET DEFAULT nextval('grouping_records_id_seq')"
+    )
+    op.execute("ALTER SEQUENCE grouping_records_id_seq OWNED BY grouping_records.id")
+    op.execute(
+        "SELECT setval('grouping_records_id_seq', COALESCE((SELECT MAX(id) FROM grouping_records), 1), true)"
+    )
+
+
+def downgrade():
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id DROP DEFAULT")
+    op.execute("DROP SEQUENCE grouping_records_id_seq")
+    op.execute("ALTER TABLE grouping_records ALTER COLUMN id TYPE INTEGER")
diff --git a/src/seer/db.py b/src/seer/db.py
index 2d1c5e3f3..bd0493729 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -233,8 +233,8 @@ class DbPrIdToAutofixRunIdMapping(Base):
 
 class DbGroupingRecord(Base):
     __tablename__ = "grouping_records"
-    id: Mapped[int] = mapped_column(Integer, primary_key=True)
-    project_id: Mapped[int] = mapped_column(BigInteger, nullable=False)
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)
     error_type: Mapped[str] = mapped_column(String, nullable=True)
     stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)

From a444269675b1d8d6dd3f4d4dd08380b6ef86904e Mon Sep 17 00:00:00 2001
From: Tillman Elser
Date: Wed, 17 Jul 2024 17:07:25 -0700
Subject: [PATCH 2/8] start fixing db.py

---
Notes:
    Drops the all-zero placeholder default on `hash` and renames the two
    indexes and the unique constraint on DbGroupingRecord.

    NOTE(review): after this patch the class body assigns __table_args__
    twice, so the dict added here is overwritten by the tuple below it,
    and its key is spelled "postgres_partition_by". PATCH 3/8 removes the
    line and passes "postgresql_partition_by" inside the tuple instead.

 src/seer/db.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/seer/db.py b/src/seer/db.py
index bd0493729..21a29d0e1 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -233,26 +233,25 @@ class DbPrIdToAutofixRunIdMapping(Base):
 
 class DbGroupingRecord(Base):
     __tablename__ = "grouping_records"
+    __table_args__ = {"postgres_partition_by": "HASH (project_id)"}
     id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
     project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)
     error_type: Mapped[str] = mapped_column(String, nullable=True)
     stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
-    hash: Mapped[str] = mapped_column(
-        String(32), nullable=False, default="00000000000000000000000000000000"
-    )
+    hash: Mapped[str] = mapped_column(String(32), nullable=False)
 
     __table_args__ = (
         Index(
-            "ix_grouping_records_stacktrace_embedding_hnsw",
+            "ix_grouping_records_new_stacktrace_embedding_hnsw",
             "stacktrace_embedding",
             postgresql_using="hnsw",
             postgresql_with={"m": 16, "ef_construction": 200},
             postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
         ),
         Index(
-            "ix_grouping_records_project_id",
+            "ix_grouping_records_new_project_id",
             "project_id",
         ),
-        UniqueConstraint("project_id", "hash", name="u_project_id_hash"),
+        UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
     )

From 5c1814f612b2a84238eb5ef78c3a81558e8aebab Mon Sep 17 00:00:00 2001
From: Zachary Collins
Date: Wed, 17 Jul 2024 17:55:05 -0700
Subject: [PATCH 3/8] Vroom vroom

---
Notes:
    Moves the column declarations into a DbGroupingRecordBase mixin, gives
    `id` a server default of nextval('grouping_records_id_seq'), passes
    "postgresql_partition_by": "HASH (project_id)" as the trailing dict of
    __table_args__, and generates 100 classes DbGroupingRecord0..99 mapped
    to grouping_records_p0..99 via globals().update(). The generated
    classes are removed again by PATCH 6/8.

 src/seer/db.py | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/seer/db.py b/src/seer/db.py
index 21a29d0e1..77361e792 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -231,16 +231,19 @@ class DbPrIdToAutofixRunIdMapping(Base):
     )
 
 
-class DbGroupingRecord(Base):
-    __tablename__ = "grouping_records"
-    __table_args__ = {"postgres_partition_by": "HASH (project_id)"}
-    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+class DbGroupingRecordBase:
+    id: Mapped[int] = mapped_column(
+        BigInteger, primary_key=True, server_default="nextval('grouping_records_id_seq')"
+    )
     project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)
     error_type: Mapped[str] = mapped_column(String, nullable=True)
     stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
     hash: Mapped[str] = mapped_column(String(32), nullable=False)
 
+
+class DbGroupingRecord(DbGroupingRecordBase, Base):
+    __tablename__ = "grouping_records"
     __table_args__ = (
         Index(
             "ix_grouping_records_new_stacktrace_embedding_hnsw",
@@ -254,4 +257,35 @@ class DbGroupingRecord(Base):
             "project_id",
         ),
         UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
+        {"postgresql_partition_by": "HASH (project_id)"},
     )
+
+
+globals().update(
+    {
+        f"DbGroupingRecord{i}": type(
+            f"DbGroupingRecord{i}",
+            (DbGroupingRecordBase, Base),
+            dict(
+                __tablename__=f"grouping_records_p{i}",
+                __table_args__=(
+                    Index(
+                        f"grouping_records_new_p{i}_stacktrace_embedding_idx",
+                        "stacktrace_embedding",
+                        postgresql_using="hnsw",
+                        postgresql_with={"m": 16, "ef_construction": 200},
+                        postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
+                    ),
+                    Index(
+                        f"grouping_records_new_p{i}_project_id_idx",
+                        "project_id",
+                    ),
+                    UniqueConstraint(
+                        "project_id", "hash", name=f"grouping_records_new_p{i}_project_id_hash_key"
+                    ),
+                ),
+            ),
+        )
+        for i in range(100)
+    }
+)

From 3e5702b443d24e003d348784017e1fa46caf7b32 Mon Sep 17 00:00:00 2001
From: Zachary Collins
Date: Wed, 17 Jul 2024 18:01:26 -0700
Subject: [PATCH 4/8] Adding check for pending migrations

---
Notes:
    Adds a `check-no-pending-migrations` make target that runs
    `flask db check` in the app container, and runs it in both CI
    workflows between the mypy and pytest steps.

    NOTE(review): YAML indentation of the workflow context lines below is
    reconstructed -- confirm against the workflow files.

 .github/workflows/test-branches.yml | 3 +++
 .github/workflows/test-prs.yml      | 3 +++
 Makefile                            | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/.github/workflows/test-branches.yml b/.github/workflows/test-branches.yml
index 4450ddb6c..211611ce8 100644
--- a/.github/workflows/test-branches.yml
+++ b/.github/workflows/test-branches.yml
@@ -42,6 +42,9 @@ jobs:
       - name: Typecheck with mypy
         run: |
           make mypy
+      - name: Validate no pending migrations
+        run: |
+          make check-no-pending-migrations
       - name: Test with pytest
         run: |
           make test
diff --git a/.github/workflows/test-prs.yml b/.github/workflows/test-prs.yml
index 3b7c270fe..7fa13b5fc 100644
--- a/.github/workflows/test-prs.yml
+++ b/.github/workflows/test-prs.yml
@@ -22,6 +22,9 @@ jobs:
       - name: Typecheck with mypy
         run: |
           make mypy
+      - name: Validate no pending migrations
+        run: |
+          make check-no-pending-migrations
       - name: Test with pytest
         run: |
           make test
diff --git a/Makefile b/Makefile
index 885b847bb..fcea2a58e 100644
--- a/Makefile
+++ b/Makefile
@@ -54,6 +54,10 @@ schemas: # Generates json files
 migration: .env
 	docker compose run app flask db migrate -m 'Migration'
 
+.PHONY: check-no-pending-migrations
+check-no-pending-migrations: .env
+	docker compose run app flask db check
+
 .PHONY: merge-migrations
 merge-migrations: .env
 	docker compose run app flask db merge heads

From 8cfcf0e683ce411ddd42eca91e0ff7320ef67055 Mon Sep 17 00:00:00 2001
From: Zachary Collins
Date: Wed, 17 Jul 2024 18:10:10 -0700
Subject: [PATCH 5/8] let's gooooooo

---
Notes:
    Imports Sequence and text from sqlalchemy, attaches
    Sequence("grouping_records_id_seq") to the `id` column and wraps its
    server default in text().

 src/seer/db.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/seer/db.py b/src/seer/db.py
index 77361e792..b07607369 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -14,11 +14,13 @@
     ForeignKey,
     Index,
     Integer,
+    Sequence,
     String,
     UniqueConstraint,
     delete,
     func,
     select,
+    text,
 )
 from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
@@ -233,7 +235,10 @@ class DbPrIdToAutofixRunIdMapping(Base):
 
 class DbGroupingRecordBase:
     id: Mapped[int] = mapped_column(
-        BigInteger, primary_key=True, server_default="nextval('grouping_records_id_seq')"
+        BigInteger,
+        Sequence("grouping_records_id_seq"),
+        primary_key=True,
+        server_default=text("nextval('grouping_records_id_seq')"),
     )
     project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
     message: Mapped[str] = mapped_column(String, nullable=False)

From 5b041e2c4c58ed306b0d0f9a12e6fdd8bcfb5a81 Mon Sep 17 00:00:00 2001
From: Zachary Collins
Date: Wed, 17 Jul 2024 20:55:03 -0700
Subject: [PATCH 6/8] fix

---
Notes:
    db.py: replaces the 100 generated per-partition classes with
    create_grouping_partition(), registered as an "after_create" listener
    on grouping_records, which issues CREATE TABLE grouping_records_p{i}
    PARTITION OF grouping_records for remainders 0..99 (modulus 100). The
    column declarations move back into DbGroupingRecord.

    env.py / alembic.ini: adds an [exclude] section whose comma-separated
    `matches` patterns are tested with re.match() in include_object();
    objects whose name matches are skipped by Alembic.

    Review fix folded into this patch: include_object() now skips empty
    patterns. "".split(",") yields [""], and re.match("", name) matches
    every name, so an empty `matches` value would otherwise exclude every
    named object and make autogenerate / `flask db check` report nothing.

    NOTE(review): get_excludes_from_config() is not called anywhere in
    this series.
    NOTE(review): the blob ids on the env.py `index` lines of this patch
    and of PATCH 8/8 predate the review fix above.

 src/migrations/alembic.ini |  3 ++
 src/migrations/env.py      | 30 +++++++++++++++-
 src/seer/db.py             | 71 +++++++++++++++-----------------------
 3 files changed, 60 insertions(+), 44 deletions(-)

diff --git a/src/migrations/alembic.ini b/src/migrations/alembic.ini
index ec9d45c26..502a22590 100644
--- a/src/migrations/alembic.ini
+++ b/src/migrations/alembic.ini
@@ -48,3 +48,6 @@ formatter = generic
 [formatter_generic]
 format = %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %H:%M:%S
+
+[exclude]
+matches = grouping_records_p.+,grouping_records_new_p.+
diff --git a/src/migrations/env.py b/src/migrations/env.py
index 54179952a..7a8ff2afd 100644
--- a/src/migrations/env.py
+++ b/src/migrations/env.py
@@ -1,8 +1,10 @@
 import logging
+import re
 from logging.config import fileConfig
 
 from alembic import context
 from flask import current_app
+from sqlalchemy.sql.schema import SchemaItem
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -94,12 +96,38 @@ def process_revision_directives(context, revision, directives):
     connectable = get_engine()
 
     with connectable.connect() as connection:
-        context.configure(connection=connection, target_metadata=get_metadata(), **conf_args)
+        context.configure(
+            connection=connection,
+            target_metadata=get_metadata(),
+            include_object=include_object,
+            **conf_args,
+        )
 
         with context.begin_transaction():
             context.run_migrations()
 
 
+def get_excludes_from_config(config_, type_="tables"):
+    excludes = config_.get(type_, None)
+    if excludes is not None:
+        excludes = excludes.split(",")
+    return excludes
+
+
+excluded_matches = config.get_section("exclude").get("matches", "").split(",")
+
+
+def include_object(
+    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem
+):
+    if name:
+        for pattern in excluded_matches:
+            if pattern and re.match(pattern, name):
+                return False
+
+    return True
+
+
 if context.is_offline_mode():
     run_migrations_offline()
 else:
diff --git a/src/seer/db.py b/src/seer/db.py
index b07607369..516b2eba4 100644
--- a/src/seer/db.py
+++ b/src/seer/db.py
@@ -1,6 +1,7 @@
 import contextlib
 import datetime
 import json
+from typing import Any
 
 import sqlalchemy
 from flask_migrate import Migrate
@@ -10,6 +11,7 @@
 from sqlalchemy import (
     JSON,
     BigInteger,
+    Connection,
     DateTime,
     ForeignKey,
     Index,
@@ -233,21 +235,19 @@ class DbPrIdToAutofixRunIdMapping(Base):
     )
 
 
-class DbGroupingRecordBase:
-    id: Mapped[int] = mapped_column(
-        BigInteger,
-        Sequence("grouping_records_id_seq"),
-        primary_key=True,
-        server_default=text("nextval('grouping_records_id_seq')"),
-    )
-    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
-    message: Mapped[str] = mapped_column(String, nullable=False)
-    error_type: Mapped[str] = mapped_column(String, nullable=True)
-    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
-    hash: Mapped[str] = mapped_column(String(32), nullable=False)
+def create_grouping_partition(target: Any, connection: Connection, **kw: Any) -> None:
+    for i in range(100):
+        connection.execute(
+            text(
+                f"""
+                CREATE TABLE grouping_records_p{i} PARTITION OF grouping_records
+                FOR VALUES WITH (MODULUS 100, REMAINDER {i});
+                """
+            )
+        )
 
 
-class DbGroupingRecord(DbGroupingRecordBase, Base):
+class DbGroupingRecord(Base):
     __tablename__ = "grouping_records"
     __table_args__ = (
         Index(
@@ -262,35 +262,20 @@ class DbGroupingRecord(DbGroupingRecordBase, Base):
             "project_id",
         ),
         UniqueConstraint("project_id", "hash", name="u_project_id_hash_composite"),
-        {"postgresql_partition_by": "HASH (project_id)"},
+        {
+            "postgresql_partition_by": "HASH (project_id)",
+            "listeners": [("after_create", create_grouping_partition)],
+        },
     )
 
-
-globals().update(
-    {
-        f"DbGroupingRecord{i}": type(
-            f"DbGroupingRecord{i}",
-            (DbGroupingRecordBase, Base),
-            dict(
-                __tablename__=f"grouping_records_p{i}",
-                __table_args__=(
-                    Index(
-                        f"grouping_records_new_p{i}_stacktrace_embedding_idx",
-                        "stacktrace_embedding",
-                        postgresql_using="hnsw",
-                        postgresql_with={"m": 16, "ef_construction": 200},
-                        postgresql_ops={"stacktrace_embedding": "vector_cosine_ops"},
-                    ),
-                    Index(
-                        f"grouping_records_new_p{i}_project_id_idx",
-                        "project_id",
-                    ),
-                    UniqueConstraint(
-                        "project_id", "hash", name=f"grouping_records_new_p{i}_project_id_hash_key"
-                    ),
-                ),
-            ),
-        )
-        for i in range(100)
-    }
-)
+    id: Mapped[int] = mapped_column(
+        BigInteger,
+        Sequence("grouping_records_id_seq"),
+        primary_key=True,
+        server_default=text("nextval('grouping_records_id_seq')"),
+    )
+    project_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, nullable=False)
+    message: Mapped[str] = mapped_column(String, nullable=False)
+    error_type: Mapped[str] = mapped_column(String, nullable=True)
+    stacktrace_embedding: Mapped[Vector] = mapped_column(Vector(768), nullable=False)
+    hash: Mapped[str] = mapped_column(String(32), nullable=False)

From 28ef0f7813877d62b01f7af7a726a9d2c44914b1 Mon Sep 17 00:00:00 2001
From: Zachary Collins
Date: Wed, 17 Jul 2024 20:56:05 -0700
Subject: [PATCH 7/8] Codecov comment

---
Notes:
    Removes the `status` configuration under `coverage:` and flips
    `comment` from false to true.

    NOTE(review): this leaves `coverage:` as a key with no value --
    confirm that is intended rather than dropping the key.
    NOTE(review): nesting of the removed YAML lines is reconstructed --
    confirm against codecov.yml.

 codecov.yml | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/codecov.yml b/codecov.yml
index b5d971b2d..e9c19b276 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -1,22 +1,2 @@
 coverage:
-  status:
-    project:
-      default: false
-    patch:
-      default: false
-      grouping:
-        paths:
-          - 'src/seer/grouping/'
-      automation:
-        paths:
-          - 'src/seer/automation/'
-      severity:
-        paths:
-          - 'src/seer/severity/'
-      trend_detection:
-        paths:
-          - 'src/seer/trend_detection/'
-      app:
-        paths:
-          - 'src/seer/app.py'
-comment: false
+comment: true

From 4e4c9e1164304f0732b022ef28270c6f6e96c7fd Mon Sep 17 00:00:00 2001
From: Zachary Collins
Date: Wed, 17 Jul 2024 21:06:25 -0700
Subject: [PATCH 8/8] Typing fixes

---
Notes:
    Falls back to an empty dict when config.get_section("exclude") returns
    None, and widens include_object()'s `compare_to` annotation to
    `SchemaItem | None`.

 src/migrations/env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/migrations/env.py b/src/migrations/env.py
index 7a8ff2afd..b4a7a4a36 100644
--- a/src/migrations/env.py
+++ b/src/migrations/env.py
@@ -114,11 +114,11 @@ def get_excludes_from_config(config_, type_="tables"):
     return excludes
 
 
-excluded_matches = config.get_section("exclude").get("matches", "").split(",")
+excluded_matches = (config.get_section("exclude") or {}).get("matches", "").split(",")
 
 
 def include_object(
-    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem
+    obj: SchemaItem, name: str | None, type_: str, reflected: bool, compare_to: SchemaItem | None
 ):
     if name:
         for pattern in excluded_matches: