# JSON schema for a ``DateTime64`` column entry in a storage/entity
# configuration file. Both arguments are optional; unknown argument keys are
# rejected via ``additionalProperties``.
DATETIME64_SCHEMA = make_column_schema(
    column_type={"const": "DateTime64"},
    args={
        "type": "object",
        "properties": {
            # ClickHouse accepts between 0 and 9 sub-second digits. Bounding
            # the value here rejects a bad configuration at validation time
            # instead of later, when the column type object is constructed.
            "precision": {"type": "integer", "minimum": 0, "maximum": 9},
            "timezone": {"type": "string"},
        },
        "additionalProperties": False,
    },
)
class DateTime64(ColumnType[TModifiers]):
    """
    Column type rendered as ClickHouse ``DateTime64(precision[, 'timezone'])``.

    ``precision`` is the number of sub-second decimal digits (ClickHouse
    accepts 0 through 9). ``timezone`` is an optional timezone name; when it
    is ``None`` or empty it is left out of the rendered type.
    """

    # Inclusive bounds ClickHouse accepts for the DateTime64 precision argument.
    _MIN_PRECISION = 0
    _MAX_PRECISION = 9

    def __init__(
        self,
        precision: int = 3,
        timezone: Optional[str] = None,
        modifiers: Optional[TModifiers] = None,
    ) -> None:
        """
        :param precision: Sub-second digits to store, between 0 and 9.
        :param timezone: Optional timezone name rendered into the type.
        :param modifiers: Optional column modifiers passed to the base class.
        :raises ValueError: If ``precision`` is outside the 0-9 range.
        """
        # Validate explicitly rather than with ``assert``: assertions are
        # removed when Python runs with -O, and the previous check had no
        # lower bound, so negative precisions slipped through.
        if not self._MIN_PRECISION <= precision <= self._MAX_PRECISION:
            raise ValueError(
                f"DateTime64 precision must be between {self._MIN_PRECISION} "
                f"and {self._MAX_PRECISION}, got {precision}"
            )
        super().__init__(modifiers)
        self.timezone = timezone
        self.precision = precision

    def _repr_content(self) -> str:
        """Return the argument list, e.g. ``3`` or ``3, 'America/New_York'``."""
        content = f"{self.precision}"
        if self.timezone:
            content += f", '{self.timezone}'"
        return content

    def __eq__(self, other: object) -> bool:
        """Equal when class, modifiers, precision and timezone all match."""
        if self.__class__ != other.__class__:
            return False
        # Safe to narrow: the class check above guarantees the attributes exist.
        that = cast(DateTime64[TModifiers], other)
        return (
            self.get_modifiers() == that.get_modifiers()
            and self.precision == that.precision
            and self.timezone == that.timezone
        )

    def _for_schema_impl(self) -> str:
        """Return the ClickHouse type expression used in schema DDL."""
        return f"DateTime64({self._repr_content()})"

    def set_modifiers(self, modifiers: Optional[TModifiers]) -> DateTime64[TModifiers]:
        """Return a copy with the same precision/timezone and new modifiers."""
        return DateTime64(
            precision=self.precision,
            timezone=self.timezone,
            modifiers=modifiers,
        )

    def get_raw(self) -> DateTime64[TModifiers]:
        """Return a copy with the same precision/timezone and no modifiers."""
        return DateTime64(
            precision=self.precision,
            timezone=self.timezone,
        )
"time64_col", + "type": "DateTime64", + "args": {"precision": 3, "timezone": "America/New_York"}, + }, { "name": "nested_col", "type": "Nested", @@ -171,6 +177,7 @@ def test_column_parser(self) -> None: Column("float_col", Float(32)), Column("string_col", String()), Column("time_col", DateTime()), + Column("time64_col", DateTime64(3, "America/New_York")), Column("nested_col", Nested([Column("sub_col", UInt(64))])), Column("func_col", AggregateFunction("uniqCombined64", [UInt(64)])), Column(