Skip to content

Commit

Permalink
feat: Add support for DateTime64 column type (#5896)
Browse the repository at this point in the history
  • Loading branch information
phacops authored May 13, 2024
1 parent 177e23e commit be369fd
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 1 deletion.
2 changes: 2 additions & 0 deletions snuba/clickhouse/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
ColumnType,
Date,
DateTime,
DateTime64,
Enum,
FixedString,
FlattenedColumn,
Expand Down Expand Up @@ -35,6 +36,7 @@
"ColumnType",
"Date",
"DateTime",
"DateTime64",
"Enum",
"FixedString",
"FlattenedColumn",
Expand Down
13 changes: 13 additions & 0 deletions snuba/datasets/configuration/json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,25 @@ def del_name_field(column_schema: dict[str, Any]) -> dict[str, Any]:
},
)

# JSON schema for a column whose type is "DateTime64". The args object may
# carry a "precision" and a "timezone"; neither is marked required here, and
# any other key is rejected.
DATETIME64_SCHEMA = make_column_schema(
    column_type={"const": "DateTime64"},
    args={
        "type": "object",
        "properties": {
            # ClickHouse only accepts precisions in [0, 9]; bounding the value
            # here rejects a bad config at validation time instead of letting
            # it reach the DateTime64 constructor.
            "precision": {"type": "integer", "minimum": 0, "maximum": 9},
            "timezone": {"type": "string"},
        },
        "additionalProperties": False,
    },
)

# Column schemas grouped as "simple": the number, fixed-string, no-arg,
# aggregate-function, enum and DateTime64 schemas.
SIMPLE_COLUMN_SCHEMAS = [
    NUMBER_SCHEMA,
    FIXED_STRING_SCHEMA,
    NO_ARG_SCHEMA,
    AGGREGATE_FUNCTION_SCHEMA,
    ENUM_SCHEMA,
    DATETIME64_SCHEMA,
]

# Array inner types are the same as normal column types except they don't have a name
Expand Down
7 changes: 7 additions & 0 deletions snuba/datasets/configuration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Array,
Column,
DateTime,
DateTime64,
Enum,
Float,
Nested,
Expand Down Expand Up @@ -115,6 +116,12 @@ def __parse_column_type(col: dict[str, Any]) -> ColumnType[SchemaModifiers]:
column_type = FixedString(col["args"]["length"], modifiers)
elif col["type"] == "Enum":
column_type = Enum(col["args"]["values"], modifiers)
elif col["type"] == "DateTime64":
column_type = DateTime64(
precision=col["args"].get("precision", 3),
timezone=col["args"].get("timezone"),
modifiers=modifiers,
)
assert column_type is not None
return column_type

Expand Down
4 changes: 3 additions & 1 deletion snuba/migrations/parse_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
ColumnType,
Date,
DateTime,
DateTime64,
Enum,
FixedString,
Float,
Expand All @@ -29,7 +30,7 @@
type = primitive / lowcardinality / agg / nullable / array
primitive = basic_type / uint / float / fixedstring / enum
# DateTime must come before Date
basic_type = "DateTime" / "Date" / "IPv4" / "IPv6" / "String" / "UUID"
# DateTime64 must come before DateTime: ordered choice takes the first alternative that matches
basic_type = "DateTime64" / "DateTime" / "Date" / "IPv4" / "IPv6" / "String" / "UUID"
uint = "UInt" uint_size
uint_size = "8" / "16" / "32" / "64"
float = "Float" float_size
Expand Down Expand Up @@ -71,6 +72,7 @@ def merge_modifiers(
_TYPES: dict[str, type[ColumnType[MigrationModifiers]]] = {
"Date": Date,
"DateTime": DateTime,
"DateTime64": DateTime64,
"IPv4": IPv4,
"IPv6": IPv6,
"String": String,
Expand Down
48 changes: 48 additions & 0 deletions snuba/utils/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def has_modifier(self, modifier: Type[TypeModifier]) -> bool:

TModifiers = TypeVar("TModifiers", bound=TypeModifiers)


# Unfortunately we cannot easily make these classes dataclasses (which
# would provide a convenient default implementation for all __repr__
# and __eq__ methods and allow for immutability) while keeping the
Expand Down Expand Up @@ -558,6 +559,53 @@ class DateTime(ColumnType[TModifiers]):
pass


class DateTime64(ColumnType[TModifiers]):
    """
    ClickHouse ``DateTime64`` column type.

    :param precision: Sub-second precision (number of decimal digits).
        Must be in the range 0..9, which is what ClickHouse accepts.
    :param timezone: Optional timezone name rendered as the second type
        argument (e.g. ``'America/New_York'``).
    :param modifiers: Optional modifiers forwarded to ``ColumnType``.
    """

    def __init__(
        self,
        precision: int = 3,
        timezone: Optional[str] = None,
        modifiers: Optional[TModifiers] = None,
    ) -> None:
        # Bound the precision on both sides: a negative value would otherwise
        # be accepted and rendered as an invalid type such as "DateTime64(-1)".
        assert (
            0 <= precision <= 9
        ), f"DateTime64 precision must be between 0 and 9, got {precision}"
        super().__init__(modifiers)
        self.timezone = timezone
        self.precision = precision

    def _repr_content(self) -> str:
        """Return the type arguments: the precision, then the quoted timezone if set."""
        content = f"{self.precision}"
        if self.timezone:
            content += f", '{self.timezone}'"
        return content

    def __eq__(self, other: object) -> bool:
        """Equal when class, modifiers, precision and timezone all match."""
        if self.__class__ != other.__class__:
            return False
        that = cast(DateTime64[TModifiers], other)
        return (
            self.get_modifiers() == that.get_modifiers()
            and self.precision == that.precision
            and self.timezone == that.timezone
        )

    def _for_schema_impl(self) -> str:
        """Render the type as ``DateTime64(<precision>[, '<timezone>'])``."""
        return f"DateTime64({self._repr_content()})"

    def set_modifiers(self, modifiers: Optional[TModifiers]) -> DateTime64[TModifiers]:
        """Return a new DateTime64 with the same precision/timezone and the given modifiers."""
        return DateTime64(
            precision=self.precision,
            timezone=self.timezone,
            modifiers=modifiers,
        )

    def get_raw(self) -> DateTime64[TModifiers]:
        """Return a new DateTime64 with the same precision/timezone and no modifiers."""
        return DateTime64(
            precision=self.precision,
            timezone=self.timezone,
        )


class Enum(ColumnType[TModifiers]):
def __init__(
self,
Expand Down
15 changes: 15 additions & 0 deletions tests/clickhouse/test_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
ColumnType,
Date,
DateTime,
DateTime64,
Enum,
FixedString,
Float,
Expand Down Expand Up @@ -86,6 +87,20 @@
"DateTime",
id="datetimes",
),
pytest.param(
DateTime64(3, "America/New_York"),
DateTime64(3, "America/New_York"),
DateTime64(9, modifiers=Modifier(nullable=True)),
"DateTime64(3, 'America/New_York')",
id="datetime64s_tz",
),
pytest.param(
DateTime64(3),
DateTime64(3),
DateTime64(9, modifiers=Modifier(nullable=True)),
"DateTime64(3)",
id="datetime64s_notz",
),
pytest.param(
Array(String(Modifier(nullable=True))),
Array(String()),
Expand Down
7 changes: 7 additions & 0 deletions tests/datasets/configuration/test_storage_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Array,
Column,
DateTime,
DateTime64,
Enum,
Float,
Nested,
Expand Down Expand Up @@ -120,6 +121,11 @@ def test_column_parser(self) -> None:
{"name": "float_col", "type": "Float", "args": {"size": 32}},
{"name": "string_col", "type": "String"},
{"name": "time_col", "type": "DateTime"},
{
"name": "time64_col",
"type": "DateTime64",
"args": {"precision": 3, "timezone": "America/New_York"},
},
{
"name": "nested_col",
"type": "Nested",
Expand Down Expand Up @@ -171,6 +177,7 @@ def test_column_parser(self) -> None:
Column("float_col", Float(32)),
Column("string_col", String()),
Column("time_col", DateTime()),
Column("time64_col", DateTime64(3, "America/New_York")),
Column("nested_col", Nested([Column("sub_col", UInt(64))])),
Column("func_col", AggregateFunction("uniqCombined64", [UInt(64)])),
Column(
Expand Down

0 comments on commit be369fd

Please sign in to comment.