Skip to content

Commit

Permalink
fix: Numbers to floats (#106)
Browse files Browse the repository at this point in the history
Closes #103

- Replaces #104
- This implementations is based on this comment
#103 (comment).
Wise treats all `number` json types as floats in snowflake.
- As part of this I reverted the changes from
#97 since they arent
needed anymore. We'll let the json schema validation enforce this vs
having strict snowflake schema enforcement.

Warning: I think this is a breaking change because I believe the target
will now try to update the type of existing columns from NUMBER to
DOUBLE which throws a `cannot change column NUM_COL from type
NUMBER(38,0) to FLOAT`.

@edgarrmondragon how do you think we should handle this in terms of
breaking changes? Should this be the 1.0 release?
  • Loading branch information
pnadolny13 authored Aug 14, 2023
1 parent 045b614 commit ae8f1aa
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 36 deletions.
30 changes: 2 additions & 28 deletions target_snowflake/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
from target_snowflake.snowflake_types import NUMBER, TIMESTAMP_NTZ, VARIANT

SNOWFLAKE_MAX_STRING_LENGTH = 16777216
SNOWFLAKE_MAX_NUMBER_PRECISION = 38
SNOWFLAKE_MAX_NUMBER_SCALE = 0

class TypeMap:
def __init__(self, operator, map_value, match_value=None):
Expand Down Expand Up @@ -95,7 +93,7 @@ def _convert_type(self, sql_type):
if isinstance(sql_type, sct.TIMESTAMP_NTZ):
return TIMESTAMP_NTZ
elif isinstance(sql_type, sct.NUMBER):
return NUMBER(precision=sql_type.precision, scale=sql_type.scale)
return NUMBER
elif isinstance(sql_type, sct.VARIANT):
return VARIANT
else:
Expand Down Expand Up @@ -220,28 +218,6 @@ def _conform_max_length(jsonschema_type):
jsonschema_type["maxLength"] = SNOWFLAKE_MAX_STRING_LENGTH
return jsonschema_type

@staticmethod
def _get_numeric_precision(jsonschema_type):
return SNOWFLAKE_MAX_NUMBER_PRECISION

@staticmethod
def _get_numeric_scale(jsonschema_type):
precision = SNOWFLAKE_MAX_NUMBER_SCALE
if jsonschema_type.get("exclusiveMinimum"):
if str(jsonschema_type[attrib])[-1] == 1:
return len(str(jsonschema_type[attrib]).split(".")[1]) - 1
else:
return len(str(jsonschema_type[attrib]).split(".")[1])
attribs_to_check = [
"multipleOf",
"min",
"max",
]
for attrib in attribs_to_check:
if jsonschema_type.get(attrib):
precision = max(precision, len(str(jsonschema_type[attrib]).split(".")[1]))
return precision

@staticmethod
def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
"""Return a JSON Schema representation of the provided type.
Expand All @@ -260,8 +236,6 @@ def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
# snowflake max and default varchar length
# https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
maxlength = jsonschema_type.get("maxLength", SNOWFLAKE_MAX_STRING_LENGTH)
num_precision = SnowflakeConnector._get_numeric_precision(jsonschema_type)
num_scale = SnowflakeConnector._get_numeric_scale(jsonschema_type)
# define type maps
string_submaps = [
TypeMap(eq, TIMESTAMP_NTZ(), "date-time"),
Expand All @@ -273,7 +247,7 @@ def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
TypeMap(th._jsonschema_type_check, NUMBER(), ("integer",)),
TypeMap(th._jsonschema_type_check, VARIANT(), ("object",)),
TypeMap(th._jsonschema_type_check, VARIANT(), ("array",)),
TypeMap(th._jsonschema_type_check, NUMBER(precision=num_precision, scale=num_scale), ("number",)),
TypeMap(th._jsonschema_type_check, sct.DOUBLE(), ("number",)),
]
# apply type maps
if th._jsonschema_type_check(jsonschema_type, ("string",)):
Expand Down
9 changes: 3 additions & 6 deletions tests/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def validate(self) -> None:
table_schema = connector.get_table(table)
expected_types = {
"id": sct.NUMBER,
"a1": sct.NUMBER,
"a1": sct.DOUBLE,
"a2": sct.STRING,
"a3": sqlalchemy.types.BOOLEAN,
"a4": sct.VARIANT,
Expand Down Expand Up @@ -435,7 +435,8 @@ def validate(self) -> None:
expected_types = {
"id": sct.NUMBER,
"col_max_length_str": sct.STRING,
"col_multiple_of": sct.NUMBER,
"col_multiple_of": sct.DOUBLE,
"col_multiple_of_int": sct.DOUBLE,
"_sdc_extracted_at": sct.TIMESTAMP_NTZ,
"_sdc_batched_at": sct.TIMESTAMP_NTZ,
"_sdc_received_at": sct.TIMESTAMP_NTZ,
Expand All @@ -446,10 +447,6 @@ def validate(self) -> None:
for column in table_schema.columns:
assert column.name in expected_types
isinstance(column.type, expected_types[column.name])
if column.name == "col_multiple_of":
assert column.type.precision == 38
assert column.type.scale == 4


target_tests = TestSuite(
kind="target",
Expand Down
4 changes: 2 additions & 2 deletions tests/target_test_streams/type_edge_cases.singer
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"type": "SCHEMA", "stream": "type_edge_cases", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}, "col_max_length_str": {"maxLength": 4294967295, "type": [ "null", "string" ] }, "col_multiple_of": {"multipleOf": 0.0001, "type": [ "null", "number" ] }}}}
{"type": "RECORD", "stream": "type_edge_cases", "record": {"id": 1, "col_max_length_str": "foo", "col_multiple_of": 123.456}}
{"type": "SCHEMA", "stream": "type_edge_cases", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}, "col_max_length_str": {"maxLength": 4294967295, "type": [ "null", "string" ] }, "col_multiple_of": {"multipleOf": 0.0001, "type": [ "null", "number" ] }, "col_multiple_of_int": {"multipleOf": 10, "type": [ "null", "number" ] }}}}
{"type": "RECORD", "stream": "type_edge_cases", "record": {"id": 1, "col_max_length_str": "foo", "col_multiple_of": 123.456, "col_multiple_of_int": 100}}

0 comments on commit ae8f1aa

Please sign in to comment.