fix: Numbers to floats (#106)

Closes #103 - Replaces #104 - This implementations is based on this comment #103 (comment). Wise treats all `number` json types as floats in snowflake. - As part of this I reverted the changes from #97 since they arent needed anymore. We'll let the json schema validation enforce this vs having strict snowflake schema enforcement. Warning: I think this is a breaking change because I believe the target will now try to update the type of existing columns from NUMBER to DOUBLE which throws a `cannot change column NUM_COL from type NUMBER(38,0) to FLOAT`. @edgarrmondragon how do you think we should handle this in terms of breaking changes? Should this be the 1.0 release?
MeltanoLabs · Aug 14, 2023 · ae8f1aa · ae8f1aa
1 parent 045b614
commit ae8f1aa
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 36 deletions.
diff --git a/target_snowflake/connector.py b/target_snowflake/connector.py
@@ -14,8 +14,6 @@
 from target_snowflake.snowflake_types import NUMBER, TIMESTAMP_NTZ, VARIANT
 
 SNOWFLAKE_MAX_STRING_LENGTH = 16777216
-SNOWFLAKE_MAX_NUMBER_PRECISION = 38
-SNOWFLAKE_MAX_NUMBER_SCALE = 0
 
 class TypeMap:
  def __init__(self, operator, map_value, match_value=None):
@@ -95,7 +93,7 @@ def _convert_type(self, sql_type):
  if isinstance(sql_type, sct.TIMESTAMP_NTZ):
  return TIMESTAMP_NTZ
  elif isinstance(sql_type, sct.NUMBER):
- return NUMBER(precision=sql_type.precision, scale=sql_type.scale)
+ return NUMBER
  elif isinstance(sql_type, sct.VARIANT):
  return VARIANT
  else:
@@ -220,28 +218,6 @@ def _conform_max_length(jsonschema_type):
  jsonschema_type["maxLength"] = SNOWFLAKE_MAX_STRING_LENGTH
  return jsonschema_type
 
- @staticmethod
- def _get_numeric_precision(jsonschema_type):
- return SNOWFLAKE_MAX_NUMBER_PRECISION
-
- @staticmethod
- def _get_numeric_scale(jsonschema_type):
- precision = SNOWFLAKE_MAX_NUMBER_SCALE
- if jsonschema_type.get("exclusiveMinimum"):
- if str(jsonschema_type[attrib])[-1] == 1:
- return len(str(jsonschema_type[attrib]).split(".")[1]) - 1
- else:
- return len(str(jsonschema_type[attrib]).split(".")[1])
- attribs_to_check = [
- "multipleOf",
- "min",
- "max",
- ]
- for attrib in attribs_to_check:
- if jsonschema_type.get(attrib):
- precision = max(precision, len(str(jsonschema_type[attrib]).split(".")[1]))
- return precision
-
  @staticmethod
  def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
  """Return a JSON Schema representation of the provided type.
@@ -260,8 +236,6 @@ def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
  # snowflake max and default varchar length
  # https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
  maxlength = jsonschema_type.get("maxLength", SNOWFLAKE_MAX_STRING_LENGTH)
- num_precision = SnowflakeConnector._get_numeric_precision(jsonschema_type)
- num_scale = SnowflakeConnector._get_numeric_scale(jsonschema_type)
  # define type maps
  string_submaps = [
  TypeMap(eq, TIMESTAMP_NTZ(), "date-time"),
@@ -273,7 +247,7 @@ def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
  TypeMap(th._jsonschema_type_check, NUMBER(), ("integer",)),
  TypeMap(th._jsonschema_type_check, VARIANT(), ("object",)),
  TypeMap(th._jsonschema_type_check, VARIANT(), ("array",)),
- TypeMap(th._jsonschema_type_check, NUMBER(precision=num_precision, scale=num_scale), ("number",)),
+ TypeMap(th._jsonschema_type_check, sct.DOUBLE(), ("number",)),
  ]
  # apply type maps
  if th._jsonschema_type_check(jsonschema_type, ("string",)):

diff --git a/tests/core.py b/tests/core.py
@@ -255,7 +255,7 @@ def validate(self) -> None:
  table_schema = connector.get_table(table)
  expected_types = {
  "id": sct.NUMBER,
- "a1": sct.NUMBER,
+ "a1": sct.DOUBLE,
  "a2": sct.STRING,
  "a3": sqlalchemy.types.BOOLEAN,
  "a4": sct.VARIANT,
@@ -435,7 +435,8 @@ def validate(self) -> None:
  expected_types = {
  "id": sct.NUMBER,
  "col_max_length_str": sct.STRING,
- "col_multiple_of": sct.NUMBER,
+ "col_multiple_of": sct.DOUBLE,
+ "col_multiple_of_int": sct.DOUBLE,
  "_sdc_extracted_at": sct.TIMESTAMP_NTZ,
  "_sdc_batched_at": sct.TIMESTAMP_NTZ,
  "_sdc_received_at": sct.TIMESTAMP_NTZ,
@@ -446,10 +447,6 @@ def validate(self) -> None:
  for column in table_schema.columns:
  assert column.name in expected_types
  isinstance(column.type, expected_types[column.name])
- if column.name == "col_multiple_of":
- assert column.type.precision == 38
- assert column.type.scale == 4
-
 
 target_tests = TestSuite(
  kind="target",

diff --git a/tests/target_test_streams/type_edge_cases.singer b/tests/target_test_streams/type_edge_cases.singer
@@ -1,2 +1,2 @@
-{"type": "SCHEMA", "stream": "type_edge_cases", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}, "col_max_length_str": {"maxLength": 4294967295, "type": [ "null", "string" ] }, "col_multiple_of": {"multipleOf": 0.0001, "type": [ "null", "number" ] }}}}
-{"type": "RECORD", "stream": "type_edge_cases", "record": {"id": 1, "col_max_length_str": "foo", "col_multiple_of": 123.456}}
+{"type": "SCHEMA", "stream": "type_edge_cases", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}, "col_max_length_str": {"maxLength": 4294967295, "type": [ "null", "string" ] }, "col_multiple_of": {"multipleOf": 0.0001, "type": [ "null", "number" ] }, "col_multiple_of_int": {"multipleOf": 10, "type": [ "null", "number" ] }}}}
+{"type": "RECORD", "stream": "type_edge_cases", "record": {"id": 1, "col_max_length_str": "foo", "col_multiple_of": 123.456, "col_multiple_of_int": 100}}