Skip to content

Commit

Permalink
Add support for all timezones for timestamp (#254)
Browse files Browse the repository at this point in the history
`np.dtype('datetime64[ns, UTC]')` is not a numpy type, which is why we were getting the type error. I have also added a proper conversion of timestamps from various timezones to 'UTC', since that is the standard we follow in core.
Also added unit tests to validate the fix and catch similar exceptions in the future.

With new changes:
1) TIMESTAMP columns with different timezones are now accepted and converted to GMT/UTC time, which is the convention assumed/followed in omnisci core
2) Increased unit test coverage

Relates to #219
fixes #253
  • Loading branch information
wamsiv committed Jul 25, 2019
1 parent 777a26f commit b820ef4
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 16 deletions.
10 changes: 6 additions & 4 deletions pymapd/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import numpy as np
import pandas as pd


def seconds_to_time(seconds):
Expand All @@ -23,14 +24,15 @@ def datetime_to_seconds(arr):
if arr.dtype == 'int64':
# The user has passed a unix timestamp already
return arr
elif arr.dtype == 'object' or arr.dtype == 'datetime64[ns, UTC]':
elif arr.dtype == 'object' or str(arr.dtype).startswith(
'datetime64[ns,'):
# Convert to datetime64[ns] from string
# Or from datetime with timezone information
arr = arr.astype('datetime64[ns]')
# Return timestamp in 'UTC'
arr = pd.to_datetime(arr, utc=True)
else:
raise TypeError(f"Invalid dtype '{arr.dtype}', expected one of: "
"datetime64[ns], datetime64[ns, UTC], "
"int64 (representing seconds since epoch), "
"datetime64[ns], int64 (UNIX epoch), "
"or object (string)")
return arr.view('i8') // 10**9 # ns -> s since epoch

Expand Down
39 changes: 27 additions & 12 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,17 +594,24 @@ def test_load_columnar_pandas_all(self, con):
c.execute(create)

data = pd.DataFrame({
"boolean_": [True, False],
"smallint_": np.array([0, 1], dtype=np.int16),
"int_": np.array([0, 1], dtype=np.int32),
"bigint_": np.array([0, 1], dtype=np.int64),
"float_": np.array([0, 1], dtype=np.float32),
"double_": np.array([0, 1], dtype=np.float64),
"varchar_": ["a", "b"],
"text_": ['a', 'b'],
"time_": [datetime.time(0, 11, 59), datetime.time(13)],
"timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
"date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
"boolean_": [True, False, True, False],
"smallint_": np.array([0, 1, 0, 1], dtype=np.int16),
"int_": np.array([0, 1, 0, 1], dtype=np.int32),
"bigint_": np.array([0, 1, 0, 1], dtype=np.int64),
"float_": np.array([0, 1, 0, 1], dtype=np.float32),
"double_": np.array([0, 1, 0, 1], dtype=np.float64),
"varchar_": ["a", "b", "a", "b"],
"text_": ['a', 'b', 'a', 'b'],
"time_": [datetime.time(0, 11, 59), datetime.time(13),
datetime.time(22, 58, 59), datetime.time(7, 13, 43)],
"timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017"),
pd.Timestamp('2017-11-28 23:55:59.342380',
tz='US/Eastern'),
pd.Timestamp('2018-11-28 23:55:59.342380',
tz='Asia/Calcutta')],
"date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1),
datetime.date(2017, 11, 28),
datetime.date(2018, 11, 28)],
}, columns=['boolean_', 'smallint_', 'int_', 'bigint_', 'float_',
'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
'date_'])
Expand All @@ -618,7 +625,15 @@ def test_load_columnar_pandas_all(self, con):
(0, 1, 1, 1, 1.0, 1.0, 'b', 'b',
datetime.time(13, 0),
datetime.datetime(2017, 1, 1, 0, 0),
datetime.date(2017, 1, 1))]
datetime.date(2017, 1, 1)),
(1, 0, 0, 0, 0.0, 0.0, 'a', 'a',
datetime.time(22, 58, 59),
datetime.datetime(2017, 11, 29, 4, 55, 59),
datetime.date(2017, 11, 28)),
(0, 1, 1, 1, 1.0, 1.0, 'b', 'b',
datetime.time(7, 13, 43),
datetime.datetime(2018, 11, 28, 18, 25, 59),
datetime.date(2018, 11, 28))]

assert result == expected
c.execute('drop table if exists all_types;')
Expand Down

0 comments on commit b820ef4

Please sign in to comment.