Skip to content

Commit

Permalink
Add support for all timezones for timestamp (#254)
Browse files Browse the repository at this point in the history
`np.dtype('datetime64[ns, UTC]')` is not a numpy type, which is why we were getting the type error. I have also added a proper conversion of timestamps from various timezones to 'UTC', since that is the standard we follow in core.
Also added unit tests to validate the fix and catch similar exceptions in the future.

With new changes:
1) TIMESTAMP columns with different timezones are now accepted and converted to GMT/UTC time, which is the convention assumed/followed in omnisci core
2) Increased unit test coverage

Relates to #219
fixes #253
  • Loading branch information
wamsiv committed Jul 25, 2019
1 parent 777a26f commit b820ef4
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 16 deletions.
10 changes: 6 additions & 4 deletions pymapd/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import numpy as np
import pandas as pd


def seconds_to_time(seconds):
Expand All @@ -23,14 +24,15 @@ def datetime_to_seconds(arr):
if arr.dtype == 'int64':
# The user has passed a unix timestamp already
return arr
elif arr.dtype == 'object' or arr.dtype == 'datetime64[ns, UTC]':
elif arr.dtype == 'object' or str(arr.dtype).startswith(
'datetime64[ns,'):
# Convert to datetime64[ns] from string
# Or from datetime with timezone information
arr = arr.astype('datetime64[ns]')
# Return timestamp in 'UTC'
arr = pd.to_datetime(arr, utc=True)
else:
raise TypeError(f"Invalid dtype '{arr.dtype}', expected one of: "
"datetime64[ns], datetime64[ns, UTC], "
"int64 (representing seconds since epoch), "
"datetime64[ns], int64 (UNIX epoch), "
"or object (string)")
return arr.view('i8') // 10**9 # ns -> s since epoch

Expand Down
39 changes: 27 additions & 12 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,17 +594,24 @@ def test_load_columnar_pandas_all(self, con):
c.execute(create)

data = pd.DataFrame({
"boolean_": [True, False],
"smallint_": np.array([0, 1], dtype=np.int16),
"int_": np.array([0, 1], dtype=np.int32),
"bigint_": np.array([0, 1], dtype=np.int64),
"float_": np.array([0, 1], dtype=np.float32),
"double_": np.array([0, 1], dtype=np.float64),
"varchar_": ["a", "b"],
"text_": ['a', 'b'],
"time_": [datetime.time(0, 11, 59), datetime.time(13)],
"timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
"date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
"boolean_": [True, False, True, False],
"smallint_": np.array([0, 1, 0, 1], dtype=np.int16),
"int_": np.array([0, 1, 0, 1], dtype=np.int32),
"bigint_": np.array([0, 1, 0, 1], dtype=np.int64),
"float_": np.array([0, 1, 0, 1], dtype=np.float32),
"double_": np.array([0, 1, 0, 1], dtype=np.float64),
"varchar_": ["a", "b", "a", "b"],
"text_": ['a', 'b', 'a', 'b'],
"time_": [datetime.time(0, 11, 59), datetime.time(13),
datetime.time(22, 58, 59), datetime.time(7, 13, 43)],
"timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017"),
pd.Timestamp('2017-11-28 23:55:59.342380',
tz='US/Eastern'),
pd.Timestamp('2018-11-28 23:55:59.342380',
tz='Asia/Calcutta')],
"date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1),
datetime.date(2017, 11, 28),
datetime.date(2018, 11, 28)],
}, columns=['boolean_', 'smallint_', 'int_', 'bigint_', 'float_',
'double_', 'varchar_', 'text_', 'time_', 'timestamp_',
'date_'])
Expand All @@ -618,7 +625,15 @@ def test_load_columnar_pandas_all(self, con):
(0, 1, 1, 1, 1.0, 1.0, 'b', 'b',
datetime.time(13, 0),
datetime.datetime(2017, 1, 1, 0, 0),
datetime.date(2017, 1, 1))]
datetime.date(2017, 1, 1)),
(1, 0, 0, 0, 0.0, 0.0, 'a', 'a',
datetime.time(22, 58, 59),
datetime.datetime(2017, 11, 29, 4, 55, 59),
datetime.date(2017, 11, 28)),
(0, 1, 1, 1, 1.0, 1.0, 'b', 'b',
datetime.time(7, 13, 43),
datetime.datetime(2018, 11, 28, 18, 25, 59),
datetime.date(2018, 11, 28))]

assert result == expected
c.execute('drop table if exists all_types;')
Expand Down

0 comments on commit b820ef4

Please sign in to comment.