From e0c19567186cd0008f4b201c7636dc2d65f98b3c Mon Sep 17 00:00:00 2001 From: Wamsi Viswanath Date: Fri, 15 Mar 2019 23:38:32 -0400 Subject: [PATCH] Add timestamp precision support in Parser (#171) * Add timestamp precision support in parsers * Cast nanosecs to microsecs till support is added * Use python STL divmod and update docs --- docs/source/usage.rst | 6 ++++++ pymapd/_parsers.py | 11 ++++++----- pymapd/_utils.py | 22 ++++++++++++++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index d40bfe9..b54b3c0 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -60,6 +60,12 @@ third method, using Thrift to serialize and deserialize the data, will slower than the GPU or CPU shared memory methods. The shared memory methods require that your OmniSci database is running on the same machine. +.. note:: + + We currently support ``Timestamp(0|3|6)`` data types, i.e. seconds, milliseconds, + and microseconds granularity. Support for nanoseconds, ``Timestamp(9)``, is in + progress. 
+ GPU Select ^^^^^^^^^^ diff --git a/pymapd/_parsers.py b/pymapd/_parsers.py index 1cc1626..be8e0f9 100644 --- a/pymapd/_parsers.py +++ b/pymapd/_parsers.py @@ -9,7 +9,7 @@ import ctypes from types import MethodType from ._mutators import set_tdf, get_tdf -from ._utils import seconds_to_time +from ._utils import seconds_to_time, datetime_in_precisions import numpy as np from .ipc import load_buffer, shmdt from typing import Any, List @@ -54,10 +54,10 @@ def _extract_row_val(desc, val): if val.is_null: return None val = getattr(val.val, _typeattr[typename] + '_val') - base = datetime.datetime(1970, 1, 1) if typename == 'TIMESTAMP': - val = (base + datetime.timedelta(seconds=val)) + val = datetime_in_precisions(val, desc.col_type.precision) elif typename == 'DATE': + base = datetime.datetime(1970, 1, 1) val = (base + datetime.timedelta(seconds=val)).date() elif typename == 'TIME': val = seconds_to_time(val) @@ -73,11 +73,12 @@ def _extract_col_vals(desc, val): vals = [None if null else v for null, v in zip(nulls, vals)] - base = datetime.datetime(1970, 1, 1) if typename == 'TIMESTAMP': - vals = [None if v is None else base + datetime.timedelta(seconds=v) + vals = [None if v is None else + datetime_in_precisions(v, desc.col_type.precision) for v in vals] elif typename == 'DATE': + base = datetime.datetime(1970, 1, 1) vals = [None if v is None else (base + datetime.timedelta(seconds=v)).date() for v in vals] elif typename == 'TIME': diff --git a/pymapd/_utils.py b/pymapd/_utils.py index 0c985ec..7b6faf4 100644 --- a/pymapd/_utils.py +++ b/pymapd/_utils.py @@ -34,6 +34,28 @@ def datetime_to_seconds(arr): return arr.view('i8') // 10**9 # ns -> s since epoch +def datetime_in_precisions(epoch, precision): + """Convert an epoch value in s/ms/us/ns (precision 0/3/6/9) to datetime""" + base = datetime.datetime(1970, 1, 1) + if precision == 0: + return base + datetime.timedelta(seconds=epoch) + elif precision == 3: + seconds, modulus = divmod(epoch, 1000) + return base + 
datetime.timedelta(seconds=seconds, milliseconds=modulus) + elif precision == 6: + seconds, modulus = divmod(epoch, 1000000) + return base + datetime.timedelta(seconds=seconds, microseconds=modulus) + elif precision == 9: + """ TODO(Wamsi): datetime.timedelta has support only till microseconds. Need to find an alternative and fix nanoseconds + granularity""" + epoch //= 1000  # ns -> us; floor division keeps the value an int + seconds, modulus = divmod(epoch, 1000000) + return base + datetime.timedelta(seconds=seconds, microseconds=modulus) + else: + raise TypeError("Invalid timestamp precision: {}".format(precision)) + + def date_to_seconds(arr): """Converts date into seconds"""