From bcb33b9e9f4450de2db338f82c4ef3b1b9468522 Mon Sep 17 00:00:00 2001
From: Corey Kosak
Date: Sun, 22 Sep 2024 17:43:46 -0400
Subject: [PATCH] feat(py/client-ticking): Add support for LocalDate and LocalTime

Expose the C++ LocalDate and LocalTime element types to the Cython layer:

* _core.pxd: declare ElementTypeId.kLocalDate / kLocalTime, the CLocalDate
  and CLocalTime classes, and the CythonSupport factories
  CreateLocalDateColumnSource / CreateLocalTimeColumnSource.
* _core.pyx: fill LocalDate / LocalTime chunks through an int64 buffer and
  map them to pa.date64() and pa.time64("ns"); convert Date64Array and
  Time64Array back to column sources through a shared helper,
  _convert_underlying_int64_to_column_source, which the timestamp
  conversion now uses as well.
* py/client/README.md: recommend installing into a venv.

---
 .../src/pydeephaven_ticking/_core.pxd         | 16 ++++
 .../src/pydeephaven_ticking/_core.pyx         | 77 ++++++++++++++++---
 py/client/README.md                           |  9 +++
 3 files changed, 93 insertions(+), 9 deletions(-)

diff --git a/py/client-ticking/src/pydeephaven_ticking/_core.pxd b/py/client-ticking/src/pydeephaven_ticking/_core.pxd
index 199bad6a2c8..d9230686533 100644
--- a/py/client-ticking/src/pydeephaven_ticking/_core.pxd
+++ b/py/client-ticking/src/pydeephaven_ticking/_core.pxd
@@ -138,10 +138,18 @@ cdef extern from "deephaven/dhcore/types.h" namespace "deephaven::dhcore":
         kBool "deephaven::dhcore::ElementTypeId::kBool"
         kString "deephaven::dhcore::ElementTypeId::kString"
         kTimestamp "deephaven::dhcore::ElementTypeId::kTimestamp"
+        kLocalDate "deephaven::dhcore::ElementTypeId::kLocalDate"
+        kLocalTime "deephaven::dhcore::ElementTypeId::kLocalTime"
 
     cdef cppclass CDateTime "deephaven::dhcore::DateTime":
         pass
 
+    cdef cppclass CLocalDate "deephaven::dhcore::LocalDate":
+        pass
+
+    cdef cppclass CLocalTime "deephaven::dhcore::LocalTime":
+        pass
+
 cdef extern from "deephaven/dhcore/utility/cython_support.h" namespace "deephaven::dhcore::utility":
     cdef cppclass CCythonSupport "deephaven::dhcore::utility::CythonSupport":
         @staticmethod
@@ -157,6 +165,14 @@ cdef extern from "deephaven/dhcore/utility/cython_support.h" namespace "deephave
         shared_ptr[CColumnSource] CreateDateTimeColumnSource(const int64_t *dataBegin, const int64_t *dataEnd,
             const uint8_t *validityBegin, const uint8_t *validityEnd, size_t numElements)
 
+        @staticmethod
+        shared_ptr[CColumnSource] CreateLocalDateColumnSource(const int64_t *dataBegin, const int64_t *dataEnd,
+            const uint8_t *validityBegin, const uint8_t *validityEnd, size_t numElements)
+
+        @staticmethod
+        shared_ptr[CColumnSource] CreateLocalTimeColumnSource(const int64_t *dataBegin, const int64_t *dataEnd,
+            const uint8_t *validityBegin, const uint8_t *validityEnd, size_t numElements)
+
         @staticmethod
         ElementTypeId GetElementTypeId(const CColumnSource &columnSource)
 
diff --git a/py/client-ticking/src/pydeephaven_ticking/_core.pyx b/py/client-ticking/src/pydeephaven_ticking/_core.pyx
index 681db2f7aa5..64b8ccfe7ba 100644
--- a/py/client-ticking/src/pydeephaven_ticking/_core.pyx
+++ b/py/client-ticking/src/pydeephaven_ticking/_core.pyx
@@ -20,7 +20,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move, pair
 from libcpp.vector cimport vector
-from typing import Dict, List, Sequence, Union, cast
+from typing import Sequence, cast
 
 # Simple wrapper of the corresponding C++ TickingUpdate class.
 cdef class TickingUpdate:
@@ -291,6 +291,16 @@ cdef class ColumnSource:
             dest_data_as_int64 = dest_data.view(dtype=np.int64)
             self._fill_timestamp_chunk(rows, dest_data_as_int64, null_flags_ptr)
             arrow_type = pa.timestamp("ns", tz="UTC")
+        elif element_type_id == ElementTypeId.kLocalDate:
+            dest_data = np.zeros(size, dtype=np.int64)
+            dest_data_as_int64 = dest_data.view(dtype=np.int64)
+            self._fill_localdate_chunk(rows, dest_data_as_int64, null_flags_ptr)
+            arrow_type = pa.date64()
+        elif element_type_id == ElementTypeId.kLocalTime:
+            dest_data = np.zeros(size, dtype=np.int64)
+            dest_data_as_int64 = dest_data.view(dtype=np.int64)
+            self._fill_localtime_chunk(rows, dest_data_as_int64, null_flags_ptr)
+            arrow_type = pa.time64("ns")
         else:
             raise RuntimeError(f"Unexpected ElementTypeId {element_type_id}")
 
@@ -329,13 +339,36 @@ cdef class ColumnSource:
     # fill_chunk helper method for timestamp. In this case we shamelessly treat the Python timestamp
     # type as an int64, and then further shamelessly pretend that it's a Deephaven DateTime type.
     cdef _fill_timestamp_chunk(self, rows: RowSequence, int64_t[::1] dest_data, CGenericChunk[bool] *null_flags_ptr):
-        """
-        static_assert(sizeof(int64_t) == sizeof(CDateTime));
-        """
+        cdef extern from *:
+            """
+            static_assert(deephaven::dhcore::DateTime::IsBlittableToInt64());
+            """
         rsSize = rows.size
         dest_chunk = CGenericChunk[CDateTime].CreateView(&dest_data[0], rsSize)
         deref(self.column_source).FillChunk(deref(rows.row_sequence), &dest_chunk, null_flags_ptr)
 
+    # fill_chunk helper method for LocalDate. The destination is a plain int64 buffer, which we
+    # shamelessly view as a chunk of Deephaven LocalDate (guarded by the static_assert below).
+    cdef _fill_localdate_chunk(self, rows: RowSequence, int64_t[::1] dest_data, CGenericChunk[bool] *null_flags_ptr):
+        cdef extern from *:
+            """
+            static_assert(deephaven::dhcore::LocalDate::IsBlittableToInt64());
+            """
+        rsSize = rows.size
+        dest_chunk = CGenericChunk[CLocalDate].CreateView(&dest_data[0], rsSize)
+        deref(self.column_source).FillChunk(deref(rows.row_sequence), &dest_chunk, null_flags_ptr)
+
+    # fill_chunk helper method for LocalTime. The destination is a plain int64 buffer, which we
+    # shamelessly view as a chunk of Deephaven LocalTime (guarded by the static_assert below).
+    cdef _fill_localtime_chunk(self, rows: RowSequence, int64_t[::1] dest_data, CGenericChunk[bool] *null_flags_ptr):
+        cdef extern from *:
+            """
+            static_assert(deephaven::dhcore::LocalTime::IsBlittableToInt64());
+            """
+        rsSize = rows.size
+        dest_chunk = CGenericChunk[CLocalTime].CreateView(&dest_data[0], rsSize)
+        deref(self.column_source).FillChunk(deref(rows.row_sequence), &dest_chunk, null_flags_ptr)
+
 # Converts an Arrow array to a C++ ColumnSource of the right type. The created column source does not own the
 # memory used, so it is only valid as long as the original Arrow array is valid.
 cdef shared_ptr[CColumnSource] _convert_arrow_array_to_column_source(array: pa.Array) except *:
@@ -345,6 +378,10 @@ cdef shared_ptr[CColumnSource] _convert_arrow_array_to_column_source(array: pa.A
         return _convert_arrow_boolean_array_to_column_source(cast(pa.lib.BooleanArray, array))
     if isinstance(array, pa.lib.TimestampArray):
         return _convert_arrow_timestamp_array_to_column_source(cast(pa.lib.TimestampArray, array))
+    if isinstance(array, pa.lib.Date64Array):
+        return _convert_arrow_date64_array_to_column_source(cast(pa.lib.Date64Array, array))
+    if isinstance(array, pa.lib.Time64Array):
+        return _convert_arrow_time64_array_to_column_source(cast(pa.lib.Time64Array, array))
     buffers = array.buffers()
     if len(buffers) != 2:
         raise RuntimeError(f"Expected 2 simple type buffers, got {len(buffers)}")
@@ -427,10 +464,32 @@ cdef shared_ptr[CColumnSource] _convert_arrow_string_array_to_column_source(arra
 # Converts an Arrow TimestampArray to a C++ DateTimeColumnSource. The created column source does not own the
 # memory used, so it is only valid as long as the original Arrow array is valid.
 cdef shared_ptr[CColumnSource] _convert_arrow_timestamp_array_to_column_source(array: pa.TimestampArray) except *:
+    return _convert_underlying_int64_to_column_source(array, CCythonSupport.CreateDateTimeColumnSource)
+
+# Converts an Arrow Date64Array to a C++ LocalDateColumnSource. The created column source does not own the
+# memory used, so it is only valid as long as the original Arrow array is valid.
+cdef shared_ptr[CColumnSource] _convert_arrow_date64_array_to_column_source(array: pa.Date64Array) except *:
+    return _convert_underlying_int64_to_column_source(array, CCythonSupport.CreateLocalDateColumnSource)
+
+# Converts an Arrow Time64Array to a C++ LocalTimeColumnSource. The created column source does not own the
+# memory used, so it is only valid as long as the original Arrow array is valid.
+cdef shared_ptr[CColumnSource] _convert_arrow_time64_array_to_column_source(array: pa.Time64Array) except *:
+    return _convert_underlying_int64_to_column_source(array, CCythonSupport.CreateLocalTimeColumnSource)
+
+# Signature of one of the factory functions in CCythonSupport: CreateDateTimeColumnSource, CreateLocalDateColumnSource
+# or CreateLocalTimeColumnSource.
+ctypedef shared_ptr[CColumnSource](*factory_t)(const int64_t *, const int64_t *, const uint8_t *, const uint8_t *, size_t)
+
+# Converts one of the numeric Arrow types with an underlying int64 representation to the
+# corresponding ColumnSource type. The created column source does not own the
+# memory used, so it is only valid as long as the original Arrow array is valid.
+cdef shared_ptr[CColumnSource] _convert_underlying_int64_to_column_source(
+        array: pa.NumericArray,
+        factory: factory_t) except *:
     num_elements = len(array)
     buffers = array.buffers()
     if len(buffers) != 2:
-        raise RuntimeError(f"Expected 2 timestamp buffers, got {len(buffers)}")
+        raise RuntimeError(f"Expected 2 buffers, got {len(buffers)}")
     validity = buffers[0]
     data = buffers[1]
 
@@ -442,9 +501,7 @@ cdef shared_ptr[CColumnSource] _convert_arrow_timestamp_array_to_column_source(a
 
     cdef const int64_t *data_begin = data.address
     cdef const int64_t *data_end = (data.address + data.size)
-
-    return CCythonSupport.CreateDateTimeColumnSource(data_begin, data_end, validity_begin, validity_end,
-        num_elements)
+    return factory(data_begin, data_end, validity_begin, validity_end, num_elements)
 
 # This method converts a PyArrow Schema object to a C++ Schema object.
 cdef shared_ptr[CSchema] _pyarrow_schema_to_deephaven_schema(src: pa.Schema) except *:
@@ -547,7 +604,9 @@ cdef _equivalentTypes = [
     _EquivalentTypes.create(ElementTypeId.kDouble, pa.float64()),
     _EquivalentTypes.create(ElementTypeId.kBool, pa.bool_()),
     _EquivalentTypes.create(ElementTypeId.kString, pa.string()),
-    _EquivalentTypes.create(ElementTypeId.kTimestamp, pa.timestamp("ns", "UTC"))
+    _EquivalentTypes.create(ElementTypeId.kTimestamp, pa.timestamp("ns", "UTC")),
+    _EquivalentTypes.create(ElementTypeId.kLocalDate, pa.date64()),
+    _EquivalentTypes.create(ElementTypeId.kLocalTime, pa.time64("ns"))
 ]
 
 # Converts a Deephaven type (an enum) into the corresponding PyArrow type.
diff --git a/py/client/README.md b/py/client/README.md
index 31d774ccb24..6f9843fcf49 100644
--- a/py/client/README.md
+++ b/py/client/README.md
@@ -3,6 +3,15 @@
 
 Deephaven Python Client is a Python package created by Deephaven Data Labs. It is a client API that allows Python applications to remotely access Deephaven data servers.
 
+## `venv`
+
+It's recommended to install in a Python virtual environment (venv). Use a command like the
+below to create a venv.
+
+``` shell
+python3 -m venv ~/py/dhenv
+```
+
 ## Source Directory
 
 ### From the deephaven-core repository root