From 8c6f2a3256a06e0d8be316b678fb66db8c6509ee Mon Sep 17 00:00:00 2001
From: Pramod
Date: Mon, 28 Oct 2024 21:00:18 -0700
Subject: [PATCH] Add function serializeSingleColumn to PrestoVectorSerde

Add PrestoVectorSerde::serializeSingleColumn(), which serializes a vector
holding exactly one element into a single column in PrestoPage column
format and writes it to a std::ostream. The PrestoPage header is not
emitted, so the output starts at the column header. A user error is raised
when the input vector does not contain exactly one row.

---
 velox/serializers/PrestoSerializer.cpp | 20 ++++++++++++++++++++
 velox/serializers/PrestoSerializer.h   | 14 ++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/velox/serializers/PrestoSerializer.cpp b/velox/serializers/PrestoSerializer.cpp
index 26000c49e6118..ff9462aa00ac9 100644
--- a/velox/serializers/PrestoSerializer.cpp
+++ b/velox/serializers/PrestoSerializer.cpp
@@ -4265,6 +4265,26 @@ void PrestoVectorSerde::deserializeSingleColumn(
   *result = row->childAt(0);
 }
 
+void PrestoVectorSerde::serializeSingleColumn(
+    const VectorPtr& vector,
+    const PrestoOptions& opts,
+    memory::MemoryPool* pool,
+    std::ostream* output) {
+  const auto numRows = vector->size();
+  VELOX_USER_CHECK_EQ(numRows, 1, "Input vector should have a single element");
+  const IndexRange allRows{0, numRows};
+  const auto ranges = folly::Range(&allRows, 1);
+  const auto arena = std::make_unique<StreamArena>(pool);
+  auto stream = std::make_unique<VectorStream>(
+      vector->type(), std::nullopt, std::nullopt, arena.get(), numRows, opts);
+  Scratch scratch;
+  serializeColumn(vector, ranges, stream.get(), scratch);
+
+  PrestoOutputStreamListener listener;
+  OStreamOutputStream outputStream(output, &listener);
+  stream->flush(&outputStream);
+}
+
 // static
 void PrestoVectorSerde::registerVectorSerde() {
   auto toByte = [](int32_t number, int32_t bit) {
diff --git a/velox/serializers/PrestoSerializer.h b/velox/serializers/PrestoSerializer.h
index 6bd323568d116..74c8a5f031d9b 100644
--- a/velox/serializers/PrestoSerializer.h
+++ b/velox/serializers/PrestoSerializer.h
@@ -41,6 +41,10 @@ namespace facebook::velox::serializer::presto {
 /// 2. To serialize a single RowVector, one can use the BatchVectorSerializer
 /// returned by createBatchSerializer(). Since it serializes a single RowVector,
 /// it tries to preserve the encodings of the input data.
+///
+/// 3. To serialize data from a vector containing a single element into one
+/// column, adhering to PrestoPage's column format and excluding the PrestoPage
+/// header, one can use serializeSingleColumn() directly.
 class PrestoVectorSerde : public VectorSerde {
  public:
   // Input options that the serializer recognizes.
@@ -134,6 +138,16 @@ class PrestoVectorSerde : public VectorSerde {
       VectorPtr* result,
       const Options* options);
 
+  /// This function is used to serialize data from a vector containing a single
+  /// element into one column that conforms to PrestoPage's column format. The
+  /// PrestoPage header is not included, so the serialized binary data starts
+  /// at the column header.
+  void serializeSingleColumn(
+      const VectorPtr& vector,
+      const PrestoOptions& opts,
+      memory::MemoryPool* pool,
+      std::ostream* output);
+
   enum class TokenType {
     HEADER,
     NUM_COLUMNS,