Skip to content

Commit

Permalink
[native] Add TPC-DS connector
Browse files Browse the repository at this point in the history
Co-authored-by: Pramod Satya <pramod.satya@ibm.com>
  • Loading branch information
2 people authored and Pratik Joseph Dabre committed Sep 6, 2024
1 parent c49bf2e commit 47e2545
Show file tree
Hide file tree
Showing 34 changed files with 2,541 additions and 45 deletions.
4 changes: 3 additions & 1 deletion presto-docs/src/main/sphinx/presto-cpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,6 @@ Only specific connectors are supported in the Presto C++ evaluation engine.

* Iceberg connector supports both V1 and V2 tables, including tables with delete files.

* TPCH connector, with ``tpch.naming=standard`` catalog property.
* TPCH connector, with ``tpch.naming=standard`` catalog property.

* TPCDS connector.
3 changes: 2 additions & 1 deletion presto-native-execution/presto_cpp/main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_subdirectory(types)
add_subdirectory(http)
add_subdirectory(common)
add_subdirectory(thrift)
add_subdirectory(connectors)

add_library(
presto_server_lib
Expand Down Expand Up @@ -93,7 +94,7 @@ add_executable(presto_server PrestoMain.cpp)
# "undefined reference to `vtable for velox::connector::tpch::TpchTableHandle`"
# TODO: Fix these errors.
target_link_libraries(presto_server presto_server_lib velox_hive_connector
velox_tpch_connector)
velox_tpch_connector presto_tpcds_connector)

if(PRESTO_ENABLE_REMOTE_FUNCTIONS)
add_library(presto_server_remote_function JsonSignatureParser.cpp
Expand Down
2 changes: 2 additions & 0 deletions presto-native-execution/presto_cpp/main/PrestoServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ void PrestoServer::run() {
std::make_unique<SystemPrestoToVeloxConnector>("system"));
registerPrestoToVeloxConnector(
std::make_unique<SystemPrestoToVeloxConnector>("$system@system"));
registerPrestoToVeloxConnector(
std::make_unique<TpcdsPrestoToVeloxConnector>("tpcds"));

initializeVeloxMemory();
initializeThreadPools();
Expand Down
15 changes: 15 additions & 0 deletions presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Adds the tpcds subdirectory (and the targets its CMakeLists.txt defines) to
# the build.
add_subdirectory(tpcds)
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.14)
# CMP0079 NEW lets target_link_libraries() be called on targets that were
# created in another directory. This file relies on that for
# velox_exec_test_lib and, presumably, dsdgen_c (neither is defined here).
cmake_policy(SET CMP0079 NEW)

project(TPCDS)

# The connector itself. It links against tpcds_gen, which is defined at the
# bottom of this file.
add_library(presto_tpcds_connector OBJECT TpcdsConnector.cpp)
target_link_libraries(presto_tpcds_connector velox_connector tpcds_gen fmt::fmt)

# Without this hack, there are multiple link errors similar to the one below,
# only on GCC: "undefined reference to `vtable for
# velox::connector::tpcds::TpcdsTableHandle`". TODO: Fix this hack.
target_link_libraries(velox_exec_test_lib presto_tpcds_connector)

# Warning suppressions for Clang builds.
# NOTE(review): these are directory-wide options; they look intended for the
# dsdgen sources added further down rather than for the connector - confirm.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wno-deprecated-declarations -Wno-writable-strings
-Wno-missing-field-initializers)
endif()

# This stringop-overflow warning seems to have lots of false positives and has
# been the source of a lot of compiler bug reports (e.g.
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578), which causes
# parquet-amalgamation.cpp to fail to compile. For now, we disable this warning
# on the affected compiler (GCC).
# NOTE(review): parquet-amalgamation.cpp is not built by this file; the
# paragraph above appears to be carried over from another project. Confirm
# which sources here actually need these suppressions.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
add_compile_options(-Wno-stringop-overflow -Wno-write-strings)
endif()

# Include directories for the bundled dsdgen headers.
include_directories(${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/include)
include_directories(
${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/include/dsdgen-c)
include_directories(${CMAKE_SOURCE_DIR}/presto_cpp/external/include)

# Add the dsdgen C sources. The directory lives outside this source tree, so an
# explicit binary directory ("build") is passed as the second argument.
add_subdirectory(${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/dsdgen-c build)

# append_info is linked into dsdgen_c below.
# NOTE(review): this links a library named velox_vector_test_lib into a
# non-test target - confirm that this dependency is intentional.
add_library(append_info OBJECT include/append_info-c.cpp)
target_link_libraries(append_info velox_vector_test_lib Folly::folly xsimd)
target_link_libraries(dsdgen_c append_info)

# Generator library consumed by presto_tpcds_connector above.
add_library(tpcds_gen TpcdsGen.cpp DSDGenIterator.cpp)
target_include_directories(tpcds_gen PUBLIC dsdgen/include)
target_link_libraries(tpcds_gen velox_memory velox_vector dsdgen_c append_info
fmt::fmt)
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "presto_cpp/main/connectors/tpcds/DSDGenIterator.h"

#include <stdexcept>
#include <string>
#include <utility>

#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/genrand.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/parallel.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/params.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/scaling.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/tdefs.h"

using namespace facebook::velox;
namespace facebook::presto::connector::tpcds {

/// Resets 'dsdGenContext' and configures it for a new generation run.
///
/// The SCALE, PARALLEL and CHILD parameters are handed to dsdgen as decimal
/// strings via set_str(), after which init_rand() is called on the context.
///
/// @param scale Value stored under the "SCALE" parameter.
/// @param parallel Value stored under the "PARALLEL" parameter.
/// @param child Value stored under the "CHILD" parameter.
/// @param dsdGenContext Context that is reset and then populated.
void InitializeDSDgen(
    double scale,
    vector_size_t parallel,
    vector_size_t child,
    DSDGenContext& dsdGenContext) {
  // Render all parameters as text up front. The strings are named locals so
  // that their buffers stay valid until the function returns, i.e. past the
  // init_rand() call below.
  const std::string scaleText = std::to_string(scale);
  const std::string parallelText = std::to_string(parallel);
  const std::string childText = std::to_string(child);

  dsdGenContext.Reset();
  resetCountCount();

  set_str("SCALE", scaleText.c_str(), dsdGenContext);
  set_str("PARALLEL", parallelText.c_str(), dsdGenContext);
  set_str("CHILD", childText.c_str(), dsdGenContext);

  // No random numbers are available without this call.
  init_rand(dsdGenContext);
}

/// Returns the text of the TPC-DS query with the given number.
///
/// @param query 1-based query number; valid values are 1 through
/// TPCDS_QUERIES_COUNT inclusive.
/// @return The entry of TPCDS_QUERIES at index 'query' - 1.
/// @throws std::out_of_range (derived from std::exception, which is what this
/// function threw before) when 'query' is outside the valid range.
std::string getQuery(int query) {
  if (query <= 0 || query > TPCDS_QUERIES_COUNT) {
    // Name the offending value so that the failure can be diagnosed; a bare
    // std::exception carries no message at all.
    throw std::out_of_range(
        "Invalid TPC-DS query number: " + std::to_string(query));
  }
  return TPCDS_QUERIES[query - 1];
}

/// Creates an iterator with one empty table definition slot per table id and
/// an initialized DSDGEN context.
///
/// @param scaleFactor Scale factor forwarded to InitializeDSDgen(); must be
/// non-negative, otherwise a VELOX check fails.
/// @param parallel Forwarded to InitializeDSDgen().
/// @param child Forwarded to InitializeDSDgen().
DSDGenIterator::DSDGenIterator(
    double scaleFactor,
    vector_size_t parallel,
    vector_size_t child)
    // DBGEN_VERSION null slots; the original comment states that there are 24
    // TPC-DS tables.
    : table_defs(DBGEN_VERSION) {
  VELOX_CHECK_GE(scaleFactor, 0.0, "Tpcds scale factor must be non-negative");
  InitializeDSDgen(scaleFactor, parallel, child, dsdgenCtx_);
}

void DSDGenIterator::initializeTable(
const std::vector<VectorPtr>& children,
int table_id) {
auto tdef = getSimpleTdefsByNumber(table_id, dsdgenCtx_);
tpcds_table_def table_def;
table_def.name = tdef->name;
table_def.fl_child = tdef->flags & FL_CHILD ? 1 : 0;
table_def.fl_small = tdef->flags & FL_SMALL ? 1 : 0;
table_def.first_column = tdef->nFirstColumn;
table_def.children = children;
table_def.dsdGenContext = &dsdgenCtx_;
table_defs[table_id] = std::make_unique<tpcds_table_def>(table_def);
}

/// Returns a mutable reference to the list of table definitions, indexed by
/// table id. Slots that have not been filled by initializeTable() hold null.
std::vector<std::unique_ptr<tpcds_table_def>>& DSDGenIterator::getTableDefs() {
  return table_defs;
}

/// Looks up the dsdgen function table for 'table_id' and returns its row
/// builder callback.
tpcds_builder_func DSDGenIterator::GetTDefFunctionByNumber(int table_id) {
  return getTdefFunctionsByNumber(table_id)->builder;
}

/// Prepares generation of 'table_id' to start at row 'offset' by forwarding
/// both values, together with this iterator's context, to dsdgen's row_skip().
void DSDGenIterator::initTableOffset(int32_t table_id, size_t offset) {
  auto& context = dsdgenCtx_;
  row_skip(table_id, offset, context);
}
void DSDGenIterator::genRow(int32_t table_id, size_t index) {
auto builder_func = GetTDefFunctionByNumber(table_id);
builder_func((void*)&table_defs, index, dsdgenCtx_);
row_stop(table_id, dsdgenCtx_);
}

int64_t DSDGenIterator::getRowCount(int32_t table) {
return get_rowcount(table, dsdgenCtx_);
}

} // namespace facebook::presto::connector::tpcds
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <memory>

#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h"
#include "presto_cpp/external/dsdgen/include/tpcds_constants.hpp"
#include "presto_cpp/main/connectors/tpcds/include/append_info-c.hpp"

using namespace facebook::velox;
namespace facebook::presto::connector::tpcds {

/// Key / row-number type passed to the row builder callbacks.
using ds_key_t = int64_t;

/// Signature of a per-table row builder callback: an opaque destination
/// pointer, a ds_key_t value and the DSDGEN context to generate with.
using tpcds_builder_func = int (*)(void*, ds_key_t, DSDGenContext&);

/// Resets 'dsdGenContext', stores 'scale', 'parallel' and 'child' in it as the
/// "SCALE", "PARALLEL" and "CHILD" parameters, and initializes its random
/// number generation.
void InitializeDSDgen(
double scale,
vector_size_t parallel,
vector_size_t child,
DSDGenContext& dsdGenContext);

/// Returns the text of TPC-DS query number 'query' (1-based). Throws an
/// exception derived from std::exception when 'query' is not in
/// [1, TPCDS_QUERIES_COUNT].
std::string getQuery(int query);

/// This class exposes a thread-safe and reproducible iterator over TPC-DS
/// synthetically generated data, backed by DSDGEN.
///
/// Each instance owns its own DSDGenContext and a list of table definitions
/// indexed by table id.
/// NOTE(review): no locking is visible in this class; confirm what the
/// thread-safety claim above relies on.
class DSDGenIterator {
public:
/// Sizes the table definition list and initializes the DSDGEN context with
/// the given scale factor and parallel / child values. A VELOX check fails if
/// 'scaleFactor' is negative.
explicit DSDGenIterator(
double scaleFactor,
vector_size_t parallel,
vector_size_t child);

/// Creates the definition of table 'table', storing 'children' in it, and
/// places it in the definition list at index 'table'.
void initializeTable(const std::vector<VectorPtr>& children, int table);

/// Returns a mutable reference to the table definition list.
std::vector<std::unique_ptr<tpcds_table_def>>& getTableDefs();

// Before generating records using the gen*() functions below, call the
// appropriate init*() function to correctly initialize the seed given the
// offset to be generated.
void initTableOffset(int32_t table_id, size_t offset);

// Generate different types of records.
// Invokes the builder callback of 'table_id' with 'index', then calls
// row_stop() for that table.
void genRow(int32_t table_id, size_t index);

/// Returns dsdgen's get_rowcount() for 'table_id' under this iterator's
/// context.
ds_key_t getRowCount(int32_t table_id);

/// Returns the row builder callback registered for 'table_id'.
tpcds_builder_func GetTDefFunctionByNumber(int table_id);

protected:
// DSDGEN state used by every call this iterator makes into dsdgen.
DSDGenContext dsdgenCtx_;
// One slot per table id; a slot is null until initializeTable() fills it.
std::vector<std::unique_ptr<tpcds_table_def>> table_defs;
};

} // namespace facebook::presto::connector::tpcds
Loading

0 comments on commit 47e2545

Please sign in to comment.