Skip to content

Commit

Permalink
[native] Add TPC-DS connector
Browse files Browse the repository at this point in the history
Co-authored-by: Pramod Satya <pramod.satya@ibm.com>
  • Loading branch information
2 people authored and Pratik Joseph Dabre committed Sep 23, 2024
1 parent b895ea7 commit ce9914b
Show file tree
Hide file tree
Showing 32 changed files with 2,492 additions and 33 deletions.
4 changes: 3 additions & 1 deletion presto-docs/src/main/sphinx/presto-cpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,6 @@ Only specific connectors are supported in the Presto C++ evaluation engine.

* Iceberg connector supports both V1 and V2 tables, including tables with delete files.

* TPCH connector, with ``tpch.naming=standard`` catalog property.
* TPCH connector, with ``tpch.naming=standard`` catalog property.

* TPCDS connector.
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,10 @@ public static DistributedQueryRunner createQueryRunner(
queryRunner.installPlugin(new TpcdsPlugin());
queryRunner.installPlugin(new TestingHiveEventListenerPlugin());
queryRunner.createCatalog("tpch", "tpch");
queryRunner.createCatalog("tpcds", "tpcds");
Map<String, String> tpcdsProperties = ImmutableMap.<String, String>builder()
.put("tpcds.toggle-char-to-varchar", "true")
.build();
queryRunner.createCatalog("tpcds", "tpcds", tpcdsProperties);
Map<String, String> tpchProperties = ImmutableMap.<String, String>builder()
.put("tpch.column-naming", "standard")
.build();
Expand Down
1 change: 1 addition & 0 deletions presto-native-execution/etc/catalog/tpcds.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Catalog definition for the native worker: selects the connector named "tpcds".
connector.name=tpcds
3 changes: 2 additions & 1 deletion presto-native-execution/presto_cpp/main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_subdirectory(types)
add_subdirectory(http)
add_subdirectory(common)
add_subdirectory(thrift)
add_subdirectory(connectors)

add_library(
presto_server_lib
Expand Down Expand Up @@ -93,7 +94,7 @@ add_executable(presto_server PrestoMain.cpp)
# "undefined reference to `vtable for velox::connector::tpch::TpchTableHandle`"
# TODO: Fix these errors.
target_link_libraries(presto_server presto_server_lib velox_hive_connector
velox_tpch_connector)
velox_tpch_connector presto_tpcds_connector)

if(PRESTO_ENABLE_REMOTE_FUNCTIONS)
add_library(presto_server_remote_function JsonSignatureParser.cpp
Expand Down
2 changes: 2 additions & 0 deletions presto-native-execution/presto_cpp/main/PrestoServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ void PrestoServer::run() {
std::make_unique<SystemPrestoToVeloxConnector>("system"));
registerPrestoToVeloxConnector(
std::make_unique<SystemPrestoToVeloxConnector>("$system@system"));
registerPrestoToVeloxConnector(
std::make_unique<TpcdsPrestoToVeloxConnector>("tpcds"));

initializeVeloxMemory();
initializeThreadPools();
Expand Down
15 changes: 15 additions & 0 deletions presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Each connector lives in its own subdirectory; currently only TPC-DS is built
# from here.
add_subdirectory(tpcds)
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# CMP0079 NEW lets target_link_libraries() be called on targets that were
# created in a different directory (used below for velox_exec_test_lib and
# dsdgen_c).
cmake_policy(SET CMP0079 NEW)

project(TPCDS)

# The connector itself. It is an OBJECT library, so its object files are pulled
# directly into whichever target links it.
add_library(presto_tpcds_connector OBJECT TpcdsConnector.cpp)
target_link_libraries(presto_tpcds_connector velox_connector tpcds_gen fmt::fmt)

# Without this hack, there are multiple link errors similar to the one below
# only on GCC. "undefined reference to `vtable for
# velox::connector::tpcds::TpcdsTableHandle`. TODO: Fix this hack.
target_link_libraries(velox_exec_test_lib presto_tpcds_connector)

# Warning suppressions for Clang.
# NOTE(review): add_compile_options() is called after presto_tpcds_connector
# has already been declared above; confirm whether that target is meant to
# receive these flags or only the targets declared below.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wno-deprecated-declarations -Wno-writable-strings
-Wno-missing-field-initializers)
endif()

# This stringop-overflow warning seems to have lots of false positives and has
# been the source of a lot of compiler bug reports (e.g.
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578). For now, we disable this
# warning on the affected compiler (GCC).
# NOTE(review): this comment previously named parquet-amalgamation.cpp as the
# file that fails to compile; no such file is referenced in this directory, so
# the remark looks carried over from another project. Confirm which sources
# actually need -Wno-stringop-overflow.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
add_compile_options(-Wno-stringop-overflow -Wno-write-strings)
endif()

# Build the bundled dsdgen C sources. An explicit binary directory ("build") is
# given as the second argument to add_subdirectory().
add_subdirectory(${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/dsdgen-c build)

# append_info is linked into dsdgen_c (a target from the subdirectory added
# above), which is why CMP0079 is required.
add_library(append_info OBJECT utils/append_info-c.cpp)
target_link_libraries(append_info velox_vector_test_lib Folly::folly xsimd)
target_link_libraries(dsdgen_c append_info)

# Generator library used by the connector: wraps dsdgen behind TpcdsGen and
# DSDGenIterator.
# NOTE(review): "dsdgen/include" is resolved relative to this directory, while
# the sources include headers via presto_cpp/external/dsdgen/include; confirm
# this include path is the intended one.
add_library(tpcds_gen TpcdsGen.cpp DSDGenIterator.cpp)
target_include_directories(tpcds_gen PUBLIC dsdgen/include)
target_link_libraries(tpcds_gen velox_memory velox_vector dsdgen_c append_info
fmt::fmt)
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "presto_cpp/main/connectors/tpcds/DSDGenIterator.h"

#include <utility>

#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/genrand.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/parallel.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/params.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/scaling.h"
#include "presto_cpp/external/dsdgen/include/dsdgen-c/tdefs.h"
#include "velox/common/base/Exceptions.h"

using namespace facebook::velox;

namespace facebook::presto::connector::tpcds {

/// Resets `dsdGenContext` and configures it for a new generation run.
///
/// The scale, parallel and child values are formatted with std::to_string()
/// and stored under dsdgen's "SCALE", "PARALLEL" and "CHILD" parameters, after
/// which init_rand() is called on the context.
///
/// @param scale Value stored under the "SCALE" parameter.
/// @param parallel Value stored under the "PARALLEL" parameter.
/// @param child Value stored under the "CHILD" parameter.
/// @param dsdGenContext Context that is reset and then populated.
void initializeDSDgen(
    double scale,
    int32_t parallel,
    int32_t child,
    DSDGenContext& dsdGenContext) {
  dsdGenContext.Reset();
  resetCountCount();

  // Keep the formatted values in named locals so the character buffers handed
  // to set_str() stay alive for the remainder of this function.
  const std::string scaleValue = std::to_string(scale);
  const std::string parallelValue = std::to_string(parallel);
  const std::string childValue = std::to_string(child);

  set_str("SCALE", scaleValue.c_str(), dsdGenContext);
  set_str("PARALLEL", parallelValue.c_str(), dsdGenContext);
  set_str("CHILD", childValue.c_str(), dsdGenContext);

  // No random numbers can be produced until this has been called.
  init_rand(dsdGenContext);
}

std::string getQuery(int query) {
if (query <= 0 || query > TPCDS_QUERIES_COUNT) {
throw std::exception();
}
return TPCDS_QUERIES[query - 1];
}

/// Creates an iterator and prepares its dsdgen context.
///
/// @param scaleFactor Scale factor; must be non-negative.
/// @param parallel Forwarded to initializeDSDgen() as the "PARALLEL" value.
/// @param child Forwarded to initializeDSDgen() as the "CHILD" value.
/// @throws VeloxRuntimeError if `scaleFactor` is negative.
DSDGenIterator::DSDGenIterator(
    double scaleFactor,
    int32_t parallel,
    int32_t child)
    // One (initially empty) slot per table id; there are 24 TPC-DS tables.
    : tableDefs_(DBGEN_VERSION) {
  VELOX_CHECK_GE(scaleFactor, 0.0, "Tpcds scale factor must be non-negative");
  initializeDSDgen(scaleFactor, parallel, child, dsdgenCtx_);
}

void DSDGenIterator::initializeTable(
const std::vector<VectorPtr>& children,
int table_id) {
auto tdef = getSimpleTdefsByNumber(table_id, dsdgenCtx_);
TpcdsTableDef table_def;
table_def.name = tdef->name;
table_def.fl_child = tdef->flags & FL_CHILD ? 1 : 0;
table_def.fl_small = tdef->flags & FL_SMALL ? 1 : 0;
table_def.first_column = tdef->nFirstColumn;
table_def.children = children;
table_def.dsdGenContext = &dsdgenCtx_;
tableDefs_[table_id] = std::make_unique<TpcdsTableDef>(table_def);
}

/// Returns a mutable reference to the per-table definitions, indexed by table
/// id. Slots for tables that have not been set via initializeTable() hold
/// null pointers.
std::vector<std::unique_ptr<TpcdsTableDef>>& DSDGenIterator::getTableDefs() {
  return tableDefs_;
}

/// Looks up the function table registered for `table_id` and returns its
/// `builder` entry.
tpcds_builder_func DSDGenIterator::getTDefFunctionByNumber(int table_id) {
  return getTdefFunctionsByNumber(table_id)->builder;
}

/// Calls row_skip() for `table_id` with `offset` rows on this iterator's
/// dsdgen context. Intended to be called before genRow().
void DSDGenIterator::initTableOffset(int32_t table_id, size_t offset) {
  auto& context = dsdgenCtx_;
  row_skip(table_id, offset, context);
}
/// Generates one row of `table_id`.
///
/// The table's builder function receives the address of tableDefs_ as an
/// opaque pointer, the row `index` and the dsdgen context; row_stop() is
/// called for the table afterwards.
///
/// @param table_id Table whose builder function is invoked.
/// @param index Row index passed to the builder function.
void DSDGenIterator::genRow(int32_t table_id, size_t index) {
  const auto builder = getTDefFunctionByNumber(table_id);
  // The builder's int result is not inspected here.
  builder(static_cast<void*>(&tableDefs_), index, dsdgenCtx_);
  row_stop(table_id, dsdgenCtx_);
}

int64_t DSDGenIterator::getRowCount(int32_t table) {
return get_rowcount(table, dsdgenCtx_);
}

} // namespace facebook::presto::connector::tpcds
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <memory>

#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h"
#include "presto_cpp/external/dsdgen/include/tpcds_constants.hpp"
#include "presto_cpp/main/connectors/tpcds/utils/append_info-c.h"

using namespace facebook::velox;
namespace facebook::presto::connector::tpcds {

/// 64-bit integer type used for row indices and row counts in this interface.
using ds_key_t = int64_t;

/// Signature of a row builder function: takes an opaque pointer, a row index
/// and the dsdgen context, and returns an int.
using tpcds_builder_func = int (*)(void*, ds_key_t, DSDGenContext& dsdgenCtx);

/// Resets `dsdGenContext` and sets its "SCALE", "PARALLEL" and "CHILD"
/// parameters from the given values before initializing random numbers.
void initializeDSDgen(
    double scale,
    int32_t parallel,
    int32_t child,
    DSDGenContext& dsdGenContext);

/// Returns the text of the TPC-DS query with the given 1-based number.
std::string getQuery(int query);

/// This class exposes a thread-safe and reproducible iterator over TPC-DS
/// synthetically generated data, backed by DSDGEN.
///
/// Each instance owns its own DSDGenContext and its own table definitions.
/// NOTE(review): the thread-safety claim above is not enforced by anything in
/// this declaration (there is no locking); presumably it relies on each thread
/// using its own instance — confirm.
class DSDGenIterator {
public:
/// Checks that `scaleFactor` is non-negative and initializes the dsdgen
/// context with the given scale factor, parallel and child values.
explicit DSDGenIterator(double scaleFactor, int32_t parallel, int32_t child);

/// Initializes the table definition and the table schema.
/// `children` is stored in the definition created for `table`.
void initializeTable(const std::vector<VectorPtr>& children, int table);

/// Returns a vector of all the table definitions.
std::vector<std::unique_ptr<TpcdsTableDef>>& getTableDefs();

/// Before generating records using genRow() below, call this function to
/// correctly initialize the seed given the offset to be generated.
/// `table_id` corresponds to the table that needs to be generated and
/// `offset` specifies the number of rows to skip before using genRow().
void initTableOffset(int32_t table_id, size_t offset);

/// Generates a single record.
/// `table_id` corresponds to the table that is to be generated and `row` is
/// the row to be generated.
void genRow(int32_t table_id, size_t row);

/// Gets the row count for a table.
ds_key_t getRowCount(int32_t table_id);

/// Gets the builder function for a table, i.e. the mk_*() function
/// responsible for generating the table's data.
tpcds_builder_func getTDefFunctionByNumber(int table_id);

protected:
/// dsdgen state used by every call made through this iterator.
DSDGenContext dsdgenCtx_;
/// Table definitions indexed by table id; filled in by initializeTable().
std::vector<std::unique_ptr<TpcdsTableDef>> tableDefs_;
};

} // namespace facebook::presto::connector::tpcds
Loading

0 comments on commit ce9914b

Please sign in to comment.