-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Pramod Satya <pramod.satya@ibm.com>
- Loading branch information
1 parent
c49bf2e
commit cc187f0
Showing
35 changed files
with
2,542 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
connector.name=tpcds |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
add_subdirectory(tpcds) |
58 changes: 58 additions & 0 deletions
58
presto-native-execution/presto_cpp/main/connectors/tpcds/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Build script for the TPC-DS connector: declares the connector object library
# (presto_tpcds_connector), the dsdgen glue library (append_info) and the data
# generator library (tpcds_gen), and pulls in the bundled dsdgen-c sources.
cmake_minimum_required(VERSION 3.14) | ||
# NOTE(review): CMP0079 NEW presumably is what permits the target_link_libraries
# calls below on targets that are not created in this directory
# (velox_exec_test_lib, dsdgen_c) - confirm against the CMake policy docs.
cmake_policy(SET CMP0079 NEW) | ||
|
||
project(TPCDS) | ||
|
||
# tpcds_gen is referenced here before its add_library call further down.
add_library(presto_tpcds_connector OBJECT TpcdsConnector.cpp) | ||
target_link_libraries(presto_tpcds_connector velox_connector tpcds_gen fmt::fmt) | ||
|
||
# Without this hack, there are multiple link errors, on GCC only, similar to | ||
# the one below: | ||
# "undefined reference to `vtable for velox::connector::tpcds::TpcdsTableHandle`". | ||
# TODO: Fix this hack. | ||
target_link_libraries(velox_exec_test_lib presto_tpcds_connector) | ||
|
||
# NOTE(review): add_compile_options is called after presto_tpcds_connector is
# created; verify whether these flags are meant to reach that target or only
# the targets declared below (dsdgen-c, append_info, tpcds_gen).
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") | ||
add_compile_options(-Wno-deprecated-declarations -Wno-writable-strings | ||
-Wno-missing-field-initializers) | ||
endif() | ||
|
||
# This stringop-overflow warning seems to have lots of false positives and has | ||
# been the source of a lot of compiler bug reports (e.g. | ||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578), which causes | ||
# parquet-amalgamation.cpp to fail to compile. For now, we disable this warning | ||
# on the affected compiler (GCC). | ||
# NOTE(review): no parquet-amalgamation.cpp is built by this script; the
# paragraph above looks carried over from another CMakeLists - confirm which
# source actually needs -Wno-stringop-overflow here.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") | ||
add_compile_options(-Wno-stringop-overflow -Wno-write-strings) | ||
endif() | ||
|
||
# Include directories for the bundled dsdgen headers. | ||
include_directories(${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/include) | ||
include_directories( | ||
${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/include/dsdgen-c) | ||
include_directories(${CMAKE_SOURCE_DIR}/presto_cpp/external/include) | ||
|
||
# Add the out-of-tree dsdgen-c sources; "build" is the binary directory that | ||
# add_subdirectory requires for a source directory outside this tree. | ||
add_subdirectory(${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/dsdgen-c build) | ||
|
||
# append_info is linked into dsdgen_c, the target expected from the
# subdirectory added above.
add_library(append_info OBJECT include/append_info-c.cpp) | ||
target_link_libraries(append_info velox_vector_test_lib Folly::folly xsimd) | ||
target_link_libraries(dsdgen_c append_info) | ||
|
||
# Generator library used by the connector.
# NOTE(review): dsdgen/include is a path relative to this directory; confirm
# that it exists, since the headers above come from presto_cpp/external.
add_library(tpcds_gen TpcdsGen.cpp DSDGenIterator.cpp) | ||
target_include_directories(tpcds_gen PUBLIC dsdgen/include) | ||
target_link_libraries(tpcds_gen velox_memory velox_vector dsdgen_c append_info | ||
fmt::fmt) |
96 changes: 96 additions & 0 deletions
96
presto-native-execution/presto_cpp/main/connectors/tpcds/DSDGenIterator.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "presto_cpp/main/connectors/tpcds/DSDGenIterator.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/genrand.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/parallel.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/params.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/scaling.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/tdefs.h" | ||
|
||
using namespace facebook::velox; | ||
namespace facebook::presto::connector::tpcds { | ||
|
||
void InitializeDSDgen( | ||
double scale, | ||
vector_size_t parallel, | ||
vector_size_t child, | ||
DSDGenContext& dsdGenContext) { | ||
dsdGenContext.Reset(); | ||
resetCountCount(); | ||
|
||
std::string scaleStr = std::to_string(scale); | ||
set_str("SCALE", scaleStr.c_str(), dsdGenContext); | ||
std::string parallelStr = std::to_string(parallel); | ||
set_str("PARALLEL", parallelStr.c_str(), dsdGenContext); | ||
std::string childStr = std::to_string(child); | ||
set_str("CHILD", childStr.c_str(), dsdGenContext); | ||
|
||
init_rand(dsdGenContext); // no random numbers without this | ||
} | ||
|
||
std::string getQuery(int query) { | ||
if (query <= 0 || query > TPCDS_QUERIES_COUNT) { | ||
throw std::exception(); | ||
} | ||
return TPCDS_QUERIES[query - 1]; | ||
} | ||
|
||
/// Creates an iterator with its own DSDGEN context.
///
/// Allocates DBGEN_VERSION empty table-definition slots (one per TPC-DS
/// table), validates the scale factor and initializes the context through
/// InitializeDSDgen().
///
/// @param scaleFactor TPC-DS scale factor; must be >= 0.
/// @param parallel Forwarded to InitializeDSDgen().
/// @param child Forwarded to InitializeDSDgen().
DSDGenIterator::DSDGenIterator(
    double scaleFactor,
    vector_size_t parallel,
    vector_size_t child)
    : table_defs(DBGEN_VERSION) {
  VELOX_CHECK_GE(scaleFactor, 0.0, "Tpcds scale factor must be non-negative");
  InitializeDSDgen(scaleFactor, parallel, child, dsdgenCtx_);
}
|
||
void DSDGenIterator::initializeTable( | ||
const std::vector<VectorPtr>& children, | ||
int table_id) { | ||
auto tdef = getSimpleTdefsByNumber(table_id, dsdgenCtx_); | ||
tpcds_table_def table_def; | ||
table_def.name = tdef->name; | ||
table_def.fl_child = tdef->flags & FL_CHILD ? 1 : 0; | ||
table_def.fl_small = tdef->flags & FL_SMALL ? 1 : 0; | ||
table_def.first_column = tdef->nFirstColumn; | ||
table_def.children = children; | ||
table_def.dsdGenContext = &dsdgenCtx_; | ||
table_defs[table_id] = std::make_unique<tpcds_table_def>(table_def); | ||
} | ||
|
||
std::vector<std::unique_ptr<tpcds_table_def>>& DSDGenIterator::getTableDefs() { | ||
return table_defs; | ||
}; | ||
|
||
/// Looks up the dsdgen function table for `table_id` and returns its
/// `builder` entry.
tpcds_builder_func DSDGenIterator::GetTDefFunctionByNumber(int table_id) {
  return getTdefFunctionsByNumber(table_id)->builder;
}
|
||
/// Positions generation for `table_id` at `offset` by forwarding both values,
/// together with this iterator's context, to dsdgen's row_skip().
void DSDGenIterator::initTableOffset(int32_t table_id, size_t offset) {
  auto& context = dsdgenCtx_;
  row_skip(table_id, offset, context);
}
void DSDGenIterator::genRow(int32_t table_id, size_t index) { | ||
auto builder_func = GetTDefFunctionByNumber(table_id); | ||
builder_func((void*)&table_defs, index, dsdgenCtx_); | ||
row_stop(table_id, dsdgenCtx_); | ||
} | ||
|
||
int64_t DSDGenIterator::getRowCount(int32_t table) { | ||
return get_rowcount(table, dsdgenCtx_); | ||
} | ||
|
||
} // namespace facebook::presto::connector::tpcds |
68 changes: 68 additions & 0 deletions
68
presto-native-execution/presto_cpp/main/connectors/tpcds/DSDGenIterator.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <memory> | ||
|
||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h" | ||
#include "presto_cpp/external/dsdgen/include/tpcds_constants.hpp" | ||
#include "presto_cpp/main/connectors/tpcds/include/append_info-c.hpp" | ||
|
||
using namespace facebook::velox; | ||
namespace facebook::presto::connector::tpcds { | ||
|
||
typedef int64_t ds_key_t; | ||
|
||
typedef int (*tpcds_builder_func)(void*, ds_key_t, DSDGenContext& dsdgenCtx); | ||
|
||
void InitializeDSDgen( | ||
double scale, | ||
vector_size_t parallel, | ||
vector_size_t child, | ||
DSDGenContext& dsdGenContext); | ||
|
||
std::string getQuery(int query); | ||
|
||
/// This class exposes a thread-safe and reproducible iterator over TPC-DS | ||
/// synthetically generated data, backed by DSDGEN. | ||
class DSDGenIterator { | ||
public: | ||
explicit DSDGenIterator( | ||
double scaleFactor, | ||
vector_size_t parallel, | ||
vector_size_t child); | ||
|
||
void initializeTable(const std::vector<VectorPtr>& children, int table); | ||
|
||
std::vector<std::unique_ptr<tpcds_table_def>>& getTableDefs(); | ||
|
||
// Before generating records using the gen*() functions below, call the | ||
// appropriate init*() function to correctly initialize the seed given the | ||
// offset to be generated. | ||
void initTableOffset(int32_t table_id, size_t offset); | ||
|
||
// Generate different types of records. | ||
void genRow(int32_t table_id, size_t index); | ||
|
||
ds_key_t getRowCount(int32_t table_id); | ||
|
||
tpcds_builder_func GetTDefFunctionByNumber(int table_id); | ||
|
||
protected: | ||
DSDGenContext dsdgenCtx_; | ||
std::vector<std::unique_ptr<tpcds_table_def>> table_defs; | ||
}; | ||
|
||
} // namespace facebook::presto::connector::tpcds |
Oops, something went wrong.