-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Pramod Satya <pramod.satya@ibm.com>
- Loading branch information
1 parent
b895ea7
commit ce9914b
Showing
32 changed files
with
2,492 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
connector.name=tpcds |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
presto-native-execution/presto_cpp/main/connectors/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Descend into the tpcds/ subdirectory and process its CMakeLists.txt. | ||
add_subdirectory(tpcds) |
51 changes: 51 additions & 0 deletions
51
presto-native-execution/presto_cpp/main/connectors/tpcds/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# CMP0079 NEW allows target_link_libraries() to be called on targets that were | ||
# created in a different directory. This script links into velox_exec_test_lib | ||
# and dsdgen_c, neither of which is created in this file. | ||
cmake_policy(SET CMP0079 NEW) | ||
|
||
project(TPCDS) | ||
|
||
# The connector is built as an OBJECT library from TpcdsConnector.cpp. | ||
add_library(presto_tpcds_connector OBJECT TpcdsConnector.cpp) | ||
target_link_libraries(presto_tpcds_connector velox_connector tpcds_gen fmt::fmt) | ||
|
||
# Hack: without this link, GCC builds (only) fail with multiple link errors | ||
# similar to: | ||
#   "undefined reference to `vtable for | ||
#   velox::connector::tpcds::TpcdsTableHandle'" | ||
# TODO: Fix this hack. | ||
target_link_libraries(velox_exec_test_lib presto_tpcds_connector) | ||
|
||
# NOTE(review): add_compile_options() only affects targets created after the | ||
# call, so the options below presumably apply to the dsdgen subdirectory, | ||
# append_info and tpcds_gen but not to presto_tpcds_connector above - confirm | ||
# that this is intended. | ||
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") | ||
add_compile_options(-Wno-deprecated-declarations -Wno-writable-strings | ||
-Wno-missing-field-initializers) | ||
endif() | ||
|
||
# This stringop-overflow warning seems to have lots of false positives and has | ||
# been the source of a lot of compiler bug reports (e.g. | ||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578), which causes | ||
# parquet-amalgamation.cpp to fail to compile. For now, we disable this warning | ||
# on the affected compiler (GCC). | ||
# NOTE(review): parquet-amalgamation.cpp is not built by this script; the | ||
# paragraph above looks carried over from another project - confirm which | ||
# sources actually need these flags. | ||
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") | ||
add_compile_options(-Wno-stringop-overflow -Wno-write-strings) | ||
endif() | ||
|
||
# The dsdgen-c sources live outside this directory tree, so an explicit binary | ||
# directory ("build") has to be passed to add_subdirectory(). | ||
add_subdirectory(${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/dsdgen-c build) | ||
|
||
# append_info is compiled as an OBJECT library and linked into dsdgen_c. | ||
add_library(append_info OBJECT utils/append_info-c.cpp) | ||
target_link_libraries(append_info velox_vector_test_lib Folly::folly xsimd) | ||
target_link_libraries(dsdgen_c append_info) | ||
|
||
# Generator library used by the connector target above. | ||
# NOTE(review): "dsdgen/include" is resolved relative to this directory, while | ||
# the C++ sources include the dsdgen headers through | ||
# presto_cpp/external/dsdgen/include - confirm that this path exists. | ||
add_library(tpcds_gen TpcdsGen.cpp DSDGenIterator.cpp) | ||
target_include_directories(tpcds_gen PUBLIC dsdgen/include) | ||
target_link_libraries(tpcds_gen velox_memory velox_vector dsdgen_c append_info | ||
fmt::fmt) |
98 changes: 98 additions & 0 deletions
98
presto-native-execution/presto_cpp/main/connectors/tpcds/DSDGenIterator.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "presto_cpp/main/connectors/tpcds/DSDGenIterator.h" | ||
#include <stdexcept> | ||
#include <string> | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/genrand.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/parallel.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/params.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/scaling.h" | ||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/tdefs.h" | ||
#include "velox/common/base/Exceptions.h" | ||
|
||
using namespace facebook::velox; | ||
|
||
namespace facebook::presto::connector::tpcds { | ||
|
||
void initializeDSDgen( | ||
double scale, | ||
int32_t parallel, | ||
int32_t child, | ||
DSDGenContext& dsdGenContext) { | ||
dsdGenContext.Reset(); | ||
resetCountCount(); | ||
|
||
std::string scaleStr = std::to_string(scale); | ||
set_str("SCALE", scaleStr.c_str(), dsdGenContext); | ||
std::string parallelStr = std::to_string(parallel); | ||
set_str("PARALLEL", parallelStr.c_str(), dsdGenContext); | ||
std::string childStr = std::to_string(child); | ||
set_str("CHILD", childStr.c_str(), dsdGenContext); | ||
|
||
init_rand(dsdGenContext); // no random numbers without this | ||
} | ||
|
||
std::string getQuery(int query) { | ||
if (query <= 0 || query > TPCDS_QUERIES_COUNT) { | ||
throw std::exception(); | ||
} | ||
return TPCDS_QUERIES[query - 1]; | ||
} | ||
|
||
/// Creates an iterator with DBGEN_VERSION empty table-definition slots and an
/// initialized generator context.
///
/// @param scaleFactor Scale factor handed to initializeDSDgen(); must be
///   non-negative, otherwise the check below fails.
/// @param parallel Forwarded to initializeDSDgen().
/// @param child Forwarded to initializeDSDgen().
DSDGenIterator::DSDGenIterator(
    double scaleFactor,
    int32_t parallel,
    int32_t child)
    // DBGEN_VERSION is used as the number of slots; the original comment
    // states that there are 24 TPC-DS tables.
    : tableDefs_(DBGEN_VERSION) {
  VELOX_CHECK_GE(scaleFactor, 0.0, "Tpcds scale factor must be non-negative");
  initializeDSDgen(scaleFactor, parallel, child, dsdgenCtx_);
}
|
||
void DSDGenIterator::initializeTable( | ||
const std::vector<VectorPtr>& children, | ||
int table_id) { | ||
auto tdef = getSimpleTdefsByNumber(table_id, dsdgenCtx_); | ||
TpcdsTableDef table_def; | ||
table_def.name = tdef->name; | ||
table_def.fl_child = tdef->flags & FL_CHILD ? 1 : 0; | ||
table_def.fl_small = tdef->flags & FL_SMALL ? 1 : 0; | ||
table_def.first_column = tdef->nFirstColumn; | ||
table_def.children = children; | ||
table_def.dsdGenContext = &dsdgenCtx_; | ||
tableDefs_[table_id] = std::make_unique<TpcdsTableDef>(table_def); | ||
} | ||
|
||
std::vector<std::unique_ptr<TpcdsTableDef>>& DSDGenIterator::getTableDefs() { | ||
return tableDefs_; | ||
}; | ||
|
||
/// Returns the `builder` function registered in the dsdgen function table
/// entry for `table_id`.
tpcds_builder_func DSDGenIterator::getTDefFunctionByNumber(int table_id) {
  return getTdefFunctionsByNumber(table_id)->builder;
}
|
||
void DSDGenIterator::initTableOffset(int32_t table_id, size_t offset) { | ||
row_skip(table_id, offset, dsdgenCtx_); | ||
} | ||
void DSDGenIterator::genRow(int32_t table_id, size_t index) { | ||
auto builder_func = getTDefFunctionByNumber(table_id); | ||
builder_func((void*)&tableDefs_, index, dsdgenCtx_); | ||
row_stop(table_id, dsdgenCtx_); | ||
} | ||
|
||
int64_t DSDGenIterator::getRowCount(int32_t table) { | ||
return get_rowcount(table, dsdgenCtx_); | ||
} | ||
|
||
} // namespace facebook::presto::connector::tpcds |
74 changes: 74 additions & 0 deletions
74
presto-native-execution/presto_cpp/main/connectors/tpcds/DSDGenIterator.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <memory> | ||
|
||
#include "presto_cpp/external/dsdgen/include/dsdgen-c/dist.h" | ||
#include "presto_cpp/external/dsdgen/include/tpcds_constants.hpp" | ||
#include "presto_cpp/main/connectors/tpcds/utils/append_info-c.h" | ||
|
||
using namespace facebook::velox; | ||
namespace facebook::presto::connector::tpcds { | ||
|
||
typedef int64_t ds_key_t; | ||
|
||
typedef int (*tpcds_builder_func)(void*, ds_key_t, DSDGenContext& dsdgenCtx); | ||
|
||
void initializeDSDgen( | ||
double scale, | ||
int32_t parallel, | ||
int32_t child, | ||
DSDGenContext& dsdGenContext); | ||
|
||
std::string getQuery(int query); | ||
|
||
/// This class exposes a thread-safe and reproducible iterator over TPC-DS | ||
/// synthetically generated data, backed by DSDGEN. | ||
class DSDGenIterator { | ||
public: | ||
explicit DSDGenIterator(double scaleFactor, int32_t parallel, int32_t child); | ||
|
||
/// Initializes the table definition and the table schema. | ||
void initializeTable(const std::vector<VectorPtr>& children, int table); | ||
|
||
/// Returns a vector of all the table definitions. | ||
std::vector<std::unique_ptr<TpcdsTableDef>>& getTableDefs(); | ||
|
||
// Before generating records using the gen*() functions below, call the | ||
// initTableOffset(int32_t table_id, size_t offset) function to correctly | ||
// initialize the seed given the offset to be generated. | ||
// table_id corresponds to the table that needs to be generated and offset | ||
// specifies the number of rows to skip before using the gen*() functions. | ||
void initTableOffset(int32_t table_id, size_t offset); | ||
|
||
/// Generate different types of records. | ||
// table_id corresponds to the table that is to be generated and row is the | ||
// row to be generated. | ||
void genRow(int32_t table_id, size_t row); | ||
|
||
/// Gets the row count for a table. | ||
ds_key_t getRowCount(int32_t table_id); | ||
|
||
// Gets the metadata for a table, which hold information about the mk_*() | ||
// functions responsible for generating the data. | ||
tpcds_builder_func getTDefFunctionByNumber(int table_id); | ||
|
||
protected: | ||
DSDGenContext dsdgenCtx_; | ||
std::vector<std::unique_ptr<TpcdsTableDef>> tableDefs_; | ||
}; | ||
|
||
} // namespace facebook::presto::connector::tpcds |
Oops, something went wrong.