Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[native] Add TPC-DS connector #23067

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/continue_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ jobs:
-DCMAKE_BUILD_TYPE=Debug \
-DPRESTO_ENABLE_PARQUET=ON \
-DPRESTO_ENABLE_REMOTE_FUNCTIONS=ON \
-DPRESTO_ENABLE_TPCDS_CONNECTOR=ON \
-DPRESTO_ENABLE_JWT=ON \
-DPRESTO_STATS_REPORTER_TYPE=PROMETHEUS \
-DPRESTO_MEMORY_CHECKER_TYPE=LINUX_MEMORY_CHECKER \
Expand Down
4 changes: 3 additions & 1 deletion presto-docs/src/main/sphinx/presto-cpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,6 @@ Only specific connectors are supported in the Presto C++ evaluation engine.

* Iceberg connector supports both V1 and V2 tables, including tables with delete files.

* TPCH connector, with ``tpch.naming=standard`` catalog property.
* TPCH connector, with ``tpch.naming=standard`` catalog property.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a column naming issue for TPCDS similar to TPCH?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not aware of the column naming issue for TPCH, can you please elaborate?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TPCH columns have a prefix. E.g. for the lineitem table the columns have l_ as prefix.

CREATE TABLE tpch.tiny.lineitem ( "l_orderkey" bigint NOT NULL, "l_partkey" bigint NOT NULL, "l_suppkey" bigint NOT NULL, "l_linenumber" integer NOT NULL, "l_quantity" double NOT NULL, "l_extendedprice" double NOT NULL, "l_discount" double NOT NULL, "l_tax" double NOT NULL, "l_returnflag" varchar(1) NOT NULL, "l_linestatus" varchar(1) NOT NULL, "l_shipdate" date NOT NULL, "l_commitdate" date NOT NULL, "l_receiptdate" date NOT NULL, "l_shipinstruct" varchar(25) NOT NULL, "l_shipmode" varchar(10) NOT NULL, "l_comment" varchar(44) NOT NULL )

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the TPCDS column does not have a prefix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wrong, actually it does have a prefix.

SELECT cc_call_center_sk, cc_name, cc_manager, cc_mkt_id, cc_mkt_class FROM call_center

* TPCDS connector.
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,10 @@ public static DistributedQueryRunner createQueryRunner(
queryRunner.installPlugin(new TpcdsPlugin());
queryRunner.installPlugin(new TestingHiveEventListenerPlugin());
queryRunner.createCatalog("tpch", "tpch");
queryRunner.createCatalog("tpcds", "tpcds");
Map<String, String> tpcdsProperties = ImmutableMap.<String, String>builder()
.put("tpcds.use-varchar-type", "true")
.build();
queryRunner.createCatalog("tpcds", "tpcds", tpcdsProperties);
Copy link
Contributor

@aditi-pandit aditi-pandit Oct 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This requires PRESTO_ENABLE_TPCDS_CONNECTOR to always be set. I feel we should remove that compilation option and always build and use TpcdsConnector.

Can you check the Java behavior? We should make native and Java consistent.

Map<String, String> tpchProperties = ImmutableMap.<String, String>builder()
.put("tpch.column-naming", "standard")
.build();
Expand Down
6 changes: 6 additions & 0 deletions presto-native-execution/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ option(PRESTO_ENABLE_TESTING "Enable tests" ON)

option(PRESTO_ENABLE_JWT "Enable JWT (JSON Web Token) authentication" OFF)

option(PRESTO_ENABLE_TPCDS_CONNECTOR "Enable TPC-DS connector" OFF)

# Set all Velox options below
add_compile_definitions(FOLLY_HAVE_INT128_T=1)

Expand Down Expand Up @@ -219,6 +221,10 @@ if(PRESTO_ENABLE_JWT)
add_compile_definitions(PRESTO_ENABLE_JWT)
endif()

if(PRESTO_ENABLE_TPCDS_CONNECTOR)
add_compile_definitions(PRESTO_ENABLE_TPCDS_CONNECTOR)
endif()

if("${MAX_LINK_JOBS}")
set_property(GLOBAL APPEND PROPERTY JOB_POOLS
"presto_link_job_pool=${MAX_LINK_JOBS}")
Expand Down
3 changes: 3 additions & 0 deletions presto-native-execution/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ endif
ifeq ($(PRESTO_ENABLE_JWT), ON)
EXTRA_CMAKE_FLAGS += -DPRESTO_ENABLE_JWT=ON
endif
# Forward the TPC-DS connector switch to CMake. The flag must use the -D
# prefix so that CMake treats it as a cache definition, like the other
# PRESTO_ENABLE_* options in this file.
ifeq ($(PRESTO_ENABLE_TPCDS_CONNECTOR), ON)
EXTRA_CMAKE_FLAGS += -DPRESTO_ENABLE_TPCDS_CONNECTOR=ON
endif
ifneq ($(PRESTO_STATS_REPORTER_TYPE),)
EXTRA_CMAKE_FLAGS += -DPRESTO_STATS_REPORTER_TYPE=$(PRESTO_STATS_REPORTER_TYPE)
endif
Expand Down
320 changes: 320 additions & 0 deletions presto-native-execution/presto_cpp/external/dsdgen/LICENSE

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#
# Copyright owned by the Transaction Processing Performance Council.
#
# A copy of the license is included under external/dsdgen/LICENSE
# in this repository.
#
# You may not use this file except in compliance with the License.
#
# THE TPC SOFTWARE IS AVAILABLE WITHOUT CHARGE FROM TPC.
#

# Build the dsdgen sources listed below into an OBJECT library named dsdgen_c.
# The w_*.cpp entries are listed one per file, after the shared support
# sources.
add_library(
dsdgen_c OBJECT
skip_days.cpp
address.cpp
build_support.cpp
date.cpp
dbgen_version.cpp
decimal.cpp
dist.cpp
error_msg.cpp
genrand.cpp
join.cpp
list.cpp
load.cpp
misc.cpp
nulls.cpp
parallel.cpp
permute.cpp
pricing.cpp
r_params.cpp
scaling.cpp
scd.cpp
sparse.cpp
StringBuffer.cpp
tdef_functions.cpp
tdefs.cpp
text.cpp
w_call_center.cpp
w_catalog_page.cpp
w_catalog_returns.cpp
w_catalog_sales.cpp
w_customer.cpp
w_customer_address.cpp
w_customer_demographics.cpp
w_datetbl.cpp
w_household_demographics.cpp
w_income_band.cpp
w_inventory.cpp
w_item.cpp
w_promotion.cpp
w_reason.cpp
w_ship_mode.cpp
w_store.cpp
w_store_returns.cpp
w_store_sales.cpp
w_timetbl.cpp
w_warehouse.cpp
w_web_page.cpp
w_web_returns.cpp
w_web_sales.cpp
w_web_site.cpp)

# Add the dsdgen header directories (include/ and include/dsdgen-c) to the
# include search path for this directory.
include_directories(${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/include)
include_directories(
${CMAKE_SOURCE_DIR}/presto_cpp/external/dsdgen/include/dsdgen-c)
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/*
* Copyright owned by the Transaction Processing Performance Council.
*
* A copy of the license is included under external/dsdgen/LICENSE
* in this repository.
*
* You may not use this file except in compliance with the License.
*
* THE TPC SOFTWARE IS AVAILABLE WITHOUT CHARGE FROM TPC.
*/

#include <assert.h>
#include <stdio.h>
#include "config.h"
#include "porting.h"
#ifndef USE_STDLIB_H
#include <malloc.h>
#endif
#include "StringBuffer.h"

/*
 * Routine: InitBuffer
 * Purpose: allocate a string buffer and its zero-filled text storage
 * Algorithm: allocate and zero the descriptor, then allocate and zero the
 *            text area, then record the sizing parameters
 * Data Structures: StringBuffer_t
 *
 * Params:
 *   nSize      - number of bytes initially allocated for the text
 *   nIncrement - stored in the descriptor; AddBuffer uses it as the growth
 *                step when the text no longer fits
 * Returns: the new buffer, or NULL when either allocation fails
 * Called By:
 * Calls: malloc, memset, free, MALLOC_CHECK
 * Assumptions: nSize is positive (not validated here)
 * Side Effects: MALLOC_CHECK is applied to each allocation result
 * TODO: None
 */
StringBuffer_t* InitBuffer(int nSize, int nIncrement) {
  StringBuffer_t* pBuf =
      (StringBuffer_t*)malloc(sizeof(struct STRING_BUFFER_T));
  MALLOC_CHECK(pBuf);
  if (pBuf == nullptr)
    return nullptr;
  memset((void*)pBuf, 0, sizeof(struct STRING_BUFFER_T));

  pBuf->pText = (char*)malloc(sizeof(char) * nSize);
  MALLOC_CHECK(pBuf->pText);
  if (pBuf->pText == nullptr) {
    /* Release the descriptor so a failed text allocation does not leak it. */
    free((void*)pBuf);
    return nullptr;
  }
  memset((void*)pBuf->pText, 0, sizeof(char) * nSize);

  pBuf->nIncrement = nIncrement;
  pBuf->nBytesAllocated = nSize;
  pBuf->nFlags = SB_INIT;

  return pBuf;
}

/*
 * Routine: AddBuffer
 * Purpose: append a NUL-terminated string to the buffer, growing the text
 *          storage when the string plus its terminator does not fit
 * Algorithm: work out how many growth steps are needed, reallocate once,
 *            then concatenate
 * Data Structures: StringBuffer_t
 *
 * Params:
 *   pBuf - buffer to append to
 *   pStr - NUL-terminated string to append; an empty string is a no-op
 * Returns: 0 on success, -1 when the storage could not be grown
 * Called By:
 * Calls: strlen, realloc, strncat
 * Assumptions: pBuf and pStr are non-NULL; pBuf->pText holds a
 *              NUL-terminated string whenever nBytesUsed is non-zero
 * Side Effects: may move pBuf->pText; on failure the buffer is left exactly
 *               as it was before the call
 * TODO: None
 */
int AddBuffer(StringBuffer_t* pBuf, char* pStr) {
  const int nRequested = static_cast<int>(strlen(pStr));
  if (nRequested == 0)
    return 0;

  int nRemaining = pBuf->nBytesAllocated - pBuf->nBytesUsed;
  if (nRequested >= nRemaining) {
    /* A non-positive increment would never make progress; fall back to a
     * step that is large enough for this request. */
    const int nStep =
        pBuf->nIncrement > 0 ? pBuf->nIncrement : nRequested + 1;
    int nNewSize = pBuf->nBytesAllocated;
    while (nRequested >= nRemaining) {
      nNewSize += nStep;
      nRemaining += nStep;
    }

    /* Keep the old pointer until realloc succeeds so that a failure neither
     * leaks the existing block nor destroys the buffer contents. */
    char* pGrown = static_cast<char*>(realloc((void*)pBuf->pText, nNewSize));
    if (pGrown == nullptr)
      return -1;
    pBuf->pText = pGrown;
    pBuf->nBytesAllocated = nNewSize;
  }

  /* An unused buffer is logically empty; make sure it starts with a
   * terminator even if the storage was just obtained from realloc. */
  if (pBuf->nBytesUsed == 0)
    pBuf->pText[0] = '\0';

  /* The bound is the number of characters to append, not the capacity. */
  strncat(pBuf->pText, pStr, nRequested);
  if (pBuf->nBytesUsed == 0) /* first string adds a terminator */
    pBuf->nBytesUsed = 1;
  pBuf->nBytesUsed += nRequested;

  return 0;
}

/*
 * Routine: ResetBuffer
 * Purpose: mark the buffer as empty without releasing its storage
 * Algorithm: clear the used-byte count and, when storage has been
 *            allocated, write a terminator at the start of the text
 * Data Structures: StringBuffer_t
 *
 * Params:
 *   pBuf - buffer to reset
 * Returns: always 0
 * Called By:
 * Calls:
 * Assumptions: pBuf is non-NULL
 * Side Effects: the text becomes an empty string; nBytesAllocated is
 *               unchanged
 * TODO: None
 */
int ResetBuffer(StringBuffer_t* pBuf) {
  const bool bHasStorage = (pBuf->nBytesAllocated != 0);

  pBuf->nBytesUsed = 0;
  if (bHasStorage) {
    *pBuf->pText = '\0';
  }

  return 0;
}

/*
 * Routine: GetBuffer
 * Purpose: expose the buffer's text storage
 * Algorithm:
 * Data Structures: StringBuffer_t
 *
 * Params:
 *   pBuf - buffer to read
 * Returns: the pText pointer held by the buffer (not a copy)
 * Called By:
 * Calls:
 * Assumptions: pBuf is non-NULL
 * Side Effects: none
 * TODO: None
 */
char* GetBuffer(StringBuffer_t* pBuf) {
  char* const pContents = pBuf->pText;
  return pContents;
}

/*
 * Routine: FreeBuffer
 * Purpose: release a buffer's text storage and the buffer itself
 * Algorithm:
 * Data Structures: StringBuffer_t
 *
 * Params:
 *   pBuf - buffer to release; NULL is accepted and ignored
 * Returns: nothing
 * Called By:
 * Calls: free
 * Assumptions:
 * Side Effects: pBuf and pBuf->pText must not be used after the call
 * TODO: None
 */
void FreeBuffer(StringBuffer_t* pBuf) {
  if (pBuf != nullptr) {
    /* free() ignores a null pointer, so no separate check is needed. */
    free((void*)pBuf->pText);
    free((void*)pBuf);
  }
}
Loading
Loading