From dea475828857937a986f0a6bf5a8e4333979104a Mon Sep 17 00:00:00 2001 From: Minhan Cao Date: Fri, 24 Jan 2025 18:08:21 -0800 Subject: [PATCH] fix: Make TPCH dbgen text buffer size consistent with Presto Java (#12169) Summary: Changed the text buffer size of Velox's dbgen to 300 MB to match the text buffer size of Presto Java's TPCH dbgen. The text buffer size is used when randomly generating the offset and length of the chunk taken from the overall text for each row, so a different buffer size produces different comment text. This fixes the mismatch in the comment columns of the TPCH tables. Java: https://github.com/trinodb/tpch/blob/master/src/main/java/io/trino/tpch/TextPool.java#L35 ``` private static final int DEFAULT_TEXT_POOL_SIZE = 300 * 1024 * 1024; ``` C++: https://github.com/facebookincubator/velox/blob/main/velox/tpch/gen/DBGenIterator.cpp#L40 ``` load_dists( 10 * 1024 * 1024, &dbgenCtx); // 10 MB buffer size for text generation. ``` Resolves: https://github.com/prestodb/presto/issues/24011 Pull Request resolved: https://github.com/facebookincubator/velox/pull/12169 Reviewed By: amitkdutta Differential Revision: D68653706 Pulled By: xiaoxmeng fbshipit-source-id: 635cc572bc79c33662e26124589992bcf6962830 --- velox/connectors/tpch/tests/TpchConnectorTest.cpp | 10 +++++----- velox/tpch/gen/DBGenIterator.cpp | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/velox/connectors/tpch/tests/TpchConnectorTest.cpp b/velox/connectors/tpch/tests/TpchConnectorTest.cpp index f4600ac00379..c2f1ec7380d7 100644 --- a/velox/connectors/tpch/tests/TpchConnectorTest.cpp +++ b/velox/connectors/tpch/tests/TpchConnectorTest.cpp @@ -97,11 +97,11 @@ TEST_F(TpchConnectorTest, simple) { makeFlatVector({0, 1, 1, 1, 4}), // n_comment makeFlatVector({ - "furiously regular requests. platelets affix furious", - "instructions wake quickly. final deposits haggle. final, silent theodolites ", - "asymptotes use fluffily quickly bold instructions. 
slyly bold dependencies sleep carefully pending accounts", - "ss deposits wake across the pending foxes. packages after the carefully bold requests integrate caref", - "usly ironic, pending foxes. even, special instructions nag. sly, final foxes detect slyly fluffily ", + " haggle. carefully final deposits detect slyly agai", + "al foxes promise slyly according to the regular accounts. bold requests alon", + "y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special ", + "eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold", + "y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d", }), }); test::assertEqualVectors(expected, output); diff --git a/velox/tpch/gen/DBGenIterator.cpp b/velox/tpch/gen/DBGenIterator.cpp index ec99183725bb..2849f29e7d5f 100644 --- a/velox/tpch/gen/DBGenIterator.cpp +++ b/velox/tpch/gen/DBGenIterator.cpp @@ -37,7 +37,8 @@ class DBGenBackend { // structures required by dbgen are populated. DBGenContext dbgenCtx; load_dists( - 10 * 1024 * 1024, &dbgenCtx); // 10 MB buffer size for text generation. + 300 * 1024 * 1024, + &dbgenCtx); // 300 MB buffer size for text generation. } ~DBGenBackend() { cleanup_dists();