From 164556540f94b7017168c1135f1344b46bb36972 Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Fri, 13 Sep 2024 14:39:53 +0800 Subject: [PATCH] disable multiple thread csv scan in arrow Signed-off-by: Yuan Zhou --- dev/ci-velox-buildstatic-centos-7.sh | 2 +- ep/build-velox/src/modify_arrow.patch | 74 +++++++++++++++------------ 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/dev/ci-velox-buildstatic-centos-7.sh b/dev/ci-velox-buildstatic-centos-7.sh index 3272de95d910d..6304e77c104ae 100755 --- a/dev/ci-velox-buildstatic-centos-7.sh +++ b/dev/ci-velox-buildstatic-centos-7.sh @@ -4,5 +4,5 @@ set -e source /opt/rh/devtoolset-9/enable export NUM_THREADS=4 -./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \ +./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=ON --build_tests=OFF --build_benchmarks=OFF \ --build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON diff --git a/ep/build-velox/src/modify_arrow.patch b/ep/build-velox/src/modify_arrow.patch index 7d4d8e557b582..991719ba070b6 100644 --- a/ep/build-velox/src/modify_arrow.patch +++ b/ep/build-velox/src/modify_arrow.patch @@ -1,5 +1,5 @@ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt -index d56f6a36d..9b4088df9 100644 +index d56f6a36de..9b4088df92 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -773,8 +773,7 @@ if(ARROW_ORC) @@ -11,7 +11,7 @@ index d56f6a36d..9b4088df9 100644 + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc) endif() endif() - + @@ -823,9 +822,6 @@ if(ARROW_WITH_OPENTELEMETRY) opentelemetry-cpp::ostream_span_exporter opentelemetry-cpp::otlp_http_exporter) @@ -21,11 +21,11 @@ index d56f6a36d..9b4088df9 100644 - endif() list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) endif() - + @@ -860,6 +856,14 @@ if(ARROW_USE_XSIMD) list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_XSIMD}) endif() - + +# This should be done after if(ARROW_ORC) and if(ARROW_WITH_OPENTELEMETRY) +# because they depend on Protobuf. +if(ARROW_WITH_PROTOBUF) @@ -38,7 +38,7 @@ index d56f6a36d..9b4088df9 100644 add_custom_target(arrow_benchmark_dependencies) add_custom_target(arrow_test_dependencies) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake -index a2627c190..e453512e6 100644 +index a2627c190f..e453512e62 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2557,13 +2557,9 @@ if(ARROW_WITH_ZSTD) @@ -58,7 +58,7 @@ index a2627c190..e453512e6 100644 message(STATUS "Found Zstandard: ${ARROW_ZSTD_LIBZSTD}") endif() diff --git a/cpp/src/arrow/c/helpers.h b/cpp/src/arrow/c/helpers.h -index a24f272fe..e25f78c85 100644 +index a24f272fea..e25f78c855 100644 --- a/cpp/src/arrow/c/helpers.h +++ b/cpp/src/arrow/c/helpers.h @@ -17,6 +17,7 @@ @@ -69,11 +69,34 @@ index a24f272fe..e25f78c85 100644 #include #include #include +diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h +index 7723dcedc6..23c76d928a 100644 +--- a/cpp/src/arrow/csv/options.h ++++ b/cpp/src/arrow/csv/options.h +@@ -139,7 +139,7 @@ struct ARROW_EXPORT ReadOptions { + // Reader options + + /// Whether to use the global CPU thread pool +- bool use_threads = true; ++ bool use_threads = false; + + /// \brief Block size we request from the IO layer. + /// diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc -index d2d976677..d7dd01ecd 100644 +index d2d976677b..8d7dafd840 100644 --- a/java/dataset/src/main/cpp/jni_wrapper.cc +++ b/java/dataset/src/main/cpp/jni_wrapper.cc -@@ -126,20 +126,14 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener { +@@ -27,7 +27,9 @@ + #include "arrow/dataset/file_base.h" + #include "arrow/filesystem/localfs.h" + #include "arrow/filesystem/path_util.h" ++#ifdef ARROW_S3 + #include "arrow/filesystem/s3fs.h" ++#endif + #include "arrow/engine/substrait/util.h" + #include "arrow/engine/substrait/serde.h" + #include "arrow/engine/substrait/relation.h" +@@ -126,20 +128,14 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener { : vm_(vm), java_reservation_listener_(java_reservation_listener) {} arrow::Status OnReservation(int64_t size) override { @@ -96,8 +119,18 @@ index d2d976677..d7dd01ecd 100644 env->CallObjectMethod(java_reservation_listener_, unreserve_memory_method, size); RETURN_NOT_OK(arrow::dataset::jni::CheckException(env)); return arrow::Status::OK(); +@@ -622,7 +618,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe + JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized( + JNIEnv* env, jobject) { + JNI_METHOD_START ++#ifdef ARROW_S3 + JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized()); ++#endif + JNI_METHOD_END() + } + diff --git a/java/pom.xml b/java/pom.xml -index a8328576b..57f282c6c 100644 +index a8328576b1..57f282c6c5 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1101,7 +1101,8 @@ @@ -110,26 +143,3 @@ index a8328576b..57f282c6c 100644 -DARROW_SUBSTRAIT=${ARROW_DATASET} -DARROW_USE_CCACHE=ON -DCMAKE_BUILD_TYPE=Release -diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc -index d2d976677..eb4b6d1d2 100644 ---- a/java/dataset/src/main/cpp/jni_wrapper.cc -+++ b/java/dataset/src/main/cpp/jni_wrapper.cc -@@ -27,7 +27,9 @@ - #include "arrow/dataset/file_base.h" - #include "arrow/filesystem/localfs.h" - #include "arrow/filesystem/path_util.h" -+#ifdef ARROW_S3 - #include "arrow/filesystem/s3fs.h" -+#endif - #include "arrow/engine/substrait/util.h" - #include "arrow/engine/substrait/serde.h" - #include "arrow/engine/substrait/relation.h" -@@ -622,7 +624,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe - JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized( - JNIEnv* env, jobject) { - JNI_METHOD_START -+#ifdef ARROW_S3 - JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized()); -+#endif - JNI_METHOD_END() - }