Skip to content

Commit

Permalink
Disable multi-threaded CSV scan in Arrow
Browse files Browse the repository at this point in the history
Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
  • Loading branch information
zhouyuan committed Sep 13, 2024
1 parent 7688f6d commit 1645565
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 33 deletions.
2 changes: 1 addition & 1 deletion dev/ci-velox-buildstatic-centos-7.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ set -e

source /opt/rh/devtoolset-9/enable
export NUM_THREADS=4
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=ON --build_tests=OFF --build_benchmarks=OFF \
--build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
74 changes: 42 additions & 32 deletions ep/build-velox/src/modify_arrow.patch
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d56f6a36d..9b4088df9 100644
index d56f6a36de..9b4088df92 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -773,8 +773,7 @@ if(ARROW_ORC)
Expand All @@ -11,7 +11,7 @@ index d56f6a36d..9b4088df9 100644
+ list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc::orc)
endif()
endif()

@@ -823,9 +822,6 @@ if(ARROW_WITH_OPENTELEMETRY)
opentelemetry-cpp::ostream_span_exporter
opentelemetry-cpp::otlp_http_exporter)
Expand All @@ -21,11 +21,11 @@ index d56f6a36d..9b4088df9 100644
- endif()
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl)
endif()

@@ -860,6 +856,14 @@ if(ARROW_USE_XSIMD)
list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_XSIMD})
endif()

+# This should be done after if(ARROW_ORC) and if(ARROW_WITH_OPENTELEMETRY)
+# because they depend on Protobuf.
+if(ARROW_WITH_PROTOBUF)
Expand All @@ -38,7 +38,7 @@ index d56f6a36d..9b4088df9 100644
add_custom_target(arrow_benchmark_dependencies)
add_custom_target(arrow_test_dependencies)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index a2627c190..e453512e6 100644
index a2627c190f..e453512e62 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2557,13 +2557,9 @@ if(ARROW_WITH_ZSTD)
Expand All @@ -58,7 +58,7 @@ index a2627c190..e453512e6 100644
message(STATUS "Found Zstandard: ${ARROW_ZSTD_LIBZSTD}")
endif()
diff --git a/cpp/src/arrow/c/helpers.h b/cpp/src/arrow/c/helpers.h
index a24f272fe..e25f78c85 100644
index a24f272fea..e25f78c855 100644
--- a/cpp/src/arrow/c/helpers.h
+++ b/cpp/src/arrow/c/helpers.h
@@ -17,6 +17,7 @@
Expand All @@ -69,11 +69,34 @@ index a24f272fe..e25f78c85 100644
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index 7723dcedc6..23c76d928a 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -139,7 +139,7 @@ struct ARROW_EXPORT ReadOptions {
// Reader options

/// Whether to use the global CPU thread pool
- bool use_threads = true;
+ bool use_threads = false;

/// \brief Block size we request from the IO layer.
///
diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc
index d2d976677..d7dd01ecd 100644
index d2d976677b..8d7dafd840 100644
--- a/java/dataset/src/main/cpp/jni_wrapper.cc
+++ b/java/dataset/src/main/cpp/jni_wrapper.cc
@@ -126,20 +126,14 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener {
@@ -27,7 +27,9 @@
#include "arrow/dataset/file_base.h"
#include "arrow/filesystem/localfs.h"
#include "arrow/filesystem/path_util.h"
+#ifdef ARROW_S3
#include "arrow/filesystem/s3fs.h"
+#endif
#include "arrow/engine/substrait/util.h"
#include "arrow/engine/substrait/serde.h"
#include "arrow/engine/substrait/relation.h"
@@ -126,20 +128,14 @@ class ReserveFromJava : public arrow::dataset::jni::ReservationListener {
: vm_(vm), java_reservation_listener_(java_reservation_listener) {}

arrow::Status OnReservation(int64_t size) override {
Expand All @@ -96,8 +119,18 @@ index d2d976677..d7dd01ecd 100644
env->CallObjectMethod(java_reservation_listener_, unreserve_memory_method, size);
RETURN_NOT_OK(arrow::dataset::jni::CheckException(env));
return arrow::Status::OK();
@@ -622,7 +618,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe
JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized(
JNIEnv* env, jobject) {
JNI_METHOD_START
+#ifdef ARROW_S3
JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized());
+#endif
JNI_METHOD_END()
}

diff --git a/java/pom.xml b/java/pom.xml
index a8328576b..57f282c6c 100644
index a8328576b1..57f282c6c5 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -1101,7 +1101,8 @@
Expand All @@ -110,26 +143,3 @@ index a8328576b..57f282c6c 100644
-DARROW_SUBSTRAIT=${ARROW_DATASET}
-DARROW_USE_CCACHE=ON
-DCMAKE_BUILD_TYPE=Release
diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc
index d2d976677..eb4b6d1d2 100644
--- a/java/dataset/src/main/cpp/jni_wrapper.cc
+++ b/java/dataset/src/main/cpp/jni_wrapper.cc
@@ -27,7 +27,9 @@
#include "arrow/dataset/file_base.h"
#include "arrow/filesystem/localfs.h"
#include "arrow/filesystem/path_util.h"
+#ifdef ARROW_S3
#include "arrow/filesystem/s3fs.h"
+#endif
#include "arrow/engine/substrait/util.h"
#include "arrow/engine/substrait/serde.h"
#include "arrow/engine/substrait/relation.h"
@@ -622,7 +624,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe
JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized(
JNIEnv* env, jobject) {
JNI_METHOD_START
+#ifdef ARROW_S3
JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized());
+#endif
JNI_METHOD_END()
}

0 comments on commit 1645565

Please sign in to comment.