feat(thirdparty): Bump Hadoop to 3.3.6
acelyc111 committed Sep 23, 2024
1 parent c90aa48 commit db353c5
Showing 16 changed files with 171 additions and 86 deletions.
6 changes: 4 additions & 2 deletions .github/actions/rebuild_thirdparty_if_needed/action.yaml
@@ -42,8 +42,10 @@ runs:
         cmake --build build/ -j $(nproc)
         rm -rf build/Build build/Download/[a-y]* build/Source/[a-g]* build/Source/[i-q]* build/Source/[s-z]*
         find ./ -name '*CMakeFiles*' -type d -exec rm -rf "{}" +
-        ../build_tools/download_hadoop.sh hadoop-bin
-        ../build_tools/download_zk.sh zookeeper-bin
+        ../admin_tools/download_hadoop.sh hadoop-bin
+        ../admin_tools/download_zk.sh zookeeper-bin
+        rm -rf hadoop-bin/share/doc
+        rm -rf zookeeper-bin/docs
         mv hadoop-bin ..
         mv zookeeper-bin ..
       shell: bash
2 changes: 0 additions & 2 deletions .github/actions/upload_artifact/action.yaml
@@ -21,8 +21,6 @@ runs:
   steps:
     - name: Tar files
       run: |
-        mv thirdparty/hadoop-bin ./
-        mv thirdparty/zookeeper-bin ./
         rm -rf thirdparty
         # The following operations are tricky, these directories and files don't exist if not build with '--test'.
         # When build binaries for client tests, it's not needed to add '--test'.
1 change: 1 addition & 0 deletions .licenserc.yaml
@@ -68,6 +68,7 @@ header:
     - 'src/replica/duplication/test/log.1.0.handle_real_private_log2'
     - 'src/replica/duplication/test/log.1.0.all_loaded_are_write_empties'
     # Used for patches for thirdparties.
+    - 'thirdparty/fix_hdfs_native_client.patch'
     - 'thirdparty/fix_jemalloc_for_m1_on_macos.patch'
     - 'thirdparty/fix_libevent_for_macos.patch'
     - 'thirdparty/fix_rocksdb-cmake-PORTABLE-option.patch'
File renamed without changes.
21 changes: 16 additions & 5 deletions build_tools/download_hadoop.sh → admin_tools/download_hadoop.sh
@@ -19,13 +19,24 @@
 
 set -e
 
-CWD=$(cd $(dirname $0) && pwd)
+CWD=$(cd "$(dirname "$0")" && pwd)
 
 if [ $# -ge 1 ]; then
     HADOOP_BIN_PATH=$1
 fi
 
-HADOOP_VERSION=2.8.4
-HADOOP_DIR_NAME=hadoop-${HADOOP_VERSION}
-HADOOP_PACKAGE_MD5="b30b409bb69185003b3babd1504ba224"
-${CWD}/download_package.sh ${HADOOP_DIR_NAME} ${HADOOP_PACKAGE_MD5} ${HADOOP_BIN_PATH}
+HADOOP_VERSION="hadoop-3.3.6"
+arch_output=$(arch)
+if [ "$arch_output"x == "aarch64"x ]; then
+    HADOOP_PACKAGE_MD5="369f899194a920e0d1c3c3bc1718b3b5"
+    HADOOP_BASE_NAME=${HADOOP_VERSION}-"$(arch)"
+else
+    if [ "$arch_output"x != "x86_64"x ]; then
+        echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
+    fi
+    HADOOP_PACKAGE_MD5="1cbe1214299cd3bd282d33d3934b5cbd"
+    HADOOP_BASE_NAME=${HADOOP_VERSION}
+fi
+
+DOWNLOAD_BASE_URL="https://mirrors.aliyun.com/apache/hadoop/common/${HADOOP_VERSION}/"
+"${CWD}"/download_package.sh "${HADOOP_BASE_NAME}" ${HADOOP_PACKAGE_MD5} "${HADOOP_BIN_PATH}" ${DOWNLOAD_BASE_URL} "${HADOOP_VERSION}"
52 changes: 31 additions & 21 deletions build_tools/download_package.sh → admin_tools/download_package.sh
@@ -21,59 +21,69 @@ set -e
 
 if [ $# -lt 2 ]; then
     echo "Invalid arguments !"
-    echo "USAGE: $0 <DIR_NAME> <PACKAGE_MD5> [TARGET_PATH]"
+    echo "USAGE: $0 <PACKAGE_BASE_NAME> <PACKAGE_MD5> [TARGET_PATH]"
     exit 1
 fi
 
-DIR_NAME=$1
+PACKAGE_BASE_NAME=$1
 PACKAGE_MD5=$2
 
 if [ $# -lt 3 ]; then
-    echo "TARGET_PATH is not provided, thus do not try to download ${DIR_NAME}"
+    echo "TARGET_PATH is not provided, thus do not try to download ${PACKAGE_BASE_NAME}"
     exit 0
 fi
 
 TARGET_PATH=$3
-if [ -d ${TARGET_PATH} ]; then
-    echo "TARGET_PATH ${TARGET_PATH} has existed, thus do not try to download ${DIR_NAME}"
+if [ -d "${TARGET_PATH}" ]; then
+    echo "TARGET_PATH ${TARGET_PATH} has existed, thus do not try to download ${PACKAGE_BASE_NAME}"
     exit 0
 fi
 
-PACKAGE_NAME=${DIR_NAME}.tar.gz
-if [ ! -f ${PACKAGE_NAME} ]; then
-    echo "Downloading ${DIR_NAME}..."
+DEFAULT_DOWNLOAD_BASE_URL="https://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com/"
+if [ $# -ge 4 ]; then
+    DEFAULT_DOWNLOAD_BASE_URL=$4
+fi
+
+DIR_NAME=${PACKAGE_BASE_NAME}
+if [ $# -ge 5 ]; then
+    DIR_NAME=$5
+fi
+
+PACKAGE_NAME=${PACKAGE_BASE_NAME}.tar.gz
+if [ ! -f "${PACKAGE_NAME}" ]; then
+    echo "Downloading ${PACKAGE_NAME} ..."
 
-    DOWNLOAD_URL="https://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com/${PACKAGE_NAME}"
-    if ! wget -T 10 -t 5 ${DOWNLOAD_URL}; then
-        echo "ERROR: download ${DIR_NAME} failed"
+    DOWNLOAD_URL=${DEFAULT_DOWNLOAD_BASE_URL}${PACKAGE_NAME}
+    if ! wget -q -T 10 -t 5 "${DOWNLOAD_URL}"; then
+        echo "ERROR: download ${PACKAGE_NAME} failed"
         exit 1
     fi
 
-    if [ `md5sum ${PACKAGE_NAME} | awk '{print$1}'` != ${PACKAGE_MD5} ]; then
+    if [ "$(md5sum "${PACKAGE_NAME}" | awk '{print$1}')" != "${PACKAGE_MD5}" ]; then
         echo "Check file ${PACKAGE_NAME} md5sum failed!"
         exit 1
     fi
 fi
 
-rm -rf ${DIR_NAME}
+rm -rf "${DIR_NAME}"
 
-echo "Decompressing ${DIR_NAME}..."
-if ! tar xf ${PACKAGE_NAME}; then
-    echo "ERROR: decompress ${DIR_NAME} failed"
-    rm -f ${PACKAGE_NAME}
+echo "Decompressing ${PACKAGE_NAME} ..."
+if ! tar xf "${PACKAGE_NAME}"; then
+    echo "ERROR: decompress ${PACKAGE_NAME} failed"
+    rm -f "${PACKAGE_NAME}"
     exit 1
 fi
 
-rm -f ${PACKAGE_NAME}
+rm -f "${PACKAGE_NAME}"
 
-if [ ! -d ${DIR_NAME} ]; then
+if [ ! -d "${DIR_NAME}" ]; then
     echo "ERROR: ${DIR_NAME} does not exist"
     exit 1
 fi
 
-if [ -d ${TARGET_PATH} ]; then
+if [ -d "${TARGET_PATH}" ]; then
     echo "TARGET_PATH ${TARGET_PATH} has been generated, which means it and ${DIR_NAME} are the same dir thus do not do mv any more"
     exit 0
 fi
 
-mv ${DIR_NAME} ${TARGET_PATH}
+mv "${DIR_NAME}" "${TARGET_PATH}"
File renamed without changes.
File renamed without changes.
File renamed without changes.
54 changes: 32 additions & 22 deletions build_tools/pack_server.sh
@@ -149,30 +149,40 @@ pack_server_lib crypto $separate_servers
 pack_server_lib ssl $separate_servers
 
 # Pack hadoop-related files.
-# If you want to use hdfs service to backup/restore/bulkload pegasus tables,
-# you need to set env ${HADOOP_HOME}, edit ${HADOOP_HOME}/etc/hadoop/core-site.xml,
-# and specify the keytab file.
-if [ -n "$HADOOP_HOME" ] && [ -n "$keytab_file" ]; then
-    mkdir -p ${pack}/hadoop
-    copy_file $keytab_file ${pack}/hadoop
-    copy_file ${HADOOP_HOME}/etc/hadoop/core-site.xml ${pack}/hadoop
-    if [ -d $HADOOP_HOME/share/hadoop ]; then
-        for f in ${HADOOP_HOME}/share/hadoop/common/lib/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
-        for f in ${HADOOP_HOME}/share/hadoop/common/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
-        for f in ${HADOOP_HOME}/share/hadoop/hdfs/lib/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
-        for f in ${HADOOP_HOME}/share/hadoop/hdfs/*.jar; do
-            copy_file $f ${pack}/hadoop
-        done
-    fi
+# If you want to use hdfs service to backup/restore/bulkload pegasus tables, you need to
+# set env ${HADOOP_HOME} to the proper directory where contains Hadoop *.jar files.
+if [ -n "$HADOOP_HOME" ]; then
+    # Verify one of the jars.
+    arch_output=$(arch)
+    if [ "$arch_output"x == "aarch64"x ]; then
+        HDFS_JAR_MD5="fcc09dbed936cd8673918774cc3ead6b"
+    else
+        if [ "$arch_output"x != "x86_64"x ]; then
+            echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
+        fi
+        HDFS_JAR_MD5="f67f3a5613c885e1622b1056fd94262b"
+    fi
+    HDFS_JAR=${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-3.3.6.jar
+    if [ "$(md5sum "${HDFS_JAR}" | awk '{print$1}')" != "${HDFS_JAR_MD5}" ]; then
+        echo "check file ${HDFS_JAR} md5sum failed!"
+        exit 1
+    fi
+    # Pack the jars.
+    mkdir -p ${pack}/hadoop
+    for f in ${HADOOP_HOME}/share/hadoop/common/lib/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
+    for f in ${HADOOP_HOME}/share/hadoop/common/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
+    for f in ${HADOOP_HOME}/share/hadoop/hdfs/lib/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
+    for f in ${HADOOP_HOME}/share/hadoop/hdfs/*.jar; do
+        copy_file $f ${pack}/hadoop
+    done
 else
-    echo "Couldn't find env ${HADOOP_HOME} or no valid keytab file was specified,
-    hadoop-related files were not packed."
+    echo "Couldn't find env HADOOP_HOME, hadoop-related files were not packed."
 fi
 
 DISTRIB_ID=$(cat /etc/*-release | grep DISTRIB_ID | awk -F'=' '{print $2}')
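
Packing now requires only HADOOP_HOME; the keytab file and core-site.xml are no longer bundled. A minimal sketch of packing server binaries with the Hadoop jars included (the /opt path is illustrative, and run.sh pack_server, changed below, also sources admin_tools/config_hdfs.sh, which may set HADOOP_HOME itself):

    # Any Hadoop 3.3.6 distribution containing the share/hadoop/{common,hdfs} jars works.
    export HADOOP_HOME=/opt/hadoop-3.3.6
    ./run.sh pack_server
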
7 changes: 2 additions & 5 deletions build_tools/pack_tools.sh
@@ -157,13 +157,10 @@ chmod -x ${pack}/lib/*
 
 mkdir -p ${pack}/admin_tools
 copy_file ./admin_tools/* ${pack}/admin_tools/
+copy_file ./admin_tools/download_*.sh ${pack}/admin_tools/
+copy_file ./admin_tools/*_zk.sh ${pack}/admin_tools/
 chmod +x ${pack}/admin_tools/*.sh
 
-mkdir -p ${pack}/build_tools
-copy_file ./build_tools/download_*.sh ${pack}/build_tools/
-copy_file ./build_tools/*_zk.sh ${pack}/build_tools/
-chmod +x ${pack}/build_tools/*.sh
-
 mkdir -p ${pack}/src/server
 copy_file ./src/server/*.ini ${pack}/src/server/
 
4 changes: 2 additions & 2 deletions docker/thirdparties-bin/Dockerfile
@@ -35,8 +35,8 @@ RUN git clone --depth=1 --branch=${GITHUB_BRANCH} ${GITHUB_REPOSITORY_URL} \
     && unzip /root/thirdparties-src.zip -d . \
     && cmake -DCMAKE_BUILD_TYPE=Release -DROCKSDB_PORTABLE=${ROCKSDB_PORTABLE} -DUSE_JEMALLOC=${USE_JEMALLOC} -B build/ . \
     && cmake --build build/ -j $(($(nproc)/2+1)) \
-    && ../build_tools/download_hadoop.sh ${HADOOP_BIN_PATH} \
-    && ../build_tools/download_zk.sh ${ZOOKEEPER_BIN_PATH} \
+    && ../admin_tools/download_hadoop.sh ${HADOOP_BIN_PATH} \
+    && ../admin_tools/download_zk.sh ${ZOOKEEPER_BIN_PATH} \
     && zip -r ~/thirdparties-bin.zip output/ build/Source/rocksdb/cmake build/Source/http-parser build/Source/hadoop build/Download/zookeeper ${HADOOP_BIN_PATH} ${ZOOKEEPER_BIN_PATH} \
     && cd ~ \
     && rm -rf incubator-pegasus;
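
To exercise this path locally one might build the image directly; a hedged sketch (the tag is illustrative, and build args such as GITHUB_BRANCH and HADOOP_BIN_PATH are assumed to fall back to defaults declared in the Dockerfile):

    docker build -t pegasus-thirdparties-bin docker/thirdparties-bin
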
17 changes: 10 additions & 7 deletions run.sh
@@ -28,12 +28,13 @@ export REPORT_DIR="$ROOT/test_report"
 export THIRDPARTY_ROOT=${PEGASUS_THIRDPARTY_ROOT:-"$ROOT/thirdparty"}
 ARCH_TYPE=''
 arch_output=$(arch)
-if [ "$arch_output"x == "x86_64"x ]; then
-    ARCH_TYPE="amd64"
-elif [ "$arch_output"x == "aarch64"x ]; then
+if [ "$arch_output"x == "aarch64"x ]; then
     ARCH_TYPE="aarch64"
 else
-    echo "WARNING: unsupported CPU architecture '$arch_output', use 'x86_64' as default"
+    if [ "$arch_output"x != "x86_64"x ]; then
+        echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
+    fi
+    ARCH_TYPE="amd64"
 fi
 export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/${ARCH_TYPE}:${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server:${BUILD_LATEST_DIR}/output/lib:${THIRDPARTY_ROOT}/output/lib:${LD_LIBRARY_PATH}
 # Disable AddressSanitizerOneDefinitionRuleViolation, see https://github.com/google/sanitizers/issues/1017 for details.
@@ -656,7 +657,7 @@ function run_start_zk()
         fi
     fi
 
-    INSTALL_DIR="$INSTALL_DIR" PORT="$PORT" $ROOT/build_tools/start_zk.sh
+    INSTALL_DIR="$INSTALL_DIR" PORT="$PORT" $ROOT/admin_tools/start_zk.sh
 }
 
 #####################
@@ -693,7 +694,7 @@ function run_stop_zk()
         esac
         shift
     done
-    INSTALL_DIR="$INSTALL_DIR" $ROOT/build_tools/stop_zk.sh
+    INSTALL_DIR="$INSTALL_DIR" $ROOT/admin_tools/stop_zk.sh
 }
 
 #####################
@@ -730,7 +731,7 @@ function run_clear_zk()
         esac
         shift
     done
-    INSTALL_DIR="$INSTALL_DIR" $ROOT/build_tools/clear_zk.sh
+    INSTALL_DIR="$INSTALL_DIR" $ROOT/admin_tools/clear_zk.sh
 }
 
 #####################
@@ -2105,6 +2106,8 @@ case $cmd in
         ;;
     pack_server)
         shift
+        # source the config_hdfs.sh to get the HADOOP_HOME.
+        source "${ROOT}"/admin_tools/config_hdfs.sh
         PEGASUS_ROOT=$ROOT ./build_tools/pack_server.sh $*
         ;;
     pack_client)
2 changes: 1 addition & 1 deletion src/sample/run.sh
@@ -33,7 +33,7 @@ if [ "$arch_output"x == "x86_64"x ]; then
 elif [ "$arch_output"x == "aarch64"x ]; then
     ARCH_TYPE="aarch64"
 else
-    echo "WARNING: unsupported CPU architecture '$arch_output', use 'x86_64' as default"
+    echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
 fi
 export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/${ARCH_TYPE}:${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server:${PEGASUS_THIRDPARTY_ROOT}/output/lib:$(pwd)/../../lib:${LD_LIBRARY_PATH}
 
59 changes: 40 additions & 19 deletions thirdparty/CMakeLists.txt
@@ -120,17 +120,51 @@ ExternalProject_Add(gperftools
     DOWNLOAD_NO_PROGRESS true
 )
 
+ExternalProject_Add(abseil
+    URL ${OSS_URL_PREFIX}/abseil-20230802.1.zip
+        https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.zip
+    URL_MD5 5c6193dbc82834f8e762c6a28c9cc615
+    CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}
+               -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+               -DABSL_FIND_GOOGLETEST=OFF
+               -DCMAKE_CXX_STANDARD=17
+    DOWNLOAD_EXTRACT_TIMESTAMP true
+    DOWNLOAD_NO_PROGRESS true
+)
+
+ExternalProject_Add(protobuf
+    URL https://github.com/protocolbuffers/protobuf/archive/refs/tags/v27.0.tar.gz
+    URL_MD5 c96aaf02c8acea549d65bb7b2d549bf6
+    CMAKE_ARGS -DCMAKE_BUILD_TYPE=release
+               -Dprotobuf_BUILD_TESTS=OFF
+               -Dprotobuf_BUILD_PROTOC_BINARIES=ON
+               -Dprotobuf_BUILD_LIBUPB=ON
+               -Dprotobuf_ABSL_PROVIDER=package
+               -DBUILD_SHARED_LIBS=ON
+               -DBUILD_SHARED_HDFSPP=ON
+               -DHDFSPP_LIBRARY_ONLY=ON
+               -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+               -DCMAKE_CXX_STANDARD=17
+               -DABSL_ROOT_DIR=${TP_OUTPUT}
+               -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}
+               -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+               -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+    DEPENDS abseil
+    DOWNLOAD_EXTRACT_TIMESTAMP true
+    DOWNLOAD_NO_PROGRESS true
+)
+
+set(HDFS_CLIENT_DIR "hadoop-hdfs-project/hadoop-hdfs-native-client")
 ExternalProject_Add(hadoop
-    URL ${OSS_URL_PREFIX}/hadoop-release-2.8.4.tar.gz
-        https://github.com/apache/hadoop/archive/refs/tags/rel/release-2.8.4.tar.gz
-    URL_MD5 a1be737d4bff14923689619ab6545a96
-    PATCH_COMMAND ""
+    URL https://mirrors.aliyun.com/apache/hadoop/common/hadoop-3.3.6/hadoop-3.3.6-src.tar.gz
+    URL_MD5 285c07d8ad2c837c8ee04a4fa49c73cd
+    PATCH_COMMAND patch -p1 < ${TP_DIR}/fix_hdfs_native_client.patch
     COMMAND cd ${HDFS_CLIENT_DIR} && mvn package -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar
-    COMMAND cd ${HDFS_CLIENT_DIR} && cp -R target/hadoop-hdfs-native-client-2.8.4/include/. ${TP_OUTPUT}/include/hdfs && cp -R target/hadoop-hdfs-native-client-2.8.4/lib/native/. ${TP_OUTPUT}/lib
+    COMMAND cd ${HDFS_CLIENT_DIR} && cp -R target/hadoop-hdfs-native-client-3.3.6/include/. ${TP_OUTPUT}/include/hdfs && cp -R target/hadoop-hdfs-native-client-3.3.6/lib/native/. ${TP_OUTPUT}/lib
     CONFIGURE_COMMAND ""
     BUILD_COMMAND ""
     INSTALL_COMMAND ""
+    DEPENDS protobuf
     DOWNLOAD_EXTRACT_TIMESTAMP true
     DOWNLOAD_NO_PROGRESS true
 )
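
The hadoop step now compiles the HDFS native client from the 3.3.6 sources instead of unpacking a prebuilt 2.8.4 release, which is why abseil and protobuf are built first and wired in via DEPENDS. A hedged sketch of the equivalent manual steps (assuming Maven and a JDK are on PATH; the patch path is illustrative, and all commands are taken from the recipe above):

    tar xf hadoop-3.3.6-src.tar.gz && cd hadoop-3.3.6-src
    patch -p1 < /path/to/thirdparty/fix_hdfs_native_client.patch
    cd hadoop-hdfs-project/hadoop-hdfs-native-client
    mvn package -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar
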
@@ -303,18 +337,6 @@ ExternalProject_Add(nlohmann_json
     DOWNLOAD_NO_PROGRESS true
 )
 
-ExternalProject_Add(abseil
-    URL ${OSS_URL_PREFIX}/abseil-20230802.1.zip
-        https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.1.zip
-    URL_MD5 5c6193dbc82834f8e762c6a28c9cc615
-    CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}
-               -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-               -DABSL_FIND_GOOGLETEST=OFF
-               -DCMAKE_CXX_STANDARD=17
-    DOWNLOAD_EXTRACT_TIMESTAMP true
-    DOWNLOAD_NO_PROGRESS true
-)
-
 ExternalProject_Add(s2geometry
     URL ${OSS_URL_PREFIX}/s2geometry-0.10.0.tar.gz
         https://github.com/google/s2geometry/archive/refs/tags/v0.10.0.tar.gz
@@ -357,8 +379,7 @@ set(SNAPPY_OPTIONS
     -DSNAPPY_FUZZING_BUILD=OFF
     -DSNAPPY_INSTALL=ON)
 execute_process(COMMAND arch OUTPUT_VARIABLE ARCH_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
-message(STATUS "ARCH_NAME = ${ARCH_NAME}")
-if (ARCH_NAME EQUAL "x86_64")
+if (ARCH_NAME STREQUAL "x86_64")
     set(SNAPPY_OPTIONS
         ${SNAPPY_OPTIONS}
         -DSNAPPY_REQUIRE_AVX=ON
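
The STREQUAL change is a genuine bug fix rather than cleanup: CMake's EQUAL operator compares values as numbers, so "if (ARCH_NAME EQUAL "x86_64")" can never be true for the string output of arch, and the AVX-specific snappy options were silently skipped on x86_64. STREQUAL performs the intended string comparison.
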