Skip to content

Commit

Permalink
feat(thirdparty): Bump Hadoop to 3.3.6
Browse files Browse the repository at this point in the history
  • Loading branch information
acelyc111 committed Jun 3, 2024
1 parent dbe66a2 commit c1f8a12
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 59 deletions.
5 changes: 4 additions & 1 deletion run.sh
Original file line number | Diff line number | Diff line change
Expand Up
@@ -33,7 +33,8 @@ if [ "$arch_output"x == "x86_64"x ]; then
elif [ "$arch_output"x == "aarch64"x ]; then
ARCH_TYPE="aarch64"
else
echo "WARNING: unsupported CPU architecture '$arch_output', use 'x86_64' as default"
ARCH_TYPE="amd64"
echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
fi
export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/${ARCH_TYPE}:${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server:${BUILD_LATEST_DIR}/output/lib:${THIRDPARTY_ROOT}/output/lib:${LD_LIBRARY_PATH}
# Disable AddressSanitizerOneDefinitionRuleViolation, see https://github.com/google/sanitizers/issues/1017 for details.
Expand Down
Expand Up
@@ -2092,6 +2093,8 @@ case $cmd in
;;
pack_server)
shift
# source the config_hdfs.sh to get the HADOOP_HOME.
source "${ROOT}"/scripts/config_hdfs.sh
PEGASUS_ROOT=$ROOT ./scripts/pack_server.sh $*
;;
pack_client)
Expand Down
21 changes: 16 additions & 5 deletions scripts/download_hadoop.sh
Original file line number | Diff line number | Diff line change
Expand Up
@@ -19,13 +19,24 @@

set -e

CWD=$(cd $(dirname $0) && pwd)
CWD=$(cd "$(dirname "$0")" && pwd)

if [ $# -ge 1 ]; then
HADOOP_BIN_PATH=$1
fi

HADOOP_VERSION=2.8.4
HADOOP_DIR_NAME=hadoop-${HADOOP_VERSION}
HADOOP_PACKAGE_MD5="b30b409bb69185003b3babd1504ba224"
${CWD}/download_package.sh ${HADOOP_DIR_NAME} ${HADOOP_PACKAGE_MD5} ${HADOOP_BIN_PATH}
HADOOP_VERSION="hadoop-3.3.6"
arch_output=$(arch)
if [ "$arch_output"x == "x86_64"x ]; then
HADOOP_PACKAGE_MD5="1cbe1214299cd3bd282d33d3934b5cbd"
HADOOP_BASE_NAME=${HADOOP_VERSION}
elif [ "$arch_output"x == "aarch64"x ]; then
HADOOP_PACKAGE_MD5="369f899194a920e0d1c3c3bc1718b3b5"
HADOOP_BASE_NAME=${HADOOP_VERSION}-"$(arch)"
else
HADOOP_PACKAGE_MD5="1cbe1214299cd3bd282d33d3934b5cbd"
echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
fi

DOWNLOAD_BASE_URL="https://archive.apache.org/dist/hadoop/common/${HADOOP_VERSION}/"
"${CWD}"/download_package.sh "${HADOOP_BASE_NAME}" ${HADOOP_PACKAGE_MD5} "${HADOOP_BIN_PATH}" ${DOWNLOAD_BASE_URL} "${HADOOP_VERSION}"
52 changes: 31 additions & 21 deletions scripts/download_package.sh
Original file line number | Diff line number | Diff line change
Expand Up
@@ -21,59 +21,69 @@ set -e

if [ $# -lt 2 ]; then
echo "Invalid arguments !"
echo "USAGE: $0 <DIR_NAME> <PACKAGE_MD5> [TARGET_PATH]"
echo "USAGE: $0 <PACKAGE_BASE_NAME> <PACKAGE_MD5> [TARGET_PATH]"
exit 1
fi

DIR_NAME=$1
PACKAGE_BASE_NAME=$1
PACKAGE_MD5=$2

if [ $# -lt 3 ]; then
echo "TARGET_PATH is not provided, thus do not try to download ${DIR_NAME}"
echo "TARGET_PATH is not provided, thus do not try to download ${PACKAGE_BASE_NAME}"
exit 0
fi

TARGET_PATH=$3
if [ -d ${TARGET_PATH} ]; then
echo "TARGET_PATH ${TARGET_PATH} has existed, thus do not try to download ${DIR_NAME}"
if [ -d "${TARGET_PATH}" ]; then
echo "TARGET_PATH ${TARGET_PATH} has existed, thus do not try to download ${PACKAGE_BASE_NAME}"
exit 0
fi

PACKAGE_NAME=${DIR_NAME}.tar.gz
if [ ! -f ${PACKAGE_NAME} ]; then
echo "Downloading ${DIR_NAME}..."
DEFAULT_DOWNLOAD_BASE_URL="https://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com/"
if [ $# -ge 4 ]; then
DEFAULT_DOWNLOAD_BASE_URL=$4
fi

DIR_NAME=${PACKAGE_BASE_NAME}
if [ $# -ge 5 ]; then
DIR_NAME=$5
fi

PACKAGE_NAME=${PACKAGE_BASE_NAME}.tar.gz
if [ ! -f "${PACKAGE_NAME}" ]; then
echo "Downloading ${PACKAGE_NAME} ..."

DOWNLOAD_URL="https://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com/${PACKAGE_NAME}"
if ! wget -T 10 -t 5 ${DOWNLOAD_URL}; then
echo "ERROR: download ${DIR_NAME} failed"
DOWNLOAD_URL=${DEFAULT_DOWNLOAD_BASE_URL}${PACKAGE_NAME}
if ! wget -T 10 -t 5 "${DOWNLOAD_URL}"; then
echo "ERROR: download ${PACKAGE_NAME} failed"
exit 1
fi

if [ `md5sum ${PACKAGE_NAME} | awk '{print$1}'` != ${PACKAGE_MD5} ]; then
if [ "$(md5sum "${PACKAGE_NAME}" | awk '{print$1}')" != "${PACKAGE_MD5}" ]; then
echo "Check file ${PACKAGE_NAME} md5sum failed!"
exit 1
fi
fi

rm -rf ${DIR_NAME}
rm -rf "${DIR_NAME}"

echo "Decompressing ${DIR_NAME}..."
if ! tar xf ${PACKAGE_NAME}; then
echo "ERROR: decompress ${DIR_NAME} failed"
rm -f ${PACKAGE_NAME}
echo "Decompressing ${PACKAGE_NAME} ..."
if ! tar xf "${PACKAGE_NAME}"; then
echo "ERROR: decompress ${PACKAGE_NAME} failed"
rm -f "${PACKAGE_NAME}"
exit 1
fi

rm -f ${PACKAGE_NAME}
rm -f "${PACKAGE_NAME}"

if [ ! -d ${DIR_NAME} ]; then
if [ ! -d "${DIR_NAME}" ]; then
echo "ERROR: ${DIR_NAME} does not exist"
exit 1
fi

if [ -d ${TARGET_PATH} ]; then
if [ -d "${TARGET_PATH}" ]; then
echo "TARGET_PATH ${TARGET_PATH} has been generated, which means it and ${DIR_NAME} are the same dir thus do not do mv any more"
exit 0
fi

mv ${DIR_NAME} ${TARGET_PATH}
mv "${DIR_NAME}" "${TARGET_PATH}"
54 changes: 32 additions & 22 deletions scripts/pack_server.sh
Original file line number | Diff line number | Diff line change
Expand Up
@@ -149,30 +149,40 @@ pack_server_lib crypto $separate_servers
pack_server_lib ssl $separate_servers

# Pack hadoop-related files.
# If you want to use hdfs service to backup/restore/bulkload pegasus tables,
# you need to set env ${HADOOP_HOME}, edit ${HADOOP_HOME}/etc/hadoop/core-site.xml,
# and specify the keytab file.
if [ -n "$HADOOP_HOME" ] && [ -n "$keytab_file" ]; then
mkdir -p ${pack}/hadoop
copy_file $keytab_file ${pack}/hadoop
copy_file ${HADOOP_HOME}/etc/hadoop/core-site.xml ${pack}/hadoop
if [ -d $HADOOP_HOME/share/hadoop ]; then
for f in ${HADOOP_HOME}/share/hadoop/common/lib/*.jar; do
copy_file $f ${pack}/hadoop
done
for f in ${HADOOP_HOME}/share/hadoop/common/*.jar; do
copy_file $f ${pack}/hadoop
done
for f in ${HADOOP_HOME}/share/hadoop/hdfs/lib/*.jar; do
copy_file $f ${pack}/hadoop
done
for f in ${HADOOP_HOME}/share/hadoop/hdfs/*.jar; do
copy_file $f ${pack}/hadoop
done
# If you want to use hdfs service to backup/restore/bulkload pegasus tables, you need to
# set env ${HADOOP_HOME} to the proper directory where contains Hadoop *.jar files.
if [ -n "$HADOOP_HOME" ]; then
# Verify one of the jars.
arch_output=$(arch)
if [ "$arch_output"x == "x86_64"x ]; then
HDFS_JAR_MD5="f67f3a5613c885e1622b1056fd94262b"
elif [ "$arch_output"x == "aarch64"x ]; then
HDFS_JAR_MD5="fcc09dbed936cd8673918774cc3ead6b"
else
HDFS_JAR_MD5="f67f3a5613c885e1622b1056fd94262b"
echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
fi
HDFS_JAR=${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs-3.3.6.jar
if [ "$(md5sum "${HDFS_JAR}" | awk '{print$1}')" != "${HDFS_JAR_MD5}" ]; then
echo "check file ${HDFS_JAR} md5sum failed!"
exit 1
fi
# Pack the jars.
mkdir -p ${pack}/hadoop
for f in ${HADOOP_HOME}/share/hadoop/common/lib/*.jar; do
copy_file $f ${pack}/hadoop
done
for f in ${HADOOP_HOME}/share/hadoop/common/*.jar; do
copy_file $f ${pack}/hadoop
done
for f in ${HADOOP_HOME}/share/hadoop/hdfs/lib/*.jar; do
copy_file $f ${pack}/hadoop
done
for f in ${HADOOP_HOME}/share/hadoop/hdfs/*.jar; do
copy_file $f ${pack}/hadoop
done
else
echo "Couldn't find env ${HADOOP_HOME} or no valid keytab file was specified,
hadoop-related files were not packed."
echo "Couldn't find env HADOOP_HOME, hadoop-related files were not packed."
fi

DISTRIB_ID=$(cat /etc/*-release | grep DISTRIB_ID | awk -F'=' '{print $2}')
Expand Down
2 changes: 1 addition & 1 deletion src/sample/run.sh
Original file line number | Diff line number | Diff line change
Expand Up
@@ -33,7 +33,7 @@ if [ "$arch_output"x == "x86_64"x ]; then
elif [ "$arch_output"x == "aarch64"x ]; then
ARCH_TYPE="aarch64"
else
echo "WARNING: unsupported CPU architecture '$arch_output', use 'x86_64' as default"
echo "WARNING: unrecognized CPU architecture '$arch_output', use 'x86_64' as default"
fi
export LD_LIBRARY_PATH=${JAVA_HOME}/jre/lib/${ARCH_TYPE}:${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server:${PEGASUS_THIRDPARTY_ROOT}/output/lib:$(pwd)/../../lib:${LD_LIBRARY_PATH}

Expand Down
25 changes: 16 additions & 9 deletions thirdparty/CMakeLists.txt
Original file line number | Diff line number | Diff line change
Expand Up
@@ -120,14 +120,23 @@ ExternalProject_Add(gperftools
DOWNLOAD_NO_PROGRESS true
)

set(HDFS_CLIENT_DIR "hadoop-hdfs-project/hadoop-hdfs-native-client")
execute_process(COMMAND arch OUTPUT_VARIABLE ARCH_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
set(HADOOP_VERSION "hadoop-3.3.6")
if (ARCH_NAME STREQUAL "aarch64")
set(HADOOP_PACKAGE_NAME ${HADOOP_VERSION}-${ARCH_NAME})
set(HADOOP_MD5 "369f899194a920e0d1c3c3bc1718b3b5")
else ()
# Consider as x86_64 architecture as default
set(HADOOP_PACKAGE_NAME ${HADOOP_VERSION})
set(HADOOP_MD5 "1cbe1214299cd3bd282d33d3934b5cbd")
endif ()
set(HADOOP_URL_PREFIX "https://archive.apache.org/dist/hadoop/common/${HADOOP_VERSION}")

ExternalProject_Add(hadoop
URL ${OSS_URL_PREFIX}/hadoop-release-2.8.4.tar.gz
https://github.com/apache/hadoop/archive/refs/tags/rel/release-2.8.4.tar.gz
URL_MD5 a1be737d4bff14923689619ab6545a96
URL ${HADOOP_URL_PREFIX}/${HADOOP_PACKAGE_NAME}.tar.gz
URL_MD5 ${HADOOP_MD5}
PATCH_COMMAND ""
COMMAND cd ${HDFS_CLIENT_DIR} && mvn package -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar
COMMAND cd ${HDFS_CLIENT_DIR} && cp -R target/hadoop-hdfs-native-client-2.8.4/include/. ${TP_OUTPUT}/include/hdfs && cp -R target/hadoop-hdfs-native-client-2.8.4/lib/native/. ${TP_OUTPUT}/lib
COMMAND mkdir -p ${TP_OUTPUT}/include/hdfs/ && cp include/hdfs.h ${TP_OUTPUT}/include/hdfs/ && cp lib/native/libhdfs.a lib/native/libhdfs.so ${TP_OUTPUT}/lib/ && cd ${TP_OUTPUT}/lib/ && ln -sf libhdfs.so libhdfs.so.0.0.0
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
Expand Down
Expand Up
@@ -356,9 +365,7 @@ set(SNAPPY_OPTIONS
-DSNAPPY_BUILD_BENCHMARKS=OFF
-DSNAPPY_FUZZING_BUILD=OFF
-DSNAPPY_INSTALL=ON)
execute_process(COMMAND arch OUTPUT_VARIABLE ARCH_NAME OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "ARCH_NAME = ${ARCH_NAME}")
if (ARCH_NAME EQUAL "x86_64")
if (ARCH_NAME STREQUAL "x86_64")
set(SNAPPY_OPTIONS
${SNAPPY_OPTIONS}
-DSNAPPY_REQUIRE_AVX=ON
Expand Down

0 comments on commit c1f8a12

Please sign in to comment.