Skip to content

Commit

Permalink
Merge branch 'release/2.4' into aurora/2.4
Browse files Browse the repository at this point in the history
  • Loading branch information
mjean308 committed Jan 18, 2024
2 parents 6172d80 + f13844d commit 83bb53a
Show file tree
Hide file tree
Showing 17 changed files with 129 additions and 96 deletions.
5 changes: 4 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -1223,7 +1223,10 @@ pipeline {
functionalTest(
inst_repos: daosRepos(),
inst_rpms: functionalPackages(1, next_version, 'client-tests-openmpi'),
test_function: 'runTestFunctionalV2'))
test_function: 'runTestFunctionalV2',
ftest_arg: getFunctionalArgs(
pragma_suffix: '-hw-medium-ucx-provider',
provider: cachedCommitPragma('Test-provider-ucx', 'ucx+ud_x'))['ftest_arg']))
}
post {
always {
Expand Down
2 changes: 1 addition & 1 deletion TAG
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.4.1-rc1
2.4.1-rc2
26 changes: 16 additions & 10 deletions ci/provisioning/post_provision_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ source ci/provisioning/post_provision_config_common_functions.sh
# shellcheck disable=SC1091
source ci/junit.sh


: "${MLNX_VER_NUM:=latest-5.8}"

: "${DISTRO:=EL_7}"
DSL_REPO_var="DAOS_STACK_${DISTRO}_LOCAL_REPO"
DSG_REPO_var="DAOS_STACK_${DISTRO}_GROUP_REPO"
Expand All @@ -42,32 +45,35 @@ if ! retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \
DAOS_STACK_GROUP_REPO=\"${!DSG_REPO_var:-}\"
DAOS_STACK_EL_8_APPSTREAM_REPO=\"${!DSA_REPO_var:-}\"
DISTRO=\"$DISTRO\"
DAOS_STACK_RETRY_DELAY_SECONDS=\"${DAOS_STACK_RETRY_DELAY_SECONDS}\"
DAOS_STACK_RETRY_COUNT=\"${DAOS_STACK_RETRY_COUNT}\"
BUILD_URL=\"${BUILD_URL}\"
STAGE_NAME=\"${STAGE_NAME}\"
OPERATIONS_EMAIL=\"${OPERATIONS_EMAIL}\"
DAOS_STACK_RETRY_DELAY_SECONDS=\"$DAOS_STACK_RETRY_DELAY_SECONDS\"
DAOS_STACK_RETRY_COUNT=\"$DAOS_STACK_RETRY_COUNT\"
MLNX_VER_NUM=\"$MLNX_VER_NUM\"
BUILD_URL=\"$BUILD_URL\"
STAGE_NAME=\"$STAGE_NAME\"
OPERATIONS_EMAIL=\"$OPERATIONS_EMAIL\"
COMMIT_MESSAGE=\"$sanitized_commit_message\"
REPO_FILE_URL=\"$REPO_FILE_URL\"
ARTIFACTORY_URL=\"${ARTIFACTORY_URL:-}\"
BRANCH_NAME=\"${BRANCH_NAME:-}\"
CHANGE_TARGET=\"${CHANGE_TARGET:-}\"
CI_RPM_TEST_VERSION=\"${CI_RPM_TEST_VERSION:-}\"
CI_PR_REPOS=\"${CI_PR_REPOS:-}\"
REPO_PATH=\"${REPO_PATH:-}\"
ARTIFACTS_URL=\"${ARTIFACTS_URL:-}\"
$(cat ci/stacktrace.sh)
$(cat ci/junit.sh)
$(cat ci/provisioning/post_provision_config_common_functions.sh)
$(cat ci/provisioning/post_provision_config_common.sh)
$(cat ci/provisioning/post_provision_config_nodes_"${DISTRO}".sh)
$(cat ci/provisioning/post_provision_config_nodes_"$DISTRO".sh)
$(cat ci/provisioning/post_provision_config_nodes.sh)"; then
report_junit post_provision_config.sh results.xml "$NODESTRING"
exit 1
fi

git log --format=%s -n 1 HEAD | \
git log --format=%B -n 1 HEAD | sed -ne '1s/^\([A-Z][A-Z]*-[0-9][0-9]*\) .*/\1/p' \
-e '/^Fixes:/{s/^Fixes: *//;s/ /\
/g;p}' | \
retry_cmd 60 ssh -i ci_key -l jenkins "${NODELIST%%,*}" \
"cat >/tmp/commit_title"
"cat >/tmp/commit_fixes"
git log --pretty=format:%h --abbrev-commit --abbrev=7 |
retry_cmd 60 ssh -i ci_key -l jenkins "${NODELIST%%,*}" "cat >/tmp/commit_list"
retry_cmd 600 ssh root@"${NODELIST%%,*}" "mkdir -p /scratch && " \
"mount wolf-2:/export/scratch /scratch"
22 changes: 8 additions & 14 deletions ci/provisioning/post_provision_config_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,38 +24,32 @@ if [ -n "$repo_files_pr" ]; then
REPO_FILE_URL="${JENKINS_URL:-https://build.hpdd.intel.com/}job/daos-do/job/repo-files/job/$branch/$build_number/artifact/"
fi

id=$(lsb_release -si)
release=$(lsb_release -sr)
. /etc/os-release
# shellcheck disable=SC2034
EXCLUDE_UPGRADE=mercury,daos,daos-\*
if rpm -qa | grep mlnx; then
# packages that must not be upgraded if MLNX OFED is installed
EXCLUDE_UPGRADE+=,openmpi,\*mlnx\*,\*ucx\*
fi
case "$id" in
CentOS|Rocky|AlmaLinux|RedHatEnterpriseServer)
if [ "${release%%.*}" = 7 ]; then
DISTRO_NAME=centos${release%%.*}
case "$ID_LIKE" in
*rhel*)
if [ "$VERSION_ID" = "7" ]; then
DISTRO_NAME=centos"$VERSION_ID"
EXCLUDE_UPGRADE+=,fuse
else
DISTRO_NAME=el${release%%.*}
DISTRO_NAME=el${VERSION_ID%%.*}
EXCLUDE_UPGRADE+=,dpdk\*
fi
REPOS_DIR=/etc/yum.repos.d
DISTRO_GENERIC=el
# shellcheck disable=SC2034
LSB_RELEASE=redhat-lsb-core
;;
openSUSE)
*suse*)
# shellcheck disable=SC2034
DISTRO_NAME=leap${release%%.*}
DISTRO_NAME=leap${VERSION_ID%%.*}
# shellcheck disable=SC2034
DISTRO_GENERIC=sl
# shellcheck disable=SC2034
REPOS_DIR=/etc/dnf/repos.d
EXCLUDE_UPGRADE+=,fuse,fuse-libs,fuse-devel
;;
esac

# shellcheck disable=SC2034
MLNX_VER_NUM=5.8-3.0.7.0
26 changes: 17 additions & 9 deletions ci/provisioning/post_provision_config_nodes_EL_8.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#
# (C) Copyright 2021-2022 Intel Corporation.
# (C) Copyright 2021-2023 Intel Corporation.
#
# SPDX-License-Identifier: BSD-2-Clause-Patent

Expand All @@ -16,8 +16,14 @@ group_repo_post() {

distro_custom() {
# install avocado
dnf -y install python3-avocado{,-plugins-{output-html,varianter-yaml-to-mux}} \
clustershell
local avocado_rpms=(python3-avocado{,-plugins-{output-html,varianter-yaml-to-mux}})
if [ -z "$(dnf repoquery "${avocado_rpms[@]}")" ]; then
avocado_rpms=()
pip install "avocado-framework<83.0"
pip install "avocado-framework-plugin-result-html<83.0"
pip install "avocado-framework-plugin-varianter-yaml-to-mux<83.0"
fi
dnf -y install "${avocado_rpms[@]}" clustershell

# for Launchable's pip install
dnf -y install python3-setuptools.noarch
Expand Down Expand Up @@ -47,21 +53,23 @@ install_mofed() {


stream=false
gversion="$(lsb_release -sr)"
gversion="$VERSION_ID"
if [ "$gversion" == "8" ]; then
gversion="8.6"
# Mellanox does not have a release for 8.9 yet.
gversion="8.8"
stream=true
elif [[ $gversion = *.*.* ]]; then
gversion="${gversion%.*}"
fi

# Add a repo to install MOFED RPMS
repo_url=https://artifactory.dc.hpdd.intel.com/artifactory/mlnx_ofed/"$MLNX_VER_NUM-rhel$gversion"-x86_64/
artifactory_base_url="https://artifactory.dc.hpdd.intel.com/artifactory/"
mellanox_proxy="${artifactory_base_url}mellanox-proxy/mlnx_ofed/"
mellanox_key_url="${artifactory_base_url}mlnx_ofed/RPM-GPG-KEY-Mellanox"
rpm --import "$mellanox_key_url"
repo_url="$mellanox_proxy$MLNX_VER_NUM/rhel$gversion/x86_64/"
dnf -y config-manager --add-repo="$repo_url"
curl -L -O "$repo_url"RPM-GPG-KEY-Mellanox
dnf -y config-manager --save --setopt="$(url_to_repo "$repo_url")".gpgcheck=1
rpm --import RPM-GPG-KEY-Mellanox
rm -f RPM-GPG-KEY-Mellanox
dnf repolist || true

time dnf -y install mlnx-ofed-basic ucx-cma ucx-ib ucx-knem ucx-rdmacm ucx-xpmem
Expand Down
7 changes: 7 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
daos (2.4.1-2) unstable; urgency=medium

[ Phillip Henderson ]
* Second release candidate for 2.4.1

-- Phillip Henderson <phillip.henderson@intel.com> Fri, 05 Jan 2024 12:51:00 -0500

daos (2.4.1-1) unstable; urgency=medium

[ Phillip Henderson ]
Expand Down
38 changes: 21 additions & 17 deletions src/common/rsvc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2017-2022 Intel Corporation.
* (C) Copyright 2017-2023 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -53,7 +53,7 @@ rsvc_client_init(struct rsvc_client *client, const d_rank_list_t *ranks)
return -DER_NOMEM;
}
rsvc_client_reset_leader(client);
client->sc_next = 0;
client->sc_next = -1;
return 0;
}

Expand All @@ -78,26 +78,28 @@ rsvc_client_fini(struct rsvc_client *client)
int
rsvc_client_choose(struct rsvc_client *client, crt_endpoint_t *ep)
{
int chosen = -1;
int chosen;

D_DEBUG(DB_MD, DF_CLI"\n", DP_CLI(client));

if (client->sc_ranks->rl_nr == 0) {
D_DEBUG(DB_MD, "replica list empty\n");
return -DER_NOTREPLICA;
}

if (client->sc_leader_known && client->sc_leader_aliveness > 0) {
chosen = client->sc_leader_index;
} else if (client->sc_ranks->rl_nr > 0) {
} else {
if (client->sc_next < 0)
client->sc_next = d_randn(client->sc_ranks->rl_nr);
chosen = client->sc_next;
/* The hintless search is a round robin of all replicas. */
client->sc_next++;
client->sc_next %= client->sc_ranks->rl_nr;
}

if (chosen == -1) {
D_DEBUG(DB_MD, "replica list empty\n");
return -DER_NOTREPLICA;
} else {
D_ASSERTF(chosen >= 0 && chosen < client->sc_ranks->rl_nr,
"%d\n", chosen);
ep->ep_rank = client->sc_ranks->rl_ranks[chosen];
}
D_ASSERTF(chosen >= 0 && chosen < client->sc_ranks->rl_nr, "chosen=%d\n", chosen);
ep->ep_rank = client->sc_ranks->rl_ranks[chosen];
ep->ep_tag = 0;
return 0;
}
Expand Down Expand Up @@ -127,7 +129,7 @@ rsvc_client_process_error(struct rsvc_client *client, int rc,
(rl->rl_nr - pos) * sizeof(*rl->rl_ranks));
client->sc_next = pos;
} else {
client->sc_next = 0;
client->sc_next = (rl->rl_nr > 0 ? 0 : -1);
}
D_ERROR("removed rank %u from replica list due to "DF_RC"\n",
ep->ep_rank, DP_RC(rc));
Expand All @@ -145,10 +147,12 @@ rsvc_client_process_error(struct rsvc_client *client, int rc,
* Gave up this leader. Start the hintless
* search.
*/
D_DEBUG(DB_MD, "give up leader rank %u\n",
ep->ep_rank);
client->sc_next = client->sc_leader_index + 1;
client->sc_next %= client->sc_ranks->rl_nr;
D_DEBUG(DB_MD, "give up leader rank %u\n", ep->ep_rank);
client->sc_next = d_randn(client->sc_ranks->rl_nr);
if (client->sc_next == leader_index) {
client->sc_next++;
client->sc_next %= client->sc_ranks->rl_nr;
}
}
}
}
Expand Down
13 changes: 13 additions & 0 deletions src/gurt/misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,19 @@ d_rand()
return result;
}

/*
 * Pick a random integer r for a range of n values.
 *
 * The result satisfies 0 <= r < n. The caller must pass n > 0; this is
 * enforced with D_ASSERT.
 */
long int
d_randn(long int n)
{
	long int scaled;

	D_ASSERT(n > 0);

	/* Normalize the d_rand() sample by D_RAND_MAX, then stretch it by n. */
	scaled = ((double)d_rand() / D_RAND_MAX) * n;

	/*
	 * The product can come out as n itself (presumably when d_rand() returns
	 * D_RAND_MAX), which is outside [0, n); map that case to 0.
	 */
	return scaled < n ? scaled : 0;
}

void
d_free(void *ptr)
{
Expand Down
1 change: 1 addition & 0 deletions src/include/gurt/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ extern "C" {

void d_srand(long int);
long int d_rand(void);
long int d_randn(long int n);

/* memory allocating macros */
void d_free(void *);
Expand Down
3 changes: 2 additions & 1 deletion src/pool/rpc.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright 2016-2022 Intel Corporation.
* (C) Copyright 2016-2023 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -169,6 +169,7 @@ extern int dc_pool_proto_version;

CRT_RPC_DECLARE(pool_op, DAOS_ISEQ_POOL_OP, DAOS_OSEQ_POOL_OP)

/* If pri_op.pi_hdl is not null, call rdb_campaign. */
#define DAOS_ISEQ_POOL_CREATE /* input fields */ \
((struct pool_op_in) (pri_op) CRT_VAR) \
((d_rank_list_t) (pri_tgt_ranks) CRT_PTR) \
Expand Down
18 changes: 18 additions & 0 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,7 @@ ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group,
struct pool_create_in *in;
struct pool_create_out *out;
struct d_backoff_seq backoff_seq;
int n_attempts = 0;
int rc;

/* Check for default label supplied via property. */
Expand Down Expand Up @@ -927,9 +928,16 @@ ds_pool_svc_dist_create(const uuid_t pool_uuid, int ntargets, const char *group,
in->pri_ndomains = ndomains;
in->pri_domains.ca_count = ndomains;
in->pri_domains.ca_arrays = (uint32_t *)domains;
if (n_attempts == 0)
/*
* This is our first attempt. Use a non-null pi_hdl to ask the
* chosen PS replica to campaign.
*/
uuid_generate(in->pri_op.pi_hdl);

/* Send the POOL_CREATE request. */
rc = dss_rpc_send(rpc);
n_attempts++;
out = crt_reply_get(rpc);
D_ASSERT(out != NULL);
rc = rsvc_client_complete_rpc(&client, &ep, rc,
Expand Down Expand Up @@ -2605,6 +2613,16 @@ ds_pool_create_handler(crt_rpc_t *rpc)
D_GOTO(out_mutex, rc = -DER_CANCELED);
}

if (!uuid_is_null(in->pri_op.pi_hdl)) {
/*
* Try starting a campaign without waiting for the election
* timeout. Since this is a performance optimization, ignore
* errors.
*/
rc = rdb_campaign(svc->ps_rsvc.s_db);
D_DEBUG(DB_MD, DF_UUID": campaign: "DF_RC"\n", DP_UUID(svc->ps_uuid), DP_RC(rc));
}

rc = rdb_tx_begin(svc->ps_rsvc.s_db, RDB_NIL_TERM, &tx);
if (rc != 0)
D_GOTO(out_mutex, rc);
Expand Down
Loading

0 comments on commit 83bb53a

Please sign in to comment.