From 1713c4852ca66f9ec5023515a26f1f06902a21bc Mon Sep 17 00:00:00 2001 From: Vallari Agrawal Date: Thu, 1 Feb 2024 18:37:27 +0530 Subject: [PATCH] qa: add qa/tasks/nvmeof.py and rbd/nvmeof_basic_task and fio workunits This is v2 of the rbd/nvmeof test: it deploys 1 gateway and 1 initiator, then does basic verification of nvme commands and runs fio. This commit creates: 1. qa/tasks/nvmeof.py: adds a new 'Nvmeof' task which deploys the gateway and shares config with the initiator hosts. Sharing config was previously done by the 'nvmeof_gateway_cfg' task in qa/tasks/cephadm.py (that task is removed in this commit). 2. qa/workunits/rbd/nvmeof_basic_tests.sh: Runs nvme commands (discovery, connect, connect-all, disconnect-all, and list-subsys) and does basic verification of the output. 3. qa/workunits/rbd/nvmeof_fio_test.sh: Runs the fio command. Also runs iostat in parallel if the IOSTAT_INTERVAL variable is set. This variable configures the delay between each iostat print. The nvmeof-cli upgrade from v0.0.6 to v0.0.7 introduced major changes to all nvmeof commands. 
This commit changes v0.0.6 commands to v0.0.7 in qa/workunits/rbd/nvmeof_initiator.sh Signed-off-by: Vallari Agrawal --- qa/suites/rbd/nvmeof/base/install.yaml | 19 -- qa/suites/rbd/nvmeof/cluster/fixed-3.yaml | 2 + .../nvmeof/workloads/nvmeof_initiator.yaml | 23 ++- qa/tasks/cephadm.py | 38 ---- qa/tasks/nvmeof.py | 168 ++++++++++++++++++ qa/workunits/rbd/nvmeof_basic_tests.sh | 72 ++++++++ qa/workunits/rbd/nvmeof_fio_test.sh | 36 ++++ qa/workunits/rbd/nvmeof_initiator.sh | 74 ++------ 8 files changed, 311 insertions(+), 121 deletions(-) create mode 100644 qa/tasks/nvmeof.py create mode 100755 qa/workunits/rbd/nvmeof_basic_tests.sh create mode 100755 qa/workunits/rbd/nvmeof_fio_test.sh diff --git a/qa/suites/rbd/nvmeof/base/install.yaml b/qa/suites/rbd/nvmeof/base/install.yaml index 5a852f14dbe1b..6fc91d8f359f7 100644 --- a/qa/suites/rbd/nvmeof/base/install.yaml +++ b/qa/suites/rbd/nvmeof/base/install.yaml @@ -10,23 +10,4 @@ tasks: - ceph orch host ls - ceph orch device ls - ceph osd lspools - # create pool - - ceph osd pool create mypool - - rbd pool init mypool - # deploy nvmeof - ## Uncomment to test specific nvmeof images - ## - ceph config set mgr mgr/cephadm/container_image_nvmeof quay.io/ceph/nvmeof:latest - - ceph orch apply nvmeof mypool --placement="1 $(hostname)" - - ceph orch ps --refresh -- cephadm.wait_for_service: - service: nvmeof.mypool - -- cephadm.nvmeof_gateway_cfg: - source: host.a - target: client.1 - service: nvmeof.mypool - -- exec: - client.0: - - journalctl -u $(systemctl list-units | grep nvmeof.mypool | awk '{print $1}') diff --git a/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml b/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml index 42e696cd2f147..f417079e31a1b 100644 --- a/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml +++ b/qa/suites/rbd/nvmeof/cluster/fixed-3.yaml @@ -5,9 +5,11 @@ roles: - osd.0 - osd.1 - client.0 + - ceph.nvmeof.nvmeof.a - - host.b - mon.b - osd.2 - osd.3 - osd.4 - client.1 +- - client.2 diff --git 
a/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml b/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml index 4c947c1f787fa..bbb9b0ab5f2a0 100644 --- a/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml +++ b/qa/suites/rbd/nvmeof/workloads/nvmeof_initiator.yaml @@ -1,6 +1,27 @@ tasks: +- nvmeof: + client: client.0 + version: latest # "default" uses packaged version; change to test specific nvmeof images, example "latest" + rbd: + pool_name: mypool + image_name: myimage + gateway_config: + source: host.a + target: client.2 + vars: + cli_version: latest + +- cephadm.wait_for_service: + service: nvmeof.mypool + - workunit: no_coverage_and_limits: true clients: - client.1: + client.2: - rbd/nvmeof_initiator.sh + - rbd/nvmeof_basic_tests.sh + - rbd/nvmeof_fio_test.sh + env: + RBD_POOL: mypool + RBD_IMAGE: myimage + IOSTAT_INTERVAL: '10' diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py index 9e386bbd5c29b..f60aabc84359d 100644 --- a/qa/tasks/cephadm.py +++ b/qa/tasks/cephadm.py @@ -21,7 +21,6 @@ from teuthology.orchestra import run from teuthology.orchestra.daemon import DaemonGroup from teuthology.config import config as teuth_config -from teuthology.exceptions import ConfigError from textwrap import dedent from tasks.cephfs.filesystem import MDSCluster, Filesystem from tasks.util import chacra @@ -101,43 +100,6 @@ def update_archive_setting(ctx, key, value): yaml.safe_dump(info_yaml, info_file, default_flow_style=False) -@contextlib.contextmanager -def nvmeof_gateway_cfg(ctx, config): - source_host = config.get('source') - target_host = config.get('target') - nvmeof_service = config.get('service') - if not (source_host and target_host and nvmeof_service): - raise ConfigError('nvmeof_gateway_cfg requires "source", "target", and "service"') - remote = list(ctx.cluster.only(source_host).remotes.keys())[0] - ip_address = remote.ip_address - gateway_name = "" - r = remote.run(args=[ - 'systemctl', 'list-units', - run.Raw('|'), 'grep', nvmeof_service - ], 
stdout=StringIO()) - output = r.stdout.getvalue() - pattern_str = f"{re.escape(nvmeof_service)}(.*?)(?=\.service)" - pattern = re.compile(pattern_str) - match = pattern.search(output) - if match: - gateway_name = match.group() - conf_data = dedent(f""" - NVMEOF_GATEWAY_IP_ADDRESS={ip_address} - NVMEOF_GATEWAY_NAME={gateway_name} - """) - target_remote = list(ctx.cluster.only(target_host).remotes.keys())[0] - target_remote.write_file( - path='/etc/ceph/nvmeof.env', - data=conf_data, - sudo=True - ) - - try: - yield - finally: - pass - - @contextlib.contextmanager def normalize_hostnames(ctx): """ diff --git a/qa/tasks/nvmeof.py b/qa/tasks/nvmeof.py new file mode 100644 index 0000000000000..b75d00d93ae80 --- /dev/null +++ b/qa/tasks/nvmeof.py @@ -0,0 +1,168 @@ +import logging +from textwrap import dedent +from teuthology.task import Task +from teuthology import misc +from teuthology.exceptions import ConfigError +from tasks.util import get_remote_for_role +from tasks.cephadm import _shell + +log = logging.getLogger(__name__) + +conf_file = '/etc/ceph/nvmeof.env' + + +class Nvmeof(Task): + """ + Setup nvmeof gateway on client and then share gateway config to target host. 
+ + - nvmeof: + client: client.0 + version: default + rbd: + pool_name: mypool + image_name: myimage + rbd_size: 1024 + gateway_config: + source: host.a + target: client.2 + vars: + cli_version: latest + + """ + + def setup(self): + super(Nvmeof, self).setup() + try: + self.client = self.config['client'] + except KeyError: + raise ConfigError('nvmeof requires a client to connect with') + + self.cluster_name, type_, self.client_id = misc.split_role(self.client) + if type_ != 'client': + msg = 'client role ({0}) must be a client'.format(self.client) + raise ConfigError(msg) + self.remote = get_remote_for_role(self.ctx, self.client) + + def begin(self): + super(Nvmeof, self).begin() + self._set_defaults() + self.deploy_nvmeof() + self.set_gateway_cfg() + + def _set_defaults(self): + self.gateway_image = self.config.get('version', 'default') + + rbd_config = self.config.get('rbd', {}) + self.poolname = rbd_config.get('pool_name', 'mypool') + self.rbd_image_name = rbd_config.get('image_name', 'myimage') + self.rbd_size = rbd_config.get('rbd_size', 1024*8) + + gateway_config = self.config.get('gateway_config', {}) + conf_vars = gateway_config.get('vars', {}) + self.cli_image = conf_vars.get('cli_version', 'latest') + self.bdev = conf_vars.get('bdev', 'mybdev') + self.serial = conf_vars.get('serial', 'SPDK00000000000001') + self.nqn = conf_vars.get('nqn', 'nqn.2016-06.io.spdk:cnode1') + self.port = conf_vars.get('port', '4420') + self.srport = conf_vars.get('srport', '5500') + + def deploy_nvmeof(self): + """ + Deploy nvmeof gateway. 
+ """ + log.info('[nvmeof]: deploying nvmeof gateway...') + if not hasattr(self.ctx, 'ceph'): + self.ctx.ceph = {} + fsid = self.ctx.ceph[self.cluster_name].fsid + + nodes = [] + daemons = {} + + for remote, roles in self.ctx.cluster.remotes.items(): + for role in [r for r in roles + if misc.is_type('nvmeof', self.cluster_name)(r)]: + c_, _, id_ = misc.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + + if nodes: + image = self.gateway_image + if (image != "default"): + log.info(f'[nvmeof]: ceph config set mgr mgr/cephadm/container_image_nvmeof quay.io/ceph/nvmeof:{image}') + _shell(self.ctx, self.cluster_name, self.remote, [ + 'ceph', 'config', 'set', 'mgr', + 'mgr/cephadm/container_image_nvmeof', + f'quay.io/ceph/nvmeof:{image}' + ]) + + poolname = self.poolname + imagename = self.rbd_image_name + + log.info(f'[nvmeof]: ceph osd pool create {poolname}') + _shell(self.ctx, self.cluster_name, self.remote, [ + 'ceph', 'osd', 'pool', 'create', poolname + ]) + + log.info(f'[nvmeof]: rbd pool init {poolname}') + _shell(self.ctx, self.cluster_name, self.remote, [ + 'rbd', 'pool', 'init', poolname + ]) + + log.info(f'[nvmeof]: ceph orch apply nvmeof {poolname}') + _shell(self.ctx, self.cluster_name, self.remote, [ + 'ceph', 'orch', 'apply', 'nvmeof', poolname, + '--placement', str(len(nodes)) + ';' + ';'.join(nodes) + ]) + + log.info(f'[nvmeof]: rbd create {poolname}/{imagename} --size {self.rbd_size}') + _shell(self.ctx, self.cluster_name, self.remote, [ + 'rbd', 'create', f'{poolname}/{imagename}', '--size', f'{self.rbd_size}' + ]) + + for role, i in daemons.items(): + remote, id_ = i + self.ctx.daemons.register_daemon( + remote, 'nvmeof', id_, + cluster=self.cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + log.info("[nvmeof]: executed deploy_nvmeof successfully!") + + def set_gateway_cfg(self): + log.info('[nvmeof]: running 
set_gateway_cfg...') + gateway_config = self.config.get('gateway_config', {}) + source_host = gateway_config.get('source') + target_host = gateway_config.get('target') + if not (source_host and target_host): + raise ConfigError('gateway_config requires "source" and "target"') + remote = list(self.ctx.cluster.only(source_host).remotes.keys())[0] + ip_address = remote.ip_address + gateway_name = "" + nvmeof_daemons = self.ctx.daemons.iter_daemons_of_role('nvmeof', cluster=self.cluster_name) + for daemon in nvmeof_daemons: + if ip_address == daemon.remote.ip_address: + gateway_name = daemon.name() + conf_data = dedent(f""" + NVMEOF_GATEWAY_IP_ADDRESS={ip_address} + NVMEOF_GATEWAY_NAME={gateway_name} + NVMEOF_CLI_IMAGE="quay.io/ceph/nvmeof-cli:{self.cli_image}" + NVMEOF_BDEV={self.bdev} + NVMEOF_SERIAL={self.serial} + NVMEOF_NQN={self.nqn} + NVMEOF_PORT={self.port} + NVMEOF_SRPORT={self.srport} + """) + target_remote = list(self.ctx.cluster.only(target_host).remotes.keys())[0] + target_remote.write_file( + path=conf_file, + data=conf_data, + sudo=True + ) + log.info("[nvmeof]: executed set_gateway_cfg successfully!") + + +task = Nvmeof diff --git a/qa/workunits/rbd/nvmeof_basic_tests.sh b/qa/workunits/rbd/nvmeof_basic_tests.sh new file mode 100755 index 0000000000000..878e043fbeb56 --- /dev/null +++ b/qa/workunits/rbd/nvmeof_basic_tests.sh @@ -0,0 +1,72 @@ +#!/bin/bash -x + +source /etc/ceph/nvmeof.env +SPDK_CONTROLLER="SPDK bdev Controller" +DISCOVERY_PORT="8009" + +discovery() { + output=$(sudo nvme discover -t tcp -a $NVMEOF_GATEWAY_IP_ADDRESS -s $DISCOVERY_PORT) + expected_discovery_stdout="subtype: nvme subsystem" + if ! echo "$output" | grep -q "$expected_discovery_stdout"; then + return 1 + fi +} + +connect() { + sudo nvme connect -t tcp --traddr $NVMEOF_GATEWAY_IP_ADDRESS -s $NVMEOF_PORT -n $NVMEOF_NQN + output=$(sudo nvme list) + if ! 
echo "$output" | grep -q "$SPDK_CONTROLLER"; then + return 1 + fi +} + +disconnect_all() { + sudo nvme disconnect-all + output=$(sudo nvme list) + if echo "$output" | grep -q "$SPDK_CONTROLLER"; then + return 1 + fi +} + +connect_all() { + sudo nvme connect-all --traddr=$NVMEOF_GATEWAY_IP_ADDRESS --transport=tcp + output=$(sudo nvme list) + if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then + return 1 + fi +} + +list_subsys() { + expected_count=$1 + output=$(sudo nvme list-subsys --output-format=json) + multipath=$(echo $output | grep -c '"tcp"') + if [ "$multipath" -ne "$expected_count" ]; then + return 1 + fi +} + + +test_run() { + echo "[nvmeof] Running test: $1" + $1 "${@:2}" # execute func + if [ $? -eq 0 ]; then + echo "[nvmeof] $1 test passed!" + else + echo "[nvmeof] $1 test failed!" + exit 1 + fi +} + + +test_run disconnect_all +test_run discovery +test_run connect +test_run list_subsys 1 +test_run disconnect_all +test_run list_subsys 0 +test_run connect_all +test_run list_subsys 1 + + +echo "-------------Test Summary-------------" +echo "[nvmeof] All nvmeof basic tests passed!" diff --git a/qa/workunits/rbd/nvmeof_fio_test.sh b/qa/workunits/rbd/nvmeof_fio_test.sh new file mode 100755 index 0000000000000..bacc15e83eb7a --- /dev/null +++ b/qa/workunits/rbd/nvmeof_fio_test.sh @@ -0,0 +1,36 @@ +#!/bin/bash -ex + +sudo yum -y install fio +sudo yum -y install sysstat + +fio_file=$(mktemp -t nvmeof-fio-XXXX) +drives_list=$(sudo nvme list --output-format=json | jq -r '.Devices | .[] | select(.ModelNumber == "SPDK bdev Controller") | .DevicePath') + +RUNTIME=${RUNTIME:-600} +# IOSTAT_INTERVAL=10 + + +cat >> $fio_file < $io_input_file -truncate -s 2k $io_input_file -sudo dd if=$io_input_file of=$nvme_drive oflag=direct count=1 bs=2k #write -io_output_file="/tmp/nvmeof_test_output" -sudo dd if=$nvme_drive of=$io_output_file iflag=direct count=1 bs=2k #read -if ! cmp $io_input_file $io_output_file; then - echo "nvmeof initiator - io test failed!" 
- exit 1 -fi -sudo rm -f $io_input_file $io_output_file -echo "Test 3: basic IO - passed!" - -echo "nvmeof initiator tests passed!" +echo "[nvmeof] Initiator setup done" \ No newline at end of file