From 9f9b794feb84129a4f9a07d015fc2c137131d247 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 00:15:22 +0200 Subject: [PATCH 01/15] Export logs from CI in performance (preparation) --- docker/test/performance-comparison/Dockerfile | 5 ++--- docker/test/performance-comparison/compare.sh | 2 ++ docker/test/performance-comparison/entrypoint.sh | 8 ++------ 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index cfd7c6138686..1cc644ba0b11 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -56,10 +56,9 @@ COPY * / # node #0 should be less stable because of system interruptions. We bind # randomly to node 1 or 0 to gather some statistics on that. We have to bind # both servers and the tmpfs on which the database is stored. How to do it -# through Yandex Sandbox API is unclear, but by default tmpfs uses +# is unclear, but by default tmpfs uses # 'process allocation policy', not sure which process but hopefully the one that -# writes to it, so just bind the downloader script as well. We could also try to -# remount it with proper options in Sandbox task. +# writes to it, so just bind the downloader script as well. # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index f949e66ab179..6814ffc5efd6 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -71,6 +71,8 @@ function configure while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done echo all killed + + set -m # Spawn temporary in its own process groups local setup_left_server_opts=( diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 74571777be07..fb5e6bd2a7af 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -130,7 +130,7 @@ then git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt fi -# Set python output encoding so that we can print queries with Russian letters. +# Set python output encoding so that we can print queries with non-ASCII letters. export PYTHONIOENCODING=utf-8 # By default, use the main comparison script from the tested package, so that we @@ -151,11 +151,7 @@ export PATH export REF_PR export REF_SHA -# Try to collect some core dumps. I've seen two patterns in Sandbox: -# 1) |/home/zomb-sandbox/venv/bin/python /home/zomb-sandbox/client/sandbox/bin/coredumper.py %e %p %g %u %s %P %c -# Not sure what this script does (puts them to sandbox resources, logs some messages?), -# and it's not accessible from inside docker anyway. -# 2) something like %e.%p.core.dmp. The dump should end up in the workspace directory. +# Try to collect some core dumps. # At least we remove the ulimit and then try to pack some common file names into output. ulimit -c unlimited cat /proc/sys/kernel/core_pattern From ec779ca246e4f4944cdf4bddda99ca40d2035275 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 00:31:48 +0200 Subject: [PATCH 02/15] Export logs from CI in performance (part 2) --- docker/test/base/setup_export_logs.sh | 13 +++---- docker/test/performance-comparison/compare.sh | 35 +++++++++++++++++-- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 12fae855b032..fdaf22a5d59e 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -7,6 +7,7 @@ # Pre-configured destination cluster, where to export the data CLUSTER=${CLUSTER:=system_logs_export} +LOCAL_PARAMETERS=$1 EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "} EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"} @@ -15,13 +16,13 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "} CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""} # Create all configured system logs: -clickhouse-client --query "SYSTEM FLUSH LOGS" +clickhouse-client $LOCAL_PARAMETERS --query "SYSTEM FLUSH LOGS" # For each system log table: -clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table +clickhouse-client $LOCAL_PARAMETERS --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table do # Calculate hash of its structure: - hash=$(clickhouse-client --query " + hash=$(clickhouse-client $LOCAL_PARAMETERS --query " SELECT sipHash64(groupArray((name, type))) FROM (SELECT name, type FROM system.columns WHERE database = 'system' AND table = '$table' @@ -29,7 +30,7 @@ do ") # Create the destination table with adapted name and structure: - statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' + statement=$(clickhouse-client $LOCAL_PARAMETERS --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' s/^\($/('"$EXTRA_COLUMNS"'/; s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/; s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; @@ -43,7 +44,7 @@ do echo "Creating table system.${table}_sender" >&2 # Create Distributed table and materialized view to watch on the original table: - clickhouse-client --query " + clickhouse-client $LOCAL_PARAMETERS --query " CREATE TABLE system.${table}_sender ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash}) EMPTY AS @@ -53,7 +54,7 @@ do echo "Creating materialized view system.${table}_watcher" >&2 - clickhouse-client --query " + clickhouse-client $LOCAL_PARAMETERS --query " CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS SELECT ${EXTRA_COLUMNS_EXPRESSION}, * FROM system.${table} diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 6814ffc5efd6..816bdef51c3d 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -63,6 +63,22 @@ function left_or_right() function configure { + # Setup a cluster for logs export to ClickHouse Cloud + # Note: these variables are provided to the Docker run command by the Python script in tests/ci + if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] + then + echo " +remote_servers: + system_logs_export: + shard: + replica: + secure: 1 + user: ci + host: '${CLICKHOUSE_CI_LOGS_HOST}' + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' +" > right/config/config.d/system_logs_export.yaml + fi + # Use the new config for both servers, so that we can change it in a PR. rm right/config/config.d/text_log.xml ||: cp -rv right/config left ||: @@ -71,8 +87,6 @@ function configure while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done echo all killed - - set -m # Spawn temporary in its own process groups local setup_left_server_opts=( @@ -92,7 +106,22 @@ function configure set +m wait_for_server $LEFT_SERVER_PORT $left_pid - echo Server for setup started + echo "Server for setup started" + + # Initialize export of system logs to ClickHouse Cloud + # Note: it is set up for the "left" server, and its database is then cloned to the "right" server. + if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] + then + export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" + export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" + + ./setup_export_logs.sh "--port $LEFT_SERVER_PORT" + + # Unset variables after use + export CONNECTION_PARAMETERS='' + export CLICKHOUSE_CI_LOGS_HOST='' + export CLICKHOUSE_CI_LOGS_PASSWORD='' + fi clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: From 32778e04071f08262d134b407e64d554d171bb01 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 00:34:01 +0200 Subject: [PATCH 03/15] Export logs from CI in performance (part 3) --- tests/ci/performance_comparison_check.py | 42 ++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 70d37b24c4ea..975ca26b7e8f 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import os import logging import sys @@ -20,11 +19,15 @@ from pr_info import PRInfo from s3_helper import S3Helper from tee_popen import TeePopen +from clickhouse_helper import get_instance_type +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/performance-comparison" def get_run_command( + check_start_time, + check_name, workspace, result_path, repo_tests_path, @@ -33,12 +36,26 @@ def get_run_command( additional_env, image, ): + instance_type = get_instance_type() + + envs = [ + "-e CLICKHOUSE_CI_LOGS_HOST", + "-e CLICKHOUSE_CI_LOGS_PASSWORD", + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + f"-e PR_TO_TEST={pr_to_test}", + f"-e SHA_TO_TEST={sha_to_test}", + ] + + env_str = " ".join(envs) + return ( f"docker run --privileged --volume={workspace}:/workspace " f"--volume={result_path}:/output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " - f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " + f"{envs} {additional_env} " f"{image}" ) @@ -62,6 +79,9 @@ def __exit__(self, exc_type, exc_val, exc_tb): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) repo_tests_path = os.path.join(repo_path, "tests") @@ -157,6 +177,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): docker_env += "".join([f" -e {name}" for name in env_extra]) run_command = get_run_command( + stopwatch.start_time_str, + check_name, result_path, result_path, repo_tests_path, @@ -180,6 +202,22 @@ def __exit__(self, exc_type, exc_val, exc_tb): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + # Cleanup run log from the credentials of CI logs database. + # Note: a malicious user can still print them by splitting the value into parts. + # But we will be warned when a malicious user modifies CI script. + # Although they can also print them from inside tests. + # Nevertheless, the credentials of the CI logs have limited scope + # and does not provide access to sensitive info. + + ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") + ci_logs_password = os.getenv( + "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + subprocess.check_call( + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", + shell=True, + ) + paths = { "compare.log": os.path.join(result_path, "compare.log"), "output.7z": os.path.join(result_path, "output.7z"), From 221dd53d37bd555c27a2a001eb87d5142759149d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 00:36:50 +0200 Subject: [PATCH 04/15] Fixup --- tests/ci/performance_comparison_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 975ca26b7e8f..3fd664106977 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -55,7 +55,7 @@ def get_run_command( f"--volume={result_path}:/output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " - f"{envs} {additional_env} " + f"{env_str} {additional_env} " f"{image}" ) From 49485a67700f80bd1bf544ef894ef62bf6ec451a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 02:43:51 +0200 Subject: [PATCH 05/15] Fix shellcheck --- docker/test/base/setup_export_logs.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index fdaf22a5d59e..ef510552d2f9 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -1,4 +1,5 @@ #!/bin/bash +# shellcheck disable=SC2086 # This script sets up export of system log tables to a remote server. # Remote tables are created if not exist, and augmented with extra columns, From 8dc0884099665db603f7903fe5a2fa79de8b9a2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 04:20:06 +0200 Subject: [PATCH 06/15] Fix error --- docker/test/performance-comparison/compare.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 816bdef51c3d..9711131f6547 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -75,6 +75,7 @@ remote_servers: secure: 1 user: ci host: '${CLICKHOUSE_CI_LOGS_HOST}' + port: 9440 password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' " > right/config/config.d/system_logs_export.yaml fi From 645834ffb6e39c2017ec3a89c194491b744afc0c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 04:54:20 +0200 Subject: [PATCH 07/15] Fix errors --- docker/test/performance-comparison/download.sh | 2 -- tests/ci/performance_comparison_check.py | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index aee110300685..cb243b655c6b 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -31,8 +31,6 @@ function download # Test all of them. declare -a urls_to_try=( "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz" ) for path in "${urls_to_try[@]}" diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 3fd664106977..70369f9881ed 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -190,6 +190,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): logging.info("Going to run command %s", run_command) run_log_path = os.path.join(temp_path, "run.log") + compare_log_path = os.path.join(result_path, "compare.log") popen_env = os.environ.copy() popen_env.update(env_extra) @@ -214,12 +215,12 @@ def __exit__(self, exc_type, exc_val, exc_tb): "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" ) subprocess.check_call( - f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{compare_log_path}'", shell=True, ) paths = { - "compare.log": os.path.join(result_path, "compare.log"), + "compare.log": compare_log_path, "output.7z": os.path.join(result_path, "output.7z"), "report.html": os.path.join(result_path, "report.html"), "all-queries.html": os.path.join(result_path, "all-queries.html"), From d434e58eae7ff8ec9b7fa7e94398fe3dd84444e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 06:45:41 +0200 Subject: [PATCH 08/15] Fix errors --- docker/test/performance-comparison/Dockerfile | 4 ---- docker/test/performance-comparison/compare.sh | 20 ++++++++++++------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1cc644ba0b11..f3cab77bdbb1 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,9 +1,5 @@ # docker build -t clickhouse/performance-comparison . -# Using ubuntu:22.04 over 20.04 as all other images, since: -# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend -# b) anyway for perf tests it should not be important (backward compatiblity -# with older ubuntu had been checked lots of times in various tests) FROM ubuntu:22.04 # ARG for quick switch to a given ubuntu mirror diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 9711131f6547..fe37df9ec757 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -67,6 +67,7 @@ function configure # Note: these variables are provided to the Docker run command by the Python script in tests/ci if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] then + set +x echo " remote_servers: system_logs_export: @@ -78,6 +79,7 @@ remote_servers: port: 9440 password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' " > right/config/config.d/system_logs_export.yaml + set -x fi # Use the new config for both servers, so that we can change it in a PR. @@ -113,15 +115,19 @@ remote_servers: # Note: it is set up for the "left" server, and its database is then cloned to the "right" server. if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] then - export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" - export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" + ( + set +x + export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" + export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" - ./setup_export_logs.sh "--port $LEFT_SERVER_PORT" + ./setup_export_logs.sh "--port $LEFT_SERVER_PORT" - # Unset variables after use - export CONNECTION_PARAMETERS='' - export CLICKHOUSE_CI_LOGS_HOST='' - export CLICKHOUSE_CI_LOGS_PASSWORD='' + # Unset variables after use + export CONNECTION_PARAMETERS='' + export CLICKHOUSE_CI_LOGS_HOST='' + export CLICKHOUSE_CI_LOGS_PASSWORD='' + set -x + ) fi clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: From d1e50b1cbf3962844b745e8cc7518c287fe13e7e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 06:48:19 +0200 Subject: [PATCH 09/15] Simplification --- docker/test/performance-comparison/Dockerfile | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index f3cab77bdbb1..d31663f90711 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,14 +1,7 @@ # docker build -t clickhouse/performance-comparison . -FROM ubuntu:22.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ENV LANG=C.UTF-8 -ENV TZ=Europe/Amsterdam -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ From 957045c70bd2957d76cefefbc92bce2c778e32f8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 13 Aug 2023 20:15:30 +0200 Subject: [PATCH 10/15] Maybe fix error --- docker/test/performance-comparison/compare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index fe37df9ec757..09d33647f557 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -120,7 +120,7 @@ remote_servers: export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" - ./setup_export_logs.sh "--port $LEFT_SERVER_PORT" + /setup_export_logs.sh "--port $LEFT_SERVER_PORT" # Unset variables after use export CONNECTION_PARAMETERS='' From 3c6af254bc1c1548052eb3c679d444f2835f90a3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Aug 2023 01:26:04 +0200 Subject: [PATCH 11/15] Remove something --- docker/test/performance-comparison/compare.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 09d33647f557..ce8c4903c00e 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -194,9 +194,9 @@ function restart wait_for_server $RIGHT_SERVER_PORT $right_pid echo right ok - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options" - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options" # Check again that both servers we started are running -- this is important @@ -390,14 +390,12 @@ function get_profiles wait clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: & - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: & - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & From e11e7c62181f39b756d0a49c25c3ce1879aa02e5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Aug 2023 01:26:15 +0200 Subject: [PATCH 12/15] Fix typos --- .../config/config.d/zzz-perf-comparison-tweaks-config.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml index 39c29bb61ca1..10a5916264a6 100644 --- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -19,10 +19,11 @@ - + And so, to avoid extra memory reference switch *_log to Memory engine. + --> ENGINE = Memory From e357702fd05b77ed01e43a9be38e1e6dfff393a8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 14 Aug 2023 01:26:38 +0200 Subject: [PATCH 13/15] What will happen if I remove this? --- .../zzz-perf-comparison-tweaks-config.xml | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml index 10a5916264a6..292665c4f68e 100644 --- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -19,32 +19,6 @@ - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - 1000000000 10 From 857856b8b674c46e4c768780efdc9631a1fdcc87 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 17 Aug 2023 03:58:32 +0200 Subject: [PATCH 14/15] Leave only simplifications --- docker/test/performance-comparison/compare.sh | 38 ------------------- tests/ci/performance_comparison_check.py | 18 --------- 2 files changed, 56 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index ce8c4903c00e..4b1b5c13b9b1 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -63,25 +63,6 @@ function left_or_right() function configure { - # Setup a cluster for logs export to ClickHouse Cloud - # Note: these variables are provided to the Docker run command by the Python script in tests/ci - if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] - then - set +x - echo " -remote_servers: - system_logs_export: - shard: - replica: - secure: 1 - user: ci - host: '${CLICKHOUSE_CI_LOGS_HOST}' - port: 9440 - password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' -" > right/config/config.d/system_logs_export.yaml - set -x - fi - # Use the new config for both servers, so that we can change it in a PR. rm right/config/config.d/text_log.xml ||: cp -rv right/config left ||: @@ -111,25 +92,6 @@ remote_servers: wait_for_server $LEFT_SERVER_PORT $left_pid echo "Server for setup started" - # Initialize export of system logs to ClickHouse Cloud - # Note: it is set up for the "left" server, and its database is then cloned to the "right" server. - if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] - then - ( - set +x - export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" - export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" - - /setup_export_logs.sh "--port $LEFT_SERVER_PORT" - - # Unset variables after use - export CONNECTION_PARAMETERS='' - export CLICKHOUSE_CI_LOGS_HOST='' - export CLICKHOUSE_CI_LOGS_PASSWORD='' - set -x - ) - fi - clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 70369f9881ed..27a67e2ae0ea 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -39,8 +39,6 @@ def get_run_command( instance_type = get_instance_type() envs = [ - "-e CLICKHOUSE_CI_LOGS_HOST", - "-e CLICKHOUSE_CI_LOGS_PASSWORD", f"-e CHECK_START_TIME='{check_start_time}'", f"-e CHECK_NAME='{check_name}'", f"-e INSTANCE_TYPE='{instance_type}'", @@ -203,22 +201,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - # Cleanup run log from the credentials of CI logs database. - # Note: a malicious user can still print them by splitting the value into parts. - # But we will be warned when a malicious user modifies CI script. - # Although they can also print them from inside tests. - # Nevertheless, the credentials of the CI logs have limited scope - # and does not provide access to sensitive info. - - ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") - ci_logs_password = os.getenv( - "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" - ) - subprocess.check_call( - f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{compare_log_path}'", - shell=True, - ) - paths = { "compare.log": compare_log_path, "output.7z": os.path.join(result_path, "output.7z"), From c3e6f7e9ae792b54ef713beb8a5513307af119f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 17 Aug 2023 03:59:15 +0200 Subject: [PATCH 15/15] Leave only simplifications --- docker/test/base/setup_export_logs.sh | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index ef510552d2f9..12fae855b032 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -1,5 +1,4 @@ #!/bin/bash -# shellcheck disable=SC2086 # This script sets up export of system log tables to a remote server. # Remote tables are created if not exist, and augmented with extra columns, @@ -8,7 +7,6 @@ # Pre-configured destination cluster, where to export the data CLUSTER=${CLUSTER:=system_logs_export} -LOCAL_PARAMETERS=$1 EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "} EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"} @@ -17,13 +15,13 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "} CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""} # Create all configured system logs: -clickhouse-client $LOCAL_PARAMETERS --query "SYSTEM FLUSH LOGS" +clickhouse-client --query "SYSTEM FLUSH LOGS" # For each system log table: -clickhouse-client $LOCAL_PARAMETERS --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table +clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table do # Calculate hash of its structure: - hash=$(clickhouse-client $LOCAL_PARAMETERS --query " + hash=$(clickhouse-client --query " SELECT sipHash64(groupArray((name, type))) FROM (SELECT name, type FROM system.columns WHERE database = 'system' AND table = '$table' @@ -31,7 +29,7 @@ do ") # Create the destination table with adapted name and structure: - statement=$(clickhouse-client $LOCAL_PARAMETERS --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' + statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' s/^\($/('"$EXTRA_COLUMNS"'/; s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/; s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; @@ -45,7 +43,7 @@ do echo "Creating table system.${table}_sender" >&2 # Create Distributed table and materialized view to watch on the original table: - clickhouse-client $LOCAL_PARAMETERS --query " + clickhouse-client --query " CREATE TABLE system.${table}_sender ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash}) EMPTY AS @@ -55,7 +53,7 @@ do echo "Creating materialized view system.${table}_watcher" >&2 - clickhouse-client $LOCAL_PARAMETERS --query " + clickhouse-client --query " CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS SELECT ${EXTRA_COLUMNS_EXPRESSION}, * FROM system.${table}