From b7df029fa65fc024293c000a6b2dd4d9236b6e2a Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <akuzm@yandex-team.ru>
Date: Fri, 27 Dec 2019 00:33:10 +0300
Subject: [PATCH] [wip] performance comparison fixes

---
 docker/test/performance-comparison/Dockerfile | 16 +++++--
 docker/test/performance-comparison/compare.sh | 48 ++++++++++++-------
 .../test/performance-comparison/entrypoint.sh |  8 ++++
 docker/test/performance-comparison/eqmed.sql  |  2 +-
 docker/test/performance-comparison/perf.py    | 21 ++++----
 5 files changed, 66 insertions(+), 29 deletions(-)
 create mode 100755 docker/test/performance-comparison/entrypoint.sh
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index 45900d414b..1e08ec0f52 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -1,9 +1,17 @@
 # docker build -t yandex/clickhouse-performance-comparison .
-FROM alpine
+FROM ubuntu:18.04
 
-RUN apk update && apk add --no-cache bash wget python3 python3-dev g++
-RUN pip3 --no-cache-dir install clickhouse_driver
-RUN apk del g++ python3-dev
+RUN apt-get update \
+    && apt-get install --yes --no-install-recommends \
+        p7zip-full bash ncdu wget python3 python3-pip python3-dev g++ \
+    && pip3 --no-cache-dir install clickhouse_driver \
+    && apt-get purge --yes python3-dev g++ \
+    && apt-get autoremove --yes \
+    && apt-get clean
 
 COPY * /
 
+CMD /entrypoint.sh
+
+# docker run --network=host --volume <workspace>:/workspace --volume=<output>:/output -e LEFT_PR=<> -e LEFT_SHA=<> -e RIGHT_PR=<> -e RIGHT_SHA=<> yandex/clickhouse-performance-comparison
+
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 5e7fa7e79f..7ecf715403 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -6,8 +6,6 @@ trap "kill 0" EXIT
 
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
-mkdir left ||:
-mkdir right ||:
 mkdir db0 ||:
 
 left_pr=$1
@@ -18,19 +16,21 @@ right_sha=$4
 
 function download
 {
+    rm -r left ||:
+    mkdir left ||:
+    rm -r right ||:
+    mkdir right ||:
+
     la="$left_pr-$left_sha.tgz"
     ra="$right_pr-$right_sha.tgz"
-    wget -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O "$la" && tar -C left --strip-components=1 -zxvf "$la" &
-    wget -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O "$ra" && tar -C right --strip-components=1 -zxvf "$ra" &
-    cd db0 && wget -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" && tar -xvf hits_10m_single.tar &
-    cd db0 && wget -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" && tar -xvf hits_100m_single.tar &
-    cd db0 && wget -nd -c "https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" && tar -xvf hits_v1.tar &
-    cd db0 && wget -nd -c "https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar" && tar -xvf visits_v1.tar &
+    wget -q -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O "$la" && tar -C left --strip-components=1 -zxvf "$la" &
+    wget -q -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O "$ra" && tar -C right --strip-components=1 -zxvf "$ra" &
+    cd db0 && wget -q -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" && tar -xvf hits_10m_single.tar &
+    cd db0 && wget -q -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" && tar -xvf hits_100m_single.tar &
+    cd db0 && wget -q -nd -c "https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" && tar -xvf hits_v1.tar &
     wait
 
     # Use hardlinks instead of copying
-    rm -r left/db ||:
-    rm -r right/db ||:
     cp -al db0/ left/db/
     cp -al db0/ right/db/
 }
@@ -40,16 +40,26 @@ function configure
 {
     sed -i 's/<tcp_port>9000/<tcp_port>9001/g' right/config/config.xml
 
-    cat > right/config/config.d/perf-test-tweaks.xml <<EOF
+    cat > right/config/config.d/zz-perf-test-tweaks.xml <<EOF
     <yandex>
         <logger>
             <console>true</console>
         </logger>
-        <text_log remove="remove"/>
+        <text_log remove="remove">
+            <table remove="remove"/>
+        </text_log>
+        <metric_log remove="remove">
+            <table remove="remove"/>
+        </metric_log>
     </yandex>
 EOF
 
-    cp right/config/config.d/perf-test-tweaks.xml left/config/config.d/perf-test-tweaks.xml
+    cp right/config/config.d/zz-perf-test-tweaks.xml left/config/config.d/zz-perf-test-tweaks.xml
+
+    rm left/config/config.d/metric_log.xml ||:
+    rm left/config/config.d/text_log.xml ||:
+    rm right/config/config.d/metric_log.xml ||:
+    rm right/config/config.d/text_log.xml ||:
 }
 configure
 
@@ -78,6 +88,11 @@ function restart
 
     while ! right/clickhouse client --port 9001 --query "select 1" ; do kill -0 $right_pid ; echo . ; sleep 1 ; done
     echo right ok
+
+    right/clickhouse client --port 9001 --query "create database test" ||:
+    right/clickhouse client --port 9001 --query "rename table datasets.hits_v1 to test.hits" ||:
+    left/clickhouse client --port 9000 --query "create database test" ||:
+    left/clickhouse client --port 9000 --query "rename table datasets.hits_v1 to test.hits" ||:
 }
 restart
 
@@ -90,13 +105,14 @@ function run_tests
     for test in left/performance/*.xml
     do
         test_name=$(basename $test ".xml")
-        "$script_dir/perf.py" "$test" > "$test_name-raw.tsv" || continue
+        "$script_dir/perf.py" "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" || continue
         right/clickhouse local --file "$test_name-raw.tsv" --structure 'query text, run int, version UInt32, time float' --query "$(cat $script_dir/eqmed.sql)" > "$test_name-report.tsv"
     done
 }
 run_tests
 
 # Analyze results
-result_structure="fail int, left float, right float, diff float, rd Array(float), query text"
+result_structure="left float, right float, diff float, rd Array(float), query text"
 right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where rd[3] > 0.05 order by rd[3] desc" > flap-prone.tsv
-right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff > 0.05 and diff > rd[3] order by diff desc" > failed.tsv
+right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff > 0.05 and diff > rd[3] order by diff desc" > bad-perf.tsv
+grep 'Exception:[^:]' *-err.log > run-errors.log ||:  # pattern quoted against globbing; grep exits 1 when nothing matches
diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh
new file mode 100755
index 0000000000..7ef5a9553a
--- /dev/null
+++ b/docker/test/performance-comparison/entrypoint.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Container entry point: run the comparison inside /workspace, then export logs and reports to /output.
+cd /workspace || exit 1
+
+../compare.sh "$LEFT_PR" "$LEFT_SHA" "$RIGHT_PR" "$RIGHT_SHA" > compare.log 2>&1
+# No errexit here on purpose: whatever was produced is archived even if compare.sh failed.
+7z a /output/output.7z *.log *.tsv
+cp compare.log /output
diff --git a/docker/test/performance-comparison/eqmed.sql b/docker/test/performance-comparison/eqmed.sql
index 22df87a289..5e8d842b7d 100644
--- a/docker/test/performance-comparison/eqmed.sql
+++ b/docker/test/performance-comparison/eqmed.sql
@@ -38,4 +38,4 @@ from
         group by query
    ) original_medians_array
 where rd.query = original_medians_array.query
-order by fail desc, rd_quantiles_percent[3] asc;
+order by rd_quantiles_percent[3] desc;
diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py
index 7a3e50e204..5517a71cc4 100755
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@@ -15,8 +15,13 @@ root = tree.getroot()
 
 # Check main metric
 main_metric_element = root.find('main_metric/*')
-if main_metric_element and main_metric_element.tag != 'min_time':
-    raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric))
+if main_metric_element is not None and main_metric_element.tag != 'min_time':
+    raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
+
+# FIXME another way to detect infinite tests. They should have an appropriate main_metric but sometimes they don't.
+infinite_sign = root.find('.//average_speed_not_changing_for_ms')
+if infinite_sign is not None:
+    raise Exception('Looks like the test is infinite (sign 1)')
 
 # Open connections
 servers = [{'host': 'localhost', 'port': 9000, 'client_name': 'left'}, {'host': 'localhost', 'port': 9001, 'client_name': 'right'}]
@@ -24,12 +29,9 @@ connections = [clickhouse_driver.Client(**server) for server in servers]
 
 # Check tables that should exist
 tables = [e.text for e in root.findall('preconditions/table_exists')]
-if tables:
+for t in tables:
     for c in connections:
-        tables_list = ", ".join("'{}'".format(t) for t in tables)
-        res = c.execute("select t from values('t text', {}) anti join system.tables on database = currentDatabase() and name = t".format(tables_list))
-        if res:
-            raise Exception('Some tables are not found: {}'.format(res))
+        c.execute("select 1 from {} limit 1".format(t))  # existence probe only; 'limit 1' avoids streaming every row
 
 # Apply settings
 settings = root.findall('settings/*')
@@ -76,6 +78,9 @@ for c in connections:
         c.execute(q)
 
 # Run test queries
+def tsv_escape(s):  # Escape backslash, tab and newline so a query fits in one TSV field; '\r' is dropped.
+    return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
+
 test_query_templates = [q.text for q in root.findall('query')]
 test_queries = substitute_parameters(test_query_templates, parameter_combinations)
 
@@ -83,7 +88,7 @@ for q in test_queries:
     for run in range(0, 7):
         for conn_index, c in enumerate(connections):
             res = c.execute(q)
-            print(q + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed))
+            print(tsv_escape(q) + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed))
 
 # Run drop queries
 drop_query_templates = [q.text for q in root.findall('drop_query')]
-- 
GitLab