Commit c153268d authored by Alexander Kuzmenkov

Make `-- { echo }` hint preserve leading comments.

Parent 8fe37ac7
......@@ -335,7 +335,7 @@ function run_tests
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
# We will rerun sequentially any tests that have failed during parallel run.
# They might have failed because there was some interference from other tests
......
......@@ -670,14 +670,14 @@ private:
actual_client_error = e.code();
if (!actual_client_error || actual_client_error != expected_client_error)
{
std::cerr << std::endl
<< "Exception on client:" << std::endl
<< "Code: " << e.code() << ". " << e.displayText() << std::endl;
std::cerr << std::endl
<< "Exception on client:" << std::endl
<< "Code: " << e.code() << ". " << e.displayText() << std::endl;
if (config().getBool("stacktrace", false))
std::cerr << "Stack trace:" << std::endl << e.getStackTraceString() << std::endl;
if (config().getBool("stacktrace", false))
std::cerr << "Stack trace:" << std::endl << e.getStackTraceString() << std::endl;
std::cerr << std::endl;
std::cerr << std::endl;
}
......@@ -845,8 +845,59 @@ private:
return processMultiQuery(text);
}
// Consumes trailing semicolons and tries to consume the same-line trailing
// comment.
static void adjustQueryEnd(const char *& this_query_end,
const char * all_queries_end, int max_parser_depth)
{
// We have to skip the trailing semicolon that might be left
// after VALUES parsing or just after a normal semicolon-terminated query.
Tokens after_query_tokens(this_query_end, all_queries_end);
IParser::Pos after_query_iterator(after_query_tokens, max_parser_depth);
while (after_query_iterator.isValid()
&& after_query_iterator->type == TokenType::Semicolon)
{
this_query_end = after_query_iterator->end;
++after_query_iterator;
}
// Now we have to do some extra work to add the trailing
// same-line comment to the query, but preserve the leading
// comments of the next query. The trailing comment is important
// because the test hints are usually written this way, e.g.:
// select nonexistent_column; -- { serverError 12345 }.
// The token iterator skips comments and whitespace, so we have
// to find the newline in the string manually. If it's earlier
// than the next significant token, it means that the text before
// newline is some trailing whitespace or comment, and we should
// add it to our query. There are also several special cases
// that are described below.
const auto * newline = find_first_symbols<'\n'>(this_query_end,
all_queries_end);
const char * next_query_begin = after_query_iterator->begin;
// We include the entire line if the next query starts after
// it. This is a generic case of trailing in-line comment.
// The "equals" condition is for case of end of input (they both equal
// all_queries_end);
if (newline <= next_query_begin)
{
assert(newline >= this_query_end);
this_query_end = newline;
}
else
{
// Many queries on one line, can't do anything. By the way, this
// syntax is probably going to work as expected:
// select nonexistent /* { serverError 12345 } */; select 1
}
}
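As an aside, the trailing-comment adjustment above can be sketched standalone, without the ClickHouse Tokens/IParser types; the helper names below (adjust_query_end_sketch, next_significant) are made up for the example, and a plain character scan stands in for the real token iterator.

```cpp
#include <cassert>
#include <cctype>
#include <cstring>
#include <iostream>
#include <string_view>

// Hypothetical, simplified stand-in for the token iterator: "significant"
// here means anything that is not whitespace and not a "-- ..." comment.
static const char * next_significant(const char * p, const char * end)
{
    while (p < end)
    {
        if (std::isspace(static_cast<unsigned char>(*p)))
            ++p;
        else if (end - p >= 2 && p[0] == '-' && p[1] == '-')
        {
            // Single-line comment: skip to end of line.
            while (p < end && *p != '\n')
                ++p;
        }
        else
            break;
    }
    return p;
}

// Simplified sketch of adjustQueryEnd: consume trailing semicolons, then
// extend the query end to the newline only if the next query starts later.
static void adjust_query_end_sketch(const char *& query_end, const char * all_end)
{
    while (query_end < all_end && *query_end == ';')
        ++query_end;

    const char * newline = static_cast<const char *>(
        memchr(query_end, '\n', all_end - query_end));
    if (!newline)
        newline = all_end;
    const char * next_token = next_significant(query_end, all_end);

    // Include the whole line (i.e. the trailing "-- { ... }" hint) only if
    // the next significant token starts on a later line or at end of input.
    if (newline <= next_token)
    {
        assert(newline >= query_end);
        query_end = newline;
    }
    // Otherwise there are several queries on one line; keep query_end as is.
}

int main()
{
    std::string_view text =
        "select nonexistent_column; -- { serverError 12345 }\nselect 1;\n";
    const char * end = text.data() + text.find(';');
    adjust_query_end_sketch(end, text.data() + text.size());
    // Prints the first query together with its trailing hint comment.
    std::cout << std::string_view(text.data(), end - text.data()) << '\n';
}
```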
bool processMultiQuery(const String & all_queries_text)
{
// It makes sense not to base any control flow on this, so that it is
// the same in tests and in normal usage. The only difference is that in
// normal mode we ignore the test hints.
const bool test_mode = config().has("testmode");
{
......@@ -871,35 +922,31 @@ private:
while (this_query_begin < all_queries_end)
{
// Use the token iterator to skip any whitespace, semicolons and
// comments at the beginning of the query. An example from regression
// tests:
// insert into table t values ('invalid'); -- { serverError 469 }
// select 1
// Here the test hint comment gets parsed as a part of second query.
// We parse the `INSERT VALUES` up to the semicolon, and the rest
// looks like a two-line query:
// -- { serverError 469 }
// select 1
// and we expect it to fail with error 469, but this hint is actually
// for the previous query. Test hints should go after the query, so
// we can fix this by skipping leading comments. Token iterator skips
// comments and whitespace by itself, so we only have to check for
// semicolons.
// The code block is to limit visibility of `tokens` because we have
// another such variable further down the code, and get warnings for
// that.
// Remove leading empty newlines and other whitespace, because they
// are annoying to filter in query log. This is mostly relevant for
// the tests.
while (this_query_begin < all_queries_end
&& isWhitespaceASCII(*this_query_begin))
{
++this_query_begin;
}
if (this_query_begin >= all_queries_end)
{
break;
}
// If there are only comments left until the end of file, we just
// stop. The parser can't handle this situation because it always
// expects that there is some query that it can parse.
// We can get into this situation because the parser also doesn't
// skip the trailing comments after parsing a query. This is because
// they may as well be the leading comments for the next query,
// and it makes more sense to treat them as such.
{
Tokens tokens(this_query_begin, all_queries_end);
IParser::Pos token_iterator(tokens,
context.getSettingsRef().max_parser_depth);
while (token_iterator->type == TokenType::Semicolon
&& token_iterator.isValid())
{
++token_iterator;
}
this_query_begin = token_iterator->begin;
if (this_query_begin >= all_queries_end)
if (!token_iterator.isValid())
{
break;
}
......@@ -913,14 +960,23 @@ private:
}
catch (Exception & e)
{
if (!test_mode)
throw;
// Try to find test hint for syntax error. We don't know where
// the query ends because we failed to parse it, so we consume
// the entire line.
this_query_end = find_first_symbols<'\n'>(this_query_end,
all_queries_end);
TestHint hint(test_mode,
String(this_query_begin, this_query_end - this_query_begin));
/// Try find test hint for syntax error
const char * end_of_line = find_first_symbols<'\n'>(this_query_begin,all_queries_end);
TestHint hint(true, String(this_query_end, end_of_line - this_query_end));
if (hint.serverError()) /// Syntax errors are considered as client errors
if (hint.serverError())
{
// Syntax errors are considered as client errors
e.addMessage("\nExpected server error '{}'.",
hint.serverError());
throw;
}
if (hint.clientError() != e.code())
{
if (hint.clientError())
......@@ -929,7 +985,7 @@ private:
}
/// It's expected syntax error, skip the line
this_query_begin = end_of_line;
this_query_begin = this_query_end;
continue;
}
......@@ -956,10 +1012,14 @@ private:
// The VALUES format needs even more handling -- we also allow the
// data to be delimited by semicolon. This case is handled later by
// the format parser itself.
// We can't do multiline INSERTs with inline data, because most
// row input formats (e.g. TSV) can't tell when the input stops,
// unlike VALUES.
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
if (insert_ast && insert_ast->data)
{
this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end);
this_query_end = find_first_symbols<'\n'>(insert_ast->data,
all_queries_end);
insert_ast->end = this_query_end;
query_to_send = all_queries_text.substr(
this_query_begin - all_queries_text.data(),
......@@ -972,61 +1032,75 @@ private:
this_query_end - this_query_begin);
}
// full_query is the query + inline INSERT data.
// Try to include the trailing comment with test hints. It is just
// a guess for now, because we don't yet know where the query ends
// if it is an INSERT query with inline data. We will do it again
// after we have processed the query. But even this guess is
// beneficial so that we see proper trailing comments in "echo" and
// server log.
adjustQueryEnd(this_query_end, all_queries_end,
context.getSettingsRef().max_parser_depth);
// full_query is the query + inline INSERT data + trailing comments
// (the latter is our best guess for now).
full_query = all_queries_text.substr(
this_query_begin - all_queries_text.data(),
this_query_end - this_query_begin);
if (query_fuzzer_runs)
{
if (!processWithFuzzing(full_query))
return false;
this_query_begin = this_query_end;
continue;
}
// Look for the hint in the text of query + insert data, if any.
// e.g. insert into t format CSV 'a' -- { serverError 123 }.
TestHint test_hint(test_mode, full_query);
expected_client_error = test_hint.clientError();
expected_server_error = test_hint.serverError();
if (query_fuzzer_runs)
{
if (!processWithFuzzing(full_query))
return false;
}
else
try
{
try
{
processParsedSingleQuery();
processParsedSingleQuery();
if (insert_ast && insert_ast->data)
{
// For VALUES format: use the end of inline data as reported
// by the format parser (it is saved in sendData()). This
// allows us to handle queries like:
// insert into t values (1); select 1
//, where the inline data is delimited by semicolon and not
// by a newline.
this_query_end = parsed_query->as<ASTInsertQuery>()->end;
}
}
catch (...)
if (insert_ast && insert_ast->data)
{
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
actual_client_error = last_exception_received_from_server->code();
if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error))
std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message();
received_exception_from_server = true;
// For VALUES format: use the end of inline data as reported
// by the format parser (it is saved in sendData()). This
// allows us to handle queries like:
// insert into t values (1); select 1
//, where the inline data is delimited by semicolon and not
// by a newline.
this_query_end = parsed_query->as<ASTInsertQuery>()->end;
adjustQueryEnd(this_query_end, all_queries_end,
context.getSettingsRef().max_parser_depth);
}
}
catch (...)
{
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
actual_client_error = last_exception_received_from_server->code();
if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error))
std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message();
received_exception_from_server = true;
}
if (!test_hint.checkActual(
actual_server_error, actual_client_error, received_exception_from_server, last_exception_received_from_server))
{
connection->forceConnected(connection_parameters.timeouts);
}
if (!test_hint.checkActual(
actual_server_error, actual_client_error, received_exception_from_server, last_exception_received_from_server))
{
connection->forceConnected(connection_parameters.timeouts);
}
if (received_exception_from_server && !ignore_error)
{
if (is_interactive)
break;
else
return false;
}
if (received_exception_from_server && !ignore_error)
{
if (is_interactive)
break;
else
return false;
}
this_query_begin = this_query_end;
......
......@@ -23,18 +23,27 @@ namespace ErrorCodes
class TestHint
{
public:
TestHint(bool enabled_, const String & query_)
: enabled(enabled_)
, query(query_)
TestHint(bool enabled_, const String & query_) :
enabled(enabled_),
query(query_)
{
if (!enabled_)
return;
// Don't parse error hints in leading comments, because it feels weird.
// Leading 'echo' hint is OK.
bool is_leading_hint = true;
Lexer lexer(query.data(), query.data() + query.size());
for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken())
{
if (token.type == TokenType::Comment)
if (token.type != TokenType::Comment
&& token.type != TokenType::Whitespace)
{
is_leading_hint = false;
}
else if (token.type == TokenType::Comment)
{
String comment(token.begin, token.begin + token.size());
......@@ -47,7 +56,7 @@ public:
if (pos_end != String::npos)
{
String hint(comment.begin() + pos_start + 1, comment.begin() + pos_end);
parse(hint);
parse(hint, is_leading_hint);
}
}
}
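A self-contained sketch of the new hint semantics (ignore serverError/clientError hints while still inside leading comments, but honour echo anywhere) might look like the following; it only understands `-- { ... }` single-line comments, and the name TestHintSketch is hypothetical, not the real class.

```cpp
#include <cctype>
#include <iostream>
#include <sstream>
#include <string>

// Hypothetical, simplified version of the TestHint parsing shown above.
struct TestHintSketch
{
    int server_error = 0;
    int client_error = 0;
    bool echo = false;

    explicit TestHintSketch(const std::string & query)
    {
        bool is_leading_hint = true;
        size_t pos = 0;
        while (pos < query.size())
        {
            if (std::isspace(static_cast<unsigned char>(query[pos])))
            {
                ++pos;
            }
            else if (query.compare(pos, 2, "--") == 0)
            {
                size_t eol = query.find('\n', pos);
                std::string comment = query.substr(
                    pos, eol == std::string::npos ? std::string::npos : eol - pos);
                size_t open = comment.find('{');
                size_t close = comment.find('}', open);
                if (open != std::string::npos && close != std::string::npos)
                    parse(comment.substr(open + 1, close - open - 1), is_leading_hint);
                pos = (eol == std::string::npos) ? query.size() : eol + 1;
            }
            else
            {
                // First significant character of the query: later hints are trailing.
                is_leading_hint = false;
                ++pos;
            }
        }
    }

    void parse(const std::string & hint, bool is_leading_hint)
    {
        std::stringstream ss(hint);
        std::string item;
        while (ss >> item)
        {
            // Error hints are ignored in leading comments...
            if (!is_leading_hint)
            {
                if (item == "serverError")
                    ss >> server_error;
                else if (item == "clientError")
                    ss >> client_error;
            }
            // ...but the 'echo' hint is honoured anywhere.
            if (item == "echo")
                echo = true;
        }
    }
};

int main()
{
    TestHintSketch leading("-- { echo }\n-- { serverError 1 }\nselect 1;");
    TestHintSketch trailing("select nonexistent_column; -- { serverError 12345 }");
    std::cout << leading.echo << ' ' << leading.server_error << '\n'; // 1 0
    std::cout << trailing.server_error << '\n';                       // 12345
}
```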
......@@ -60,7 +69,9 @@ public:
bool & got_exception, std::unique_ptr<Exception> & last_exception) const
{
if (!enabled)
{
return true;
}
if (allErrorsExpected(actual_server_error, actual_client_error))
{
......@@ -94,7 +105,7 @@ private:
int client_error = 0;
bool echo = false;
void parse(const String & hint)
void parse(const String & hint, bool is_leading_hint)
{
std::stringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
ss << hint;
......@@ -106,11 +117,15 @@ private:
if (ss.eof())
break;
if (item == "serverError")
ss >> server_error;
else if (item == "clientError")
ss >> client_error;
else if (item == "echo")
if (!is_leading_hint)
{
if (item == "serverError")
ss >> server_error;
else if (item == "clientError")
ss >> client_error;
}
if (item == "echo")
echo = true;
}
}
......
......@@ -46,6 +46,7 @@ template <typename T> WriteBuffer & operator<< (WriteBuffer & buf, const T &
/// If you do not use the manipulators, the string is displayed without an escape, as is.
template <> inline WriteBuffer & operator<< (WriteBuffer & buf, const String & x) { writeString(x, buf); return buf; }
template <> inline WriteBuffer & operator<< (WriteBuffer & buf, const std::string_view & x) { writeString(StringRef(x), buf); return buf; }
template <> inline WriteBuffer & operator<< (WriteBuffer & buf, const StringRef & x) { writeString(x, buf); return buf; }
template <> inline WriteBuffer & operator<< (WriteBuffer & buf, const char & x) { writeChar(x, buf); return buf; }
template <> inline WriteBuffer & operator<< (WriteBuffer & buf, const pcg32_fast & x) { PcgSerializer::serializePcg32(x, buf); return buf; }
......
......@@ -4,12 +4,13 @@
namespace DB
{
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token * last)
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last)
{
/// We have just two kind of parentheses: () and [].
UnmatchedParentheses stack;
for (TokenIterator it = begin; it.isValid() && &it.get() <= last; ++it)
for (TokenIterator it = begin;
it.isValid() && it->begin <= last.begin; ++it)
{
if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket)
{
......
......@@ -80,6 +80,6 @@ public:
/// Returns positions of unmatched parentheses.
using UnmatchedParentheses = std::vector<Token>;
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token * last);
UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin, Token last);
}
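For reference, the bracket-matching idea itself is simple; a minimal standalone sketch over raw characters (the real function walks a TokenIterator up to the last parsed token, and only () and [] are considered, as in the original) could be:

```cpp
#include <iostream>
#include <string_view>
#include <vector>

// Hypothetical sketch of the unmatched-parentheses check on plain characters.
std::vector<char> check_unmatched_parentheses_sketch(std::string_view query)
{
    std::vector<char> stack;
    for (char c : query)
    {
        if (c == '(' || c == '[')
        {
            stack.push_back(c);
        }
        else if (c == ')' || c == ']')
        {
            char expected = (c == ')') ? '(' : '[';
            if (!stack.empty() && stack.back() == expected)
                stack.pop_back();
            else
                return {c}; // first mismatched closing bracket
        }
    }
    return stack;           // whatever is left open is unmatched
}

int main()
{
    std::cout << check_unmatched_parentheses_sketch("select [1, (2, 3)]").size() << '\n'; // 0
    std::cout << check_unmatched_parentheses_sketch("select (1, [2, 3)").size() << '\n';  // 1
}
```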
......@@ -78,6 +78,10 @@ void writeQueryWithHighlightedErrorPositions(
for (size_t position_to_hilite_idx = 0; position_to_hilite_idx < num_positions_to_hilite; ++position_to_hilite_idx)
{
const char * current_position_to_hilite = positions_to_hilite[position_to_hilite_idx].begin;
assert(current_position_to_hilite < end);
assert(current_position_to_hilite >= begin);
out.write(pos, current_position_to_hilite - pos);
if (current_position_to_hilite == end)
......@@ -189,6 +193,10 @@ std::string getLexicalErrorMessage(
writeQueryAroundTheError(out, begin, end, hilite, &last_token, 1);
out << getErrorTokenDescription(last_token.type);
if (last_token.size())
{
out << ": '" << StringRef{last_token.begin, last_token.size()} << "'";
}
return out.str();
}
......@@ -217,8 +225,8 @@ std::string getUnmatchedParenthesesErrorMessage(
ASTPtr tryParseQuery(
IParser & parser,
const char * & pos,
const char * end,
const char * & _out_query_end, /* also query begin as input parameter */
const char * all_queries_end,
std::string & out_error_message,
bool hilite,
const std::string & query_description,
......@@ -226,7 +234,8 @@ ASTPtr tryParseQuery(
size_t max_query_size,
size_t max_parser_depth)
{
Tokens tokens(pos, end, max_query_size);
const char * query_begin = _out_query_end;
Tokens tokens(query_begin, all_queries_end, max_query_size);
IParser::Pos token_iterator(tokens, max_parser_depth);
if (token_iterator->isEnd()
......@@ -241,70 +250,90 @@ ASTPtr tryParseQuery(
//"
// Advance the position, so that we can use this parser for stream parsing
// even in presence of such queries.
pos = token_iterator->begin;
_out_query_end = token_iterator->begin;
return nullptr;
}
Expected expected;
ASTPtr res;
bool parse_res = parser.parse(token_iterator, res, expected);
Token last_token = token_iterator.max();
const bool parse_res = parser.parse(token_iterator, res, expected);
const auto last_token = token_iterator.max();
_out_query_end = last_token.end;
/// If parsed query ends at data for insertion. Data for insertion could be in any format and not necessary be lexical correct.
ASTInsertQuery * insert = nullptr;
if (parse_res)
insert = res->as<ASTInsertQuery>();
if (!(insert && insert->data))
// If the parsed query ends with data for insertion: that data can be in any
// format and is not necessarily lexically correct, so we can't perform most
// of the checks.
if (insert && insert->data)
{
/// Lexical error
if (last_token.isError())
if (!parse_res)
{
out_error_message = getLexicalErrorMessage(pos, end, last_token, hilite, query_description);
// Generic parse error.
out_error_message = getSyntaxErrorMessage(query_begin, all_queries_end,
last_token, expected, hilite, query_description);
return nullptr;
}
/// Unmatched parentheses
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), &last_token);
if (!unmatched_parens.empty())
{
out_error_message = getUnmatchedParenthesesErrorMessage(pos, end, unmatched_parens, hilite, query_description);
return nullptr;
}
return res;
}
// More granular checks for queries other than INSERT w/inline data.
/// Lexical error
if (last_token.isError())
{
out_error_message = getLexicalErrorMessage(query_begin, all_queries_end,
last_token, hilite, query_description);
return nullptr;
}
/// Unmatched parentheses
UnmatchedParentheses unmatched_parens = checkUnmatchedParentheses(TokenIterator(tokens), last_token);
if (!unmatched_parens.empty())
{
out_error_message = getUnmatchedParenthesesErrorMessage(query_begin,
all_queries_end, unmatched_parens, hilite, query_description);
return nullptr;
}
if (!parse_res)
{
/// Parse error.
out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description);
/// Generic parse error.
out_error_message = getSyntaxErrorMessage(query_begin, all_queries_end,
last_token, expected, hilite, query_description);
return nullptr;
}
/// Excessive input after query. Parsed query must end with end of data or semicolon or data for INSERT.
if (!token_iterator->isEnd()
&& token_iterator->type != TokenType::Semicolon
&& !(insert && insert->data))
&& token_iterator->type != TokenType::Semicolon)
{
expected.add(pos, "end of query");
out_error_message = getSyntaxErrorMessage(pos, end, last_token, expected, hilite, query_description);
expected.add(last_token.begin, "end of query");
out_error_message = getSyntaxErrorMessage(query_begin, all_queries_end,
last_token, expected, hilite, query_description);
return nullptr;
}
// Skip the semicolon that might be left after parsing the VALUES format.
while (token_iterator->type == TokenType::Semicolon)
{
++token_iterator;
}
/// If multi-statements are not allowed, then after semicolon, there must be no non-space characters.
// If multi-statements are not allowed, then after semicolon, there must
// be no non-space characters.
if (!allow_multi_statements
&& !token_iterator->isEnd()
&& !(insert && insert->data))
&& !token_iterator->isEnd())
{
out_error_message = getSyntaxErrorMessage(pos, end, last_token, {}, hilite,
(query_description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed");
out_error_message = getSyntaxErrorMessage(query_begin, all_queries_end,
last_token, {}, hilite,
(query_description.empty() ? std::string() : std::string(". "))
+ "Multi-statements are not allowed");
return nullptr;
}
pos = token_iterator->begin;
return res;
}
......
......@@ -9,7 +9,7 @@ namespace DB
/// Parse query or set 'out_error_message'.
ASTPtr tryParseQuery(
IParser & parser,
const char * & pos, /// Moved to end of parsed fragment.
const char * & _out_query_end, // query start as input parameter, query end as output
const char * end,
std::string & out_error_message,
bool hilite,
......
......@@ -126,9 +126,17 @@ inline void ALWAYS_INLINE normalizeQueryToPODArray(const char * begin, const cha
if (!prev_insignificant)
{
if (0 == num_literals_in_sequence)
res_data.push_back(' ');
{
// If it's leading whitespace, ignore it altogether.
if (token.begin != begin)
{
res_data.push_back(' ');
}
}
else
{
prev_whitespace = true;
}
}
prev_insignificant = true;
continue;
......
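The query_log tests further down switch to normalizeQuery() matching, and the change above makes the normalizer drop leading whitespace entirely instead of emitting a space for it. A rough sketch of just that whitespace rule (the real function also folds literals, numbers and much more; normalize_whitespace_sketch is a made-up name):

```cpp
#include <cctype>
#include <iostream>
#include <string>
#include <string_view>

// Hypothetical sketch: runs of whitespace collapse to a single space, except
// at the very beginning of the query, where they are dropped altogether, so
// the normalized text never starts with a space.
std::string normalize_whitespace_sketch(std::string_view query)
{
    std::string res;
    bool pending_space = false;
    for (char c : query)
    {
        if (std::isspace(static_cast<unsigned char>(c)))
        {
            // Leading whitespace (nothing emitted yet) is ignored entirely.
            if (!res.empty())
                pending_space = true;
        }
        else
        {
            if (pending_space)
            {
                res.push_back(' ');
                pending_space = false;
            }
            res.push_back(c);
        }
    }
    return res;
}

int main()
{
    // "  select\n 1 ;" -> "select 1 ;"
    std::cout << '[' << normalize_whitespace_sketch("  select\n 1 ;") << "]\n";
}
```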
......@@ -568,7 +568,7 @@ def main(args):
if not check_server_started(args.client, args.server_check_retries):
raise Exception(
"Server is not responding. Cannot execute 'SELECT 1' query. \
Note: if you are using unbundled mode, you also have to specify -c option.")
Note: if you are using split build, you may have to specify -c option.")
build_flags = collect_build_flags(args.client)
if args.antlr:
......@@ -846,10 +846,10 @@ if __name__ == '__main__':
parser.add_argument('--tmp', help='Path to tmp dir')
parser.add_argument('-b', '--binary', default='clickhouse',
help='Path to clickhouse (if bundled, clickhouse-server otherwise) binary or name of binary in PATH')
help='Path to clickhouse (if monolithic build, clickhouse-server otherwise) binary or name of binary in PATH')
parser.add_argument('-c', '--client',
help='Path to clickhouse-client (if unbundled, useless otherwise) binary of name of binary in PATH')
help='Path to clickhouse-client (if split build, useless otherwise) binary of name of binary in PATH')
parser.add_argument('--extract_from_config', help='extract-from-config program')
parser.add_argument('--configclient', help='Client config (if you use not default ports)')
......@@ -930,11 +930,11 @@ if __name__ == '__main__':
if find_binary(args.binary + '-client'):
args.client = args.binary + '-client'
print("Using " + args.client + " as client program (expecting unbundled mode)")
print("Using " + args.client + " as client program (expecting split build)")
elif find_binary(args.binary):
args.client = args.binary + ' client'
print("Using " + args.client + " as client program (expecting bundled mode)")
print("Using " + args.client + " as client program (expecting monolithic build)")
else:
print("No 'clickhouse' or 'clickhouse-client' client binary found", file=sys.stderr)
parser.print_help()
......
......@@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "DROP TABLE IF EXISTS tab_00651; CREATE TABLE tab_00651 (val UInt64) engine = Memory; SHOW CREATE TABLE tab_00651 format abcd; DESC tab_00651; DROP TABLE tab_00651;" ||: 2> /dev/null
${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "DROP TABLE IF EXISTS tab_00651; CREATE TABLE tab_00651 (val UInt64) engine = Memory; SHOW CREATE TABLE tab_00651 format abcd; DESC tab_00651; DROP TABLE tab_00651;" 2>/dev/null ||:
......@@ -8,7 +8,7 @@ WITH
(
SELECT query_id
FROM system.query_log
WHERE (query = 'WITH 01091 AS id SELECT 1;\n') AND (event_date >= (today() - 1))
WHERE (normalizeQuery(query) like normalizeQuery('WITH 01091 AS id SELECT 1;')) AND (event_date >= (today() - 1))
ORDER BY event_time DESC
LIMIT 1
) AS id
......@@ -23,7 +23,7 @@ WITH
(
SELECT query_id
FROM system.query_log
WHERE (query LIKE 'with 01091 as id select sum(number) from numbers(1000000);%') AND (event_date >= (today() - 1))
WHERE (normalizeQuery(query) = normalizeQuery('with 01091 as id select sum(number) from numbers(1000000);')) AND (event_date >= (today() - 1))
ORDER BY event_time DESC
LIMIT 1
) AS id
......@@ -38,7 +38,7 @@ WITH
(
SELECT query_id
FROM system.query_log
WHERE (query LIKE 'with 01091 as id select sum(number) from numbers_mt(1000000);%') AND (event_date >= (today() - 1))
WHERE (normalizeQuery(query) = normalizeQuery('with 01091 as id select sum(number) from numbers_mt(1000000);')) AND (event_date >= (today() - 1))
ORDER BY event_time DESC
LIMIT 1
) AS id
......
......@@ -4,9 +4,9 @@ set log_queries_min_type='QUERY_FINISH';
set enable_global_with_statement=1;
select /* test=01531, enable_global_with_statement=0 */ 2;
system flush logs;
select count() from system.query_log where event_time >= now() - interval 5 minute and query = 'select /* test=01531, enable_global_with_statement=0 */ 2;\n';
select count() from system.query_log where event_time >= now() - interval 5 minute and query like '%select /* test=01531, enable_global_with_statement=0 */ 2%';
set enable_global_with_statement=1;
select /* test=01531 enable_global_with_statement=1 */ 2;
system flush logs;
select count() from system.query_log where event_time >= now() - interval 5 minute and query = 'select /* test=01531 enable_global_with_statement=1 */ 2;\n';
select count() from system.query_log where event_time >= now() - interval 5 minute and query like '%select /* test=01531 enable_global_with_statement=1 */ 2%';
set allow_experimental_window_functions = 1;
-- { echo }
set allow_experimental_window_functions = 1;
-- just something basic
select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10);
-- proper calculation across blocks
0 1
1 2
2 3
......@@ -16,10 +13,8 @@ select number, count() over (partition by intDiv(number, 3) order by number) fro
7 2
8 3
9 1
-- proper calculation across blocks
select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2;
-- not a window function
2 2
1 2
0 2
......@@ -30,14 +25,10 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
7 8
6 8
9 9
-- not a window function
select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 }
-- no partition by
select number, avg(number) over (order by number) from numbers(10);
-- no order by
0 0
1 0.5
2 1
......@@ -48,10 +39,8 @@ select number, avg(number) over (order by number) from numbers(10);
7 3.5
8 4
9 4.5
-- no order by
select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10);
-- can add an alias after window spec
0 0
1 1
2 1
......@@ -62,11 +51,8 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) from
7 7
8 7
9 9
-- can add an alias after window spec
select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10);
-- can't reference it yet -- the window functions are calculated at the
-- last stage of select, after all other functions.
0 0
1 1
2 1
......@@ -77,21 +63,16 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) q fro
7 7
8 7
9 9
-- can't reference it yet -- the window functions are calculated at the
-- last stage of select, after all other functions.
select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 }
-- must work in WHERE if you wrap it in a subquery
select * from (select count(*) over () c from numbers(3)) where c > 0;
-- should work in ORDER BY
1
2
3
-- should work in ORDER BY
select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number;
-- also works in ORDER BY if you wrap it in a subquery
9 9
6 8
7 8
......@@ -102,43 +83,33 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
0 2
1 2
2 2
-- also works in ORDER BY if you wrap it in a subquery
select * from (select count(*) over () c from numbers(3)) order by c;
1
2
3
-- Example with window function only in ORDER BY. Here we make a rank of all
-- numbers sorted descending, and then sort by this rank descending, and must get
-- the ascending order.
select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc;
0
1
2
3
select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc;
4
-- Aggregate functions as window function arguments. This query is semantically
-- the same as the above one, only we replace `number` with
-- `any(number) group by number` and so on.
select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number) + 1) over (order by min(number) desc) desc;
0
1
2
3
4
select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number) + 1) over (order by min(number) desc) desc;
-- different windows
-- an explain test would also be helpful, but it's too immature now and I don't
-- want to change reference all the time
0
1
2
3
4
select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2;
-- two functions over the same window
-- an explain test would also be helpful, but it's too immature now and I don't
-- want to change reference all the time
0 2 1
1 2 2
2 2 3
......@@ -170,10 +141,10 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
28 29 4
29 29 5
30 30 1
-- two functions over the same window
-- an explain test would also be helpful, but it's too immature now and I don't
-- want to change reference all the time
select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2;
-- check that we can work with constant columns
0 2 3
1 2 2
2 2 1
......@@ -181,35 +152,26 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
4 5 2
5 5 1
6 6 1
-- check that we can work with constant columns
select median(x) over (partition by x) from (select 1 x);
-- an empty window definition is valid as well
1
-- an empty window definition is valid as well
select groupArray(number) over () from numbers(3);
-- This one tests we properly process the window function arguments.
-- Seen errors like 'column `1` not found' from count(1).
[0]
[0,1]
[0,1,2]
-- This one tests we properly process the window function arguments.
-- Seen errors like 'column `1` not found' from count(1).
select count(1) over (), max(number + 1) over () from numbers(3);
-- Should work in DISTINCT
1 3
-- Should work in DISTINCT
select distinct sum(0) over () from numbers(2);
0
select distinct any(number) over () from numbers(2);
0
-- Various kinds of aliases are properly substituted into various parts of window
-- function definition.
0
with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7);
0 1
0 3
0 6
......