From 0ba72436db5bf81e158ab62c5bd5201e30c746d3 Mon Sep 17 00:00:00 2001 From: artpaul Date: Tue, 18 Apr 2017 18:25:50 +0500 Subject: [PATCH] #214 fix conversion from fixed-string to string --- dbms/src/Functions/FunctionsStringSearch.cpp | 54 ++++++++++--------- .../00394_replaceall_vector_fixed.reference | 4 ++ .../00394_replaceall_vector_fixed.sql | 11 ++++ 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index e49cbe9bc3..daf595f741 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -832,6 +832,8 @@ struct ReplaceStringImpl } } + /// Note: this function converts fixed-length strings to variable-length strings + /// and each variable-length string should ends with zero byte. static void vector_fixed(const ColumnString::Chars_t & data, size_t n, const std::string & needle, @@ -844,9 +846,9 @@ struct ReplaceStringImpl const UInt8 * end = pos + data.size(); ColumnString::Offset_t res_offset = 0; - size_t size = data.size() / n; + size_t count = data.size() / n; res_data.reserve(data.size()); - res_offsets.resize(size); + res_offsets.resize(count); /// The current index in the string array. size_t i = 0; @@ -858,33 +860,45 @@ struct ReplaceStringImpl { const UInt8 * match = searcher.search(pos, end - pos); - /// Copy the data without changing - res_data.resize(res_data.size() + (match - pos)); - memcpy(&res_data[res_offset], pos, match - pos); - - /// Let's determine which index it belongs to. - while (i < size && begin + n * (i + 1) <= match) +#define COPY_REST_OF_CURRENT_STRING() \ + do { \ + const size_t len = begin + n * (i + 1) - pos; \ + res_data.resize(res_data.size() + len + 1); \ + memcpy(&res_data[res_offset], pos, len); \ + res_offset += len; \ + res_data[res_offset++] = 0; \ + res_offsets[i] = res_offset; \ + pos = begin + n * (i + 1); \ + ++i; \ + } while (false) + + /// Copy skipped strings without any changes but + /// add zero byte to the end of each string. + while (i < count && begin + n * (i + 1) <= match) { - res_offsets[i] = res_offset + ((begin + n * (i + 1)) - pos) + 1; - ++i; + COPY_REST_OF_CURRENT_STRING(); } - res_offset += (match - pos); /// If you have reached the end, it's time to stop - if (i == size) + if (i == count) break; + /// Copy unchanged part of current string. + res_data.resize(res_data.size() + (match - pos)); + memcpy(&res_data[res_offset], pos, match - pos); + res_offset += (match - pos); + /// Is it true that this line no longer needs to perform conversions. bool can_finish_current_string = false; /// We check that the entry does not pass through the boundaries of strings. - if (match + needle.size() - 1 < begin + n * (i + 1)) + if (match + needle.size() <= begin + n * (i + 1)) { res_data.resize(res_data.size() + replacement.size()); memcpy(&res_data[res_offset], replacement.data(), replacement.size()); res_offset += replacement.size(); pos = match + needle.size(); - if (replace_one) + if (replace_one || pos == begin + n * (i + 1)) can_finish_current_string = true; } else @@ -895,17 +909,9 @@ struct ReplaceStringImpl if (can_finish_current_string) { - res_data.resize(res_data.size() + (begin + n * (i + 1) - pos)); - memcpy(&res_data[res_offset], pos, (begin + n * (i + 1) - pos)); - res_offset += (begin + n * (i + 1) - pos); - res_offsets[i] = res_offset; - pos = begin + n * (i + 1); + COPY_REST_OF_CURRENT_STRING(); } - } - - if (i < size) - { - res_offsets[i] = res_offset + ((begin + n * (i + 1)) - pos) + 1; +#undef COPY_REST_OF_CURRENT_STRING } } diff --git a/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.reference b/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.reference index 5b9a519f2f..8bd77d3312 100644 --- a/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.reference +++ b/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.reference @@ -2,3 +2,7 @@ bao ba* bar bar boa b*a foo f** +54db0d43009d\0\0\0\0 54db0d43009d**** +fe2b58224766cf10 fe2b58224766cf10 +54db0d43009d\0\0\0\0 54db0d43009d**** +fe2b58224766cf10 fe2b58224766cf10 diff --git a/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.sql b/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.sql index 479d329948..ff192cb61f 100644 --- a/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.sql +++ b/dbms/tests/queries/0_stateless/00394_replaceall_vector_fixed.sql @@ -13,3 +13,14 @@ FROM test.replaceall ORDER BY str ASC; DROP TABLE test.replaceall; + +CREATE TABLE test.replaceall (date Date DEFAULT today(), fs FixedString(16)) ENGINE = MergeTree(date, (date, fs), 8192); +INSERT INTO test (fs) VALUES + ('54db0d43009d\0\0\0\0'), ('fe2b58224766cf10'), + ('54db0d43009d\0\0\0\0'), ('fe2b58224766cf10'); + +SELECT fs, replaceAll(fs, '\0', '*') +FROM test.replaceall +ORDER BY fs ASC; + +DROP TABLE test.replaceall; -- GitLab