From af7e4a0c73ef82a9b2301f687fb4ab06b3d361f3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Aug 2016 23:50:34 +0300 Subject: [PATCH] Try to improve performance of ColumnString::insertInto [#METR-22173]. --- dbms/include/DB/Columns/ColumnString.h | 69 ++++++++++++++++++-------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/dbms/include/DB/Columns/ColumnString.h b/dbms/include/DB/Columns/ColumnString.h index ac054dd8fc..b7846f443d 100644 --- a/dbms/include/DB/Columns/ColumnString.h +++ b/dbms/include/DB/Columns/ColumnString.h @@ -86,43 +86,71 @@ public: void insert(const Field & x) override { const String & s = DB::get<const String &>(x); - size_t old_size = chars.size(); - size_t size_to_append = s.size() + 1; + const size_t old_size = chars.size(); + const size_t size_to_append = s.size() + 1; + const size_t new_size = old_size + size_to_append; - chars.resize(old_size + size_to_append); + chars.resize(new_size); memcpy(&chars[old_size], s.c_str(), size_to_append); - offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + size_to_append); + offsets.push_back(new_size); } void insertFrom(const IColumn & src_, size_t n) override { const ColumnString & src = static_cast<const ColumnString &>(src_); - size_t old_size = chars.size(); - size_t size_to_append = src.sizeAt(n); - size_t offset = src.offsetAt(n); - chars.resize(old_size + size_to_append); - memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[offset], size_to_append); - offsets.push_back((offsets.size() == 0 ? 
0 : offsets.back()) + size_to_append); + if (n != 0) + { + const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; + + if (size_to_append == 1) + { + /// shortcut for empty string + chars.push_back(0); + offsets.push_back(chars.size()); + } + else + { + const size_t old_size = chars.size(); + const size_t offset = src.offsets[n - 1]; + const size_t new_size = old_size + size_to_append; + + chars.resize(new_size); + memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[offset], size_to_append); + offsets.push_back(new_size); + } + } + else + { + const size_t old_size = chars.size(); + const size_t size_to_append = src.offsets[0]; + const size_t new_size = old_size + size_to_append; + + chars.resize(new_size); + memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[0], size_to_append); + offsets.push_back(new_size); + } } void insertData(const char * pos, size_t length) override { - size_t old_size = chars.size(); + const size_t old_size = chars.size(); + const size_t new_size = old_size + length + 1; - chars.resize(old_size + length + 1); + chars.resize(new_size); memcpy(&chars[old_size], pos, length); chars[old_size + length] = 0; - offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + length + 1); + offsets.push_back(new_size); } void insertDataWithTerminatingZero(const char * pos, size_t length) override { - size_t old_size = chars.size(); + const size_t old_size = chars.size(); + const size_t new_size = old_size + length; - chars.resize(old_size + length); + chars.resize(new_size); memcpy(&chars[old_size], pos, length); - offsets.push_back((offsets.size() == 0 ? 
0 : offsets.back()) + length); + offsets.push_back(new_size); } void popBack(size_t n) override @@ -149,14 +177,15 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override { - size_t string_size = *reinterpret_cast<const size_t *>(pos); + const size_t string_size = *reinterpret_cast<const size_t *>(pos); pos += sizeof(string_size); - size_t old_size = chars.size(); - chars.resize(old_size + string_size); + const size_t old_size = chars.size(); + const size_t new_size = old_size + string_size; + chars.resize(new_size); memcpy(&chars[old_size], pos, string_size); - offsets.push_back((offsets.size() == 0 ? 0 : offsets.back()) + string_size); + offsets.push_back(new_size); return pos + string_size; } -- GitLab