diff --git a/dbms/src/Columns/ColumnLowCardinality.cpp b/dbms/src/Columns/ColumnLowCardinality.cpp index 55d98f9ce2265e494ccc717cb93ad084f832f94f..bad6eb372693b9256af59738b95068ce0773c56c 100644 --- a/dbms/src/Columns/ColumnLowCardinality.cpp +++ b/dbms/src/Columns/ColumnLowCardinality.cpp @@ -306,21 +306,11 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique) dictionary.setShared(column_unique); } -ColumnLowCardinality::MutablePtr ColumnLowCardinality::compact() -{ - auto positions = idx.getPositions(); - /// Create column with new indexes and old dictionary. - auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), (*std::move(positions)).mutate()); - /// Will create new dictionary. - column->compactInplace(); - - return column; -} - ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const { auto sub_positions = (*idx.getPositions()->cut(start, length)).mutate(); /// Create column with new indexes and old dictionary. + /// Dictionary is shared, but will be recreated after compactInplace call. auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions)); /// Will create new dictionary. column->compactInplace(); diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index f93e7cb80e7f0dce5edf8972ea8db407ecd7c2b1..91f5337b63394517f0640202c4c38cf803819c8d 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -177,10 +177,8 @@ public: void setSharedDictionary(const ColumnPtr & column_unique); bool isSharedDictionary() const { return dictionary.isShared(); } - /// Create column new dictionary with only keys that are mentioned in index. - MutablePtr compact(); - - /// Cut + compact. + /// Create column with new dictionary from column part. + /// Dictionary will have only keys that are mentioned in index. 
MutablePtr cutAndCompact(size_t start, size_t length) const; struct DictionaryEncodedColumn diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index 105e989f69a9226c7e9f4778a5df5a389cb510cd..98b662d8fe8b25e211b34f0db751acb373cb2dbe 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -195,6 +195,12 @@ struct DeserializeStateLowCardinality : public IDataType::DeserializeBinaryBulkS ColumnPtr null_map; UInt64 num_pending_rows = 0; + /// If dictionary should be updated. + /// Can happen if some granules were skipped while reading from MergeTree. + /// We should store this flag in State because + /// in case of long block of empty arrays we may not need to read dictionary at first reading. + bool need_update_dictionary = false; + explicit DeserializeStateLowCardinality(UInt64 key_version) : key_version(key_version) {} }; @@ -686,7 +692,12 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams( if (!settings.continuous_reading) low_cardinality_state->num_pending_rows = 0; - bool first_dictionary = true; + if (!settings.continuous_reading) + { + /// Remember in state that some granules were skipped and we need to update dictionary. 
+ low_cardinality_state->need_update_dictionary = true; + } + while (limit) { if (low_cardinality_state->num_pending_rows == 0) @@ -699,10 +710,12 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams( index_type.deserialize(*indexes_stream); - if (index_type.need_global_dictionary && (!global_dictionary || index_type.need_update_dictionary || (first_dictionary && !settings.continuous_reading))) + bool need_update_dictionary = + !global_dictionary || index_type.need_update_dictionary || low_cardinality_state->need_update_dictionary; + if (index_type.need_global_dictionary && need_update_dictionary) { readDictionary(); - first_dictionary = false; + low_cardinality_state->need_update_dictionary = false; } if (low_cardinality_state->index_type.has_additional_keys) diff --git a/dbms/tests/queries/0_stateless/00931_low_cardinality_read_with_empty_array.reference b/dbms/tests/queries/0_stateless/00931_low_cardinality_read_with_empty_array.reference new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/dbms/tests/queries/0_stateless/00931_low_cardinality_read_with_empty_array.sql b/dbms/tests/queries/0_stateless/00931_low_cardinality_read_with_empty_array.sql new file mode 100644 index 0000000000000000000000000000000000000000..8c9e60706732920a6ded8faaf7a8b4dffbe9067d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00931_low_cardinality_read_with_empty_array.sql @@ -0,0 +1,7 @@ +drop table if exists test.lc; +create table test.lc (key UInt64, value Array(LowCardinality(String))) engine = MergeTree order by key; +insert into test.lc select number, if(number < 10000 or number > 100000, [toString(number)], emptyArrayString()) from system.numbers limit 200000; +select * from test.lc where (key < 100 or key > 50000) and not has(value, toString(key)) and length(value) == 1 limit 10 settings max_block_size = 8192, max_threads = 1; + +drop table if exists test.lc; +