From d4992da5469af1e5fe817db78e5f96241de52be5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 26 Mar 2017 02:42:04 +0300
Subject: [PATCH] Implemented isIn methods for cache dictionaries [#CLICKHOUSE-2144].

---
 .../include/DB/Dictionaries/CacheDictionary.h |  12 +-
 .../Functions/FunctionsExternalDictionaries.h |   5 +-
 dbms/src/Dictionaries/CacheDictionary.cpp     | 117 ++++++++++++++++--
 3 files changed, 123 insertions(+), 11 deletions(-)

diff --git a/dbms/include/DB/Dictionaries/CacheDictionary.h b/dbms/include/DB/Dictionaries/CacheDictionary.h
index 3a8c1f44b4..fba88fc365 100644
--- a/dbms/include/DB/Dictionaries/CacheDictionary.h
+++ b/dbms/include/DB/Dictionaries/CacheDictionary.h
@@ -93,9 +93,9 @@ public:
 
     void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
 
-/*    void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
+    void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
     void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
-    void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; */
+    void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
 
 #define DECLARE(TYPE)\
     void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, PaddedPODArray<TYPE> & out) const;
@@ -242,6 +242,14 @@ private:
 
     FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const;
 
+    /// Common code of isInVectorVector and isInVectorConstant.
+    /// AncestorType is PaddedPODArray<Key> (one ancestor per row) or Key (the same ancestor for all rows).
+    template <typename AncestorType>
+    void isInImpl(
+        const PaddedPODArray<Key> & child_ids,
+        const AncestorType & ancestor_ids,
+        PaddedPODArray<UInt8> & out) const;
+
     const std::string name;
     const DictionaryStructure dict_struct;
     const DictionarySourcePtr source_ptr;
diff --git a/dbms/include/DB/Functions/FunctionsExternalDictionaries.h
b/dbms/include/DB/Functions/FunctionsExternalDictionaries.h
index 474949d310..a6796f9fd0 100644
--- a/dbms/include/DB/Functions/FunctionsExternalDictionaries.h
+++ b/dbms/include/DB/Functions/FunctionsExternalDictionaries.h
@@ -1338,6 +1338,8 @@ using FunctionDictGetDateOrDefault = FunctionDictGetOrDefault<DataTypeDate>;
 using FunctionDictGetDateTimeOrDefault = FunctionDictGetOrDefault<DataTypeDateTime>;
 
 
+/// Functions to work with hierarchies.
+
 class FunctionDictGetHierarchy final : public IFunction
 {
 public:
@@ -1409,7 +1411,8 @@ private:
                 "Dictionary does not have a hierarchy",
                 ErrorCodes::UNSUPPORTED_METHOD};
 
-        const auto get_hierarchies = [&] (const PaddedPODArray<UInt64> & in, PaddedPODArray<UInt64> & out, PaddedPODArray<UInt64> & offsets) {
+        const auto get_hierarchies = [&] (const PaddedPODArray<UInt64> & in, PaddedPODArray<UInt64> & out, PaddedPODArray<UInt64> & offsets)
+        {
             const auto size = in.size();
 
             /// copy of `in` array
diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp
index bded7d8ac2..2ca703e3ce 100644
--- a/dbms/src/Dictionaries/CacheDictionary.cpp
+++ b/dbms/src/Dictionaries/CacheDictionary.cpp
@@ -61,20 +61,121 @@ void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<K
 }
 
 
-/*void CacheDictionary::isInVectorVector(
+/// Lets a single constant value be read through the same getAt(x, idx) call as an array; the index is ignored for it.
+static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & arr, const size_t idx) { return arr[idx]; }
+static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t /* idx */) { return value; }
+
+/// For every row i: out[i] = 1 if ancestor i is met among the parents of child_ids[i], 0 if null_value is met first.
+template <typename AncestorType>
+void CacheDictionary::isInImpl(
+    const PaddedPODArray<Key> & child_ids,
+    const AncestorType & ancestor_ids,
+    PaddedPODArray<UInt8> & out) const
+{
+    /// Replace all unresolved children with their parents, level by level, until the ancestor id or null_value is reached.
+    /// NOTE(review): the child id itself is never compared with the ancestor here, while isInConstantVector treats a child as its own ancestor - confirm which is intended.
+    size_t size = out.size();
+    memset(out.data(), 0xFF, size);        /// 0xFF means "not calculated"
+
+    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
+
+    const PaddedPODArray<Key> * current_children = &child_ids;
+    PaddedPODArray<Key> children(size);
+    PaddedPODArray<Key> parents(size);
+
+    while (true)
+    {
+        toParent(*current_children, parents);
+
+        size_t out_idx = 0;
+        size_t parents_idx = 0;
+        size_t new_children_idx = 0;
+
+        while (out_idx < size)
+        {
+            /// Already calculated
+            if (out[out_idx] != 0xFF)
+            {
+                ++out_idx;
+                continue;
+            }
+
+            /// No parent
+            if (parents[parents_idx] == null_value)
+            {
+                out[out_idx] = 0;
+            }
+            /// Found ancestor. Ancestors are aligned with the rows of `out`, not with the compacted `parents`, hence out_idx.
+            else if (parents[parents_idx] == getAt(ancestor_ids, out_idx))
+            {
+                out[out_idx] = 1;
+            }
+            /// Found intermediate parent, add this value to search at next loop iteration
+            else
+            {
+                children[new_children_idx] = parents[parents_idx];
+                ++new_children_idx;
+            }
+
+            ++out_idx;
+            ++parents_idx;
+        }
+
+        if (new_children_idx == 0)
+            break;
+        /// NOTE(review): nothing bounds the depth here, so a cyclic hierarchy that never meets the ancestor never ends this loop.
+        /// Will process new children at next loop iteration.
+        children.resize(new_children_idx);
+        parents.resize(new_children_idx);
+        current_children = &children;
+    }
+}
+
+void CacheDictionary::isInVectorVector(
     const PaddedPODArray<Key> & child_ids,
     const PaddedPODArray<Key> & ancestor_ids,
     PaddedPODArray<UInt8> & out) const
 {
-    memset(out.data(), 0, out.size() * sizeof(out[0]));
+    isInImpl(child_ids, ancestor_ids, out);
+}
 
-    const PaddedPODArray<Key> * current_child_ids = &child_ids;
-    PaddedPODArray<Key> child_ids_buffer;
-    PaddedPODArray<Key> parents(out.size());
+void CacheDictionary::isInVectorConstant(
+    const PaddedPODArray<Key> & child_ids,
+    const Key ancestor_id,
+    PaddedPODArray<UInt8> & out) const
+{
+    isInImpl(child_ids, ancestor_id, out);
+}
 
-    toParent(*current_child_ids, parents);
-    for (size_t i = 0,)
-}*/
+void CacheDictionary::isInConstantVector(
+    const Key child_id,
+    const PaddedPODArray<Key> & ancestor_ids,
+    PaddedPODArray<UInt8> & out) const
+{
+    /// Special case with single child value: build its ancestor chain once, then test every ancestor_ids[i] against it.
+
+    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
+
+    PaddedPODArray<Key> child(1, child_id);
+    PaddedPODArray<Key> parent(1);
+    std::vector<Key> ancestors(1, child_id);    /// the chain starts with the child itself
+
+    /// Iteratively find all ancestors for child. Stop at null_value, or at an id already in the chain (cyclic hierarchy).
+    while (true)
+    {
+        toParent(child, parent);
+
+        if (parent[0] == null_value || std::find(ancestors.begin(), ancestors.end(), parent[0]) != ancestors.end())
+            break;
+
+        child[0] = parent[0];
+        ancestors.push_back(parent[0]);
+    }
+
+    /// Assuming short hierarchy, so linear search is Ok.
+    for (size_t i = 0, size = out.size(); i < size; ++i)
+        out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
+}
 
 
 #define DECLARE(TYPE)\
-- 
GitLab