diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index f13d6f6e3ddccae6a5363593fa84981a80ff98d7..398b4b594da6e8092e12afdd9c152525be7c0dca 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -358,6 +358,12 @@ protected: template friend class TwoLevelHashTable; + template + friend class TwoLevelStringHashTable; + + template + friend class StringHashTable; + using HashValue = size_t; using Self = HashTable; using cell_type = Cell; diff --git a/dbms/src/Common/HashTable/StringHashMap.h b/dbms/src/Common/HashTable/StringHashMap.h new file mode 100644 index 0000000000000000000000000000000000000000..4fcc46eee2486a4f30e995bdee6cbbfad495607c --- /dev/null +++ b/dbms/src/Common/HashTable/StringHashMap.h @@ -0,0 +1,180 @@ +#pragma once + +#include +#include +#include + +template +struct StringHashMapCell : public HashMapCell +{ + using Base = HashMapCell; + using Base::Base; + static constexpr bool need_zero_value_storage = false; +}; + +template +auto lookupResultGetMapped(StringHashMapCell * cell) { return &cell->getSecond(); } + +template +struct StringHashMapCell : public HashMapCell +{ + using Base = HashMapCell; + using Base::Base; + static constexpr bool need_zero_value_storage = false; + bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } + // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method + static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; } + void setZero() { this->value.first.low = 0; } +}; + +template +struct StringHashMapCell : public HashMapCell +{ + using Base = HashMapCell; + using Base::Base; + static constexpr bool need_zero_value_storage = false; + bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } + // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method + static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; } + void setZero() { this->value.first.a = 0; } +}; + +template +struct StringHashMapCell : public HashMapCellWithSavedHash +{ + using Base = HashMapCellWithSavedHash; + using Base::Base; + static constexpr bool need_zero_value_storage = false; +}; + +template +struct StringHashMapSubMaps +{ + using T0 = StringHashTableEmpty>; + using T1 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; + using T2 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; + using T3 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; + using Ts = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; +}; + +template +class StringHashMap : public StringHashTable> +{ +public: + using Base = StringHashTable>; + using Self = StringHashMap; + using Key = StringRef; + using key_type = StringRef; + using mapped_type = TMapped; + using value_type = typename Base::Ts::value_type; + using LookupResult = mapped_type *; + + using Base::Base; + + /// Merge every cell's value of current map into the destination map. + /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). + /// Each filled cell in current map will invoke func once. If that map doesn't + /// have a key equals to the given cell, a new cell gets emplaced into that map, + /// and func is invoked with the third argument emplaced set to true. Otherwise + /// emplaced is set to false. 
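+    /// A hypothetical usage sketch (illustration only, not part of this patch),
+    /// assuming a map of UInt64 counters; `src_map` and `dst_map` are made-up names:
+    ///
+    ///     src_map.mergeToViaEmplace(dst_map, [](UInt64 & dst, UInt64 & src, bool emplaced)
+    ///     {
+    ///         if (emplaced)
+    ///             dst = src;   /// key was new in the destination map
+    ///         else
+    ///             dst += src;  /// key already existed, combine the values
+    ///     });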
+ template + void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) + { + if (this->m0.hasZero()) + { + const bool emplace_new_zero = !that.m0.hasZero(); + if (emplace_new_zero) + { + that.m0.setHasZero(); + } + + func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), + emplace_new_zero); + } + + this->m1.mergeToViaEmplace(that.m1, func); + this->m2.mergeToViaEmplace(that.m2, func); + this->m3.mergeToViaEmplace(that.m3, func); + this->ms.mergeToViaEmplace(that.ms, func); + } + + /// Merge every cell's value of current map into the destination map via find. + /// Func should have signature void(Mapped & dst, Mapped & src, bool exist). + /// Each filled cell in current map will invoke func once. If that map doesn't + /// have a key equals to the given cell, func is invoked with the third argument + /// exist set to false. Otherwise exist is set to true. + template + void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) + { + if (this->m0.hasZero()) + { + if (that.m0.hasZero()) + { + func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), true); + } + else + { + func(this->m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), false); + } + } + + this->m1.mergeToViaFind(that.m1, func); + this->m2.mergeToViaFind(that.m2, func); + this->m3.mergeToViaFind(that.m3, func); + this->ms.mergeToViaFind(that.ms, func); + } + + mapped_type & ALWAYS_INLINE operator[](Key x) + { + bool inserted; + LookupResult it = nullptr; + emplace(x, it, inserted); + if (inserted) + new (it) mapped_type(); + return *it; + } + + template + void ALWAYS_INLINE forEachValue(Func && func) + { + if (this->m0.size()) + { + func(StringRef{}, this->m0.zeroValue()->getSecond()); + } + + for (auto & v : this->m1) + { + func(toStringRef(v.getFirst()), v.getSecond()); + } + + for (auto & v : this->m2) + { + func(toStringRef(v.getFirst()), v.getSecond()); + } + + for (auto & v : this->m3) + { + func(toStringRef(v.getFirst()), v.getSecond()); + } + + for (auto & v : this->ms) + { + func(v.getFirst(), v.getSecond()); + } + } + + template + void ALWAYS_INLINE forEachMapped(Func && func) + { + if (this->m0.size()) + func(this->m0.zeroValue()->getSecond()); + for (auto & v : this->m1) + func(v.getSecond()); + for (auto & v : this->m2) + func(v.getSecond()); + for (auto & v : this->m3) + func(v.getSecond()); + for (auto & v : this->ms) + func(v.getSecond()); + } +}; diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h new file mode 100644 index 0000000000000000000000000000000000000000..e8df4ec0fa3e7ed4a5dfdc323a72b522c3094054 --- /dev/null +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -0,0 +1,407 @@ +#pragma once + +#include +#include + +#define CASE_1_8 \ + case 1: \ + case 2: \ + case 3: \ + case 4: \ + case 5: \ + case 6: \ + case 7: \ + case 8 + +#define CASE_9_16 \ + case 9: \ + case 10: \ + case 11: \ + case 12: \ + case 13: \ + case 14: \ + case 15: \ + case 16 + +#define CASE_17_24 \ + case 17: \ + case 18: \ + case 19: \ + case 20: \ + case 21: \ + case 22: \ + case 23: \ + case 24 + +struct StringKey0 +{ +}; + +using StringKey8 = UInt64; +using StringKey16 = DB::UInt128; +struct StringKey24 +{ + UInt64 a; + UInt64 b; + UInt64 c; + + bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } + bool operator!=(const StringKey24 rhs) const { return !operator==(rhs); } + bool operator==(const UInt64 rhs) const { return a == rhs && b == 0 && c == 0; } + bool operator!=(const 
UInt64 rhs) const { return !operator==(rhs); } + + StringKey24 & operator=(const UInt64 rhs) + { + a = rhs; + b = 0; + c = 0; + return *this; + } +}; + +inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n) +{ + return {reinterpret_cast(&n), 8ul - (__builtin_clzll(n) >> 3)}; +} +inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n) +{ + return {reinterpret_cast(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; +} +inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) +{ + return {reinterpret_cast(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; +} +inline const StringRef & ALWAYS_INLINE toStringRef(const StringRef & s) +{ + return s; +} + +struct StringHashTableHash +{ +#if defined(__SSE4_2__) + size_t ALWAYS_INLINE operator()(StringKey8 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key); + return res; + } + size_t ALWAYS_INLINE operator()(StringKey16 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.low); + res = _mm_crc32_u64(res, key.high); + return res; + } + size_t ALWAYS_INLINE operator()(StringKey24 key) const + { + size_t res = -1ULL; + res = _mm_crc32_u64(res, key.a); + res = _mm_crc32_u64(res, key.b); + res = _mm_crc32_u64(res, key.c); + return res; + } +#else + size_t ALWAYS_INLINE operator()(StringKey8 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 8); + } + size_t ALWAYS_INLINE operator()(StringKey16 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 16); + } + size_t ALWAYS_INLINE operator()(StringKey24 key) const + { + return CityHash_v1_0_2::CityHash64(reinterpret_cast(&key), 24); + } +#endif + size_t ALWAYS_INLINE operator()(StringRef key) const + { + return StringRefHash()(key); + } +}; + +template +struct StringHashTableEmpty +{ + using Self = StringHashTableEmpty; + + bool has_zero = false; + std::aligned_storage_t zero_value_storage; /// Storage of element with zero key. + +public: + bool hasZero() const { return has_zero; } + + void setHasZero() + { + has_zero = true; + new (zeroValue()) Cell(); + } + + void setHasZero(const Cell & other) + { + has_zero = true; + new (zeroValue()) Cell(other); + } + + void clearHasZero() + { + has_zero = false; + if (!std::is_trivially_destructible_v) + zeroValue()->~Cell(); + } + + Cell * zeroValue() { return reinterpret_cast(&zero_value_storage); } + const Cell * zeroValue() const { return reinterpret_cast(&zero_value_storage); } + + using LookupResult = Cell *; + using ConstLookupResult = const Cell *; + + template + void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t /* hash */) + { + if (!hasZero()) + { + setHasZero(); + inserted = true; + } + else + inserted = false; + it = zeroValue(); + } + + template + LookupResult ALWAYS_INLINE find(Key, size_t /* hash */) + { + return hasZero() ? zeroValue() : nullptr; + } + + + void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } + void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } + void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); } + void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); } + size_t size() const { return hasZero() ? 
1 : 0; } + bool empty() const { return !hasZero(); } + size_t getBufferSizeInBytes() const { return sizeof(Cell); } + size_t getCollisions() const { return 0; } +}; + +template +struct StringHashTableGrower : public HashTableGrower +{ + // Smooth growing for string maps + void increaseSize() { this->size_degree += 1; } +}; + +template +class StringHashTable : private boost::noncopyable +{ +protected: + static constexpr size_t NUM_MAPS = 5; + // Map for storing empty string + using T0 = typename SubMaps::T0; + + // Short strings are stored as numbers + using T1 = typename SubMaps::T1; + using T2 = typename SubMaps::T2; + using T3 = typename SubMaps::T3; + + // Long strings are stored as StringRef along with saved hash + using Ts = typename SubMaps::Ts; + using Self = StringHashTable; + + template + friend class TwoLevelStringHashTable; + + T0 m0; + T1 m1; + T2 m2; + T3 m3; + Ts ms; + +public: + using Key = StringRef; + using key_type = Key; + using value_type = typename Ts::value_type; + using LookupResult = typename Ts::mapped_type *; + + StringHashTable() {} + + StringHashTable(size_t reserve_for_num_elements) + : m1{reserve_for_num_elements / 4} + , m2{reserve_for_num_elements / 4} + , m3{reserve_for_num_elements / 4} + , ms{reserve_for_num_elements / 4} + { + } + + StringHashTable(StringHashTable && rhs) { *this = std::move(rhs); } + ~StringHashTable() {} + +public: + // Dispatch is written in a way that maximizes the performance: + // 1. Always memcpy 8 times bytes + // 2. Use switch case extension to generate fast dispatching table + // 3. Combine hash computation along with key loading + // 4. Funcs are named callables that can be force_inlined + // NOTE: It relies on Little Endianness and SSE4.2 + template + decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) + { + static constexpr StringKey0 key0{}; + const StringRef & x = keyHolderGetKey(key_holder); + size_t sz = x.size; + const char * p = x.data; + // pending bits that needs to be shifted out + char s = (-sz & 7) * 8; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + StringHashTableHash hash; + switch (sz) + { + case 0: + keyHolderDiscardKey(key_holder); + return func(m0, key0, 0); + CASE_1_8 : { + // first half page + if ((reinterpret_cast(p) & 2048) == 0) + { + memcpy(&n[0], p, 8); + n[0] &= -1ul >> s; + } + else + { + const char * lp = x.data + x.size - 8; + memcpy(&n[0], lp, 8); + n[0] >>= s; + } + keyHolderDiscardKey(key_holder); + return func(m1, k8, hash(k8)); + } + CASE_9_16 : { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + n[1] >>= s; + keyHolderDiscardKey(key_holder); + return func(m2, k16, hash(k16)); + } + CASE_17_24 : { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + n[2] >>= s; + keyHolderDiscardKey(key_holder); + return func(m3, k24, hash(k24)); + } + default: { + return func(ms, std::forward(key_holder), hash(x)); + } + } + } + + struct EmplaceCallable + { + LookupResult & mapped; + bool & inserted; + + EmplaceCallable(LookupResult & mapped_, bool & inserted_) + : mapped(mapped_), inserted(inserted_) {} + + template + void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash) + { + typename Map::LookupResult result; + map.emplace(key_holder, result, inserted, hash); + mapped = lookupResultGetMapped(result); + } + }; + + template + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) + { + 
this->dispatch(key_holder, EmplaceCallable(it, inserted)); + } + + struct FindCallable + { + template + LookupResult ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash) + { + return lookupResultGetMapped(map.find(keyHolderGetKey(key_holder), hash)); + } + }; + + LookupResult ALWAYS_INLINE find(Key x) + { + return dispatch(x, FindCallable{}); + } + + void write(DB::WriteBuffer & wb) const + { + m0.write(wb); + m1.write(wb); + m2.write(wb); + m3.write(wb); + ms.write(wb); + } + + void writeText(DB::WriteBuffer & wb) const + { + m0.writeText(wb); + DB::writeChar(',', wb); + m1.writeText(wb); + DB::writeChar(',', wb); + m2.writeText(wb); + DB::writeChar(',', wb); + m3.writeText(wb); + DB::writeChar(',', wb); + ms.writeText(wb); + } + + void read(DB::ReadBuffer & rb) + { + m0.read(rb); + m1.read(rb); + m2.read(rb); + m3.read(rb); + ms.read(rb); + } + + void readText(DB::ReadBuffer & rb) + { + m0.readText(rb); + DB::assertChar(',', rb); + m1.readText(rb); + DB::assertChar(',', rb); + m2.readText(rb); + DB::assertChar(',', rb); + m3.readText(rb); + DB::assertChar(',', rb); + ms.readText(rb); + } + + size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); } + + bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); } + + size_t getBufferSizeInBytes() const + { + return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes() + + ms.getBufferSizeInBytes(); + } + + void clearAndShrink() + { + m1.clearHasZero(); + m1.clearAndShrink(); + m2.clearAndShrink(); + m3.clearAndShrink(); + ms.clearAndShrink(); + } +}; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashMap.h b/dbms/src/Common/HashTable/TwoLevelStringHashMap.h new file mode 100644 index 0000000000000000000000000000000000000000..29bc4b394a7fd4967911c08d3b5426e1614a9827 --- /dev/null +++ b/dbms/src/Common/HashTable/TwoLevelStringHashMap.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +template typename ImplTable = StringHashMap> +class TwoLevelStringHashMap : public TwoLevelStringHashTable, ImplTable> +{ +public: + using Key = StringRef; + using key_type = Key; + using Self = TwoLevelStringHashMap; + using Base = TwoLevelStringHashTable, StringHashMap>; + using Base::Base; + using typename Base::Impl; + using mapped_type = TMapped; + using value_type = typename Base::value_type; + + using LookupResult = typename Base::LookupResult; + + template + void ALWAYS_INLINE forEachMapped(Func && func) + { + for (auto i = 0u; i < this->NUM_BUCKETS; ++i) + return this->impls[i].forEachMapped(func); + } + + mapped_type & ALWAYS_INLINE operator[](Key x) + { + bool inserted; + LookupResult it; + emplace(x, it, inserted); + if (inserted) + new (lookupResultGetMapped(it)) mapped_type(); + return *lookupResultGetMapped(it); + } +}; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h new file mode 100644 index 0000000000000000000000000000000000000000..ed1e1b018570ca030a1a7c769fa2e6d626192fa7 --- /dev/null +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -0,0 +1,217 @@ +#pragma once + +#include + +template , size_t BITS_FOR_BUCKET = 8> +class TwoLevelStringHashTable : private boost::noncopyable +{ +protected: + using HashValue = size_t; + using Self = TwoLevelStringHashTable; + +public: + using Key = StringRef; + using Impl = ImplTable; + + static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; + static constexpr 
size_t MAX_BUCKET = NUM_BUCKETS - 1; + + // TODO: currently hashing contains redundant computations when doing distributed or external aggregations + size_t hash(const Key & x) const + { + return const_cast(*this).dispatch(x, + [&](const auto &, const auto &, size_t hash) { return hash; }); + } + + size_t operator()(const Key & x) const { return hash(x); } + + /// NOTE Bad for hash tables with more than 2^32 cells. + static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } + +public: + using key_type = typename Impl::key_type; + using value_type = typename Impl::value_type; + using LookupResult = typename Impl::LookupResult; + + Impl impls[NUM_BUCKETS]; + + TwoLevelStringHashTable() {} + + template + TwoLevelStringHashTable(const Source & src) + { + if (src.m0.hasZero()) + impls[0].m0.setHasZero(*src.m0.zeroValue()); + + for (auto & v : src.m1) + { + size_t hash_value = v.getHash(src.m1); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m1.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.m2) + { + size_t hash_value = v.getHash(src.m2); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m2.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.m3) + { + size_t hash_value = v.getHash(src.m3); + size_t buck = getBucketFromHash(hash_value); + impls[buck].m3.insertUniqueNonZero(&v, hash_value); + } + for (auto & v : src.ms) + { + size_t hash_value = v.getHash(src.ms); + size_t buck = getBucketFromHash(hash_value); + impls[buck].ms.insertUniqueNonZero(&v, hash_value); + } + } + + // Dispatch is written in a way that maximizes the performance: + // 1. Always memcpy 8 times bytes + // 2. Use switch case extension to generate fast dispatching table + // 3. Combine hash computation along with bucket computation and key loading + // 4. 
Funcs are named callables that can be force_inlined + // NOTE: It relies on Little Endianness and SSE4.2 + template + decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) + { + static constexpr StringKey0 key0{}; + const StringRef & x = keyHolderGetKey(key_holder); + size_t sz = x.size; + const char * p = x.data; + // pending bits that needs to be shifted out + char s = (-sz & 7) * 8; + size_t res = -1ULL; + size_t buck; + union + { + StringKey8 k8; + StringKey16 k16; + StringKey24 k24; + UInt64 n[3]; + }; + StringHashTableHash hash; + switch (sz) + { + case 0: + keyHolderDiscardKey(key_holder); + return func(impls[0].m0, key0, 0); + CASE_1_8 : { + // first half page + if ((reinterpret_cast(p) & 2048) == 0) + { + memcpy(&n[0], p, 8); + n[0] &= -1ul >> s; + } + else + { + const char * lp = x.data + x.size - 8; + memcpy(&n[0], lp, 8); + n[0] >>= s; + } + res = hash(k8); + buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(impls[buck].m1, k8, res); + } + CASE_9_16 : { + memcpy(&n[0], p, 8); + const char * lp = x.data + x.size - 8; + memcpy(&n[1], lp, 8); + n[1] >>= s; + res = hash(k16); + buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(impls[buck].m2, k16, res); + } + CASE_17_24 : { + memcpy(&n[0], p, 16); + const char * lp = x.data + x.size - 8; + memcpy(&n[2], lp, 8); + n[2] >>= s; + res = hash(k24); + buck = getBucketFromHash(res); + keyHolderDiscardKey(key_holder); + return func(impls[buck].m3, k24, res); + } + default: { + res = hash(x); + buck = getBucketFromHash(res); + return func(impls[buck].ms, std::forward(key_holder), res); + } + } + } + + template + void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) + { + dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted}); + } + + LookupResult ALWAYS_INLINE find(Key x) + { + return dispatch(x, typename Impl::FindCallable{}); + } + + void write(DB::WriteBuffer & wb) const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].write(wb); + } + + void writeText(DB::WriteBuffer & wb) const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::writeChar(',', wb); + impls[i].writeText(wb); + } + } + + void read(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + impls[i].read(rb); + } + + void readText(DB::ReadBuffer & rb) + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + { + if (i != 0) + DB::assertChar(',', rb); + impls[i].readText(rb); + } + } + + size_t size() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].size(); + + return res; + } + + bool empty() const + { + for (size_t i = 0; i < NUM_BUCKETS; ++i) + if (!impls[i].empty()) + return false; + + return true; + } + + size_t getBufferSizeInBytes() const + { + size_t res = 0; + for (size_t i = 0; i < NUM_BUCKETS; ++i) + res += impls[i].getBufferSizeInBytes(); + + return res; + } +}; diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index a1369e2fa44a0e7b0c5cef55e7a7e8fbdf11f580..0f0faaecb4471d6ab42e88ace9c319d586a16cab 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -11,6 +11,9 @@ #include #include #include +#include +#include + #include #include #include @@ -69,12 +72,20 @@ using AggregatedDataWithUInt8Key = FixedHashMap; using AggregatedDataWithUInt16Key = FixedHashMap; using AggregatedDataWithUInt64Key = HashMap>; + +using AggregatedDataWithShortStringKey = StringHashMap; + using AggregatedDataWithStringKey 
= HashMapWithSavedHash; + using AggregatedDataWithKeys128 = HashMap; using AggregatedDataWithKeys256 = HashMap; using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; + +using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; + using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; + using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap; using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap; @@ -139,6 +150,8 @@ struct AggregationDataWithNullKeyTwoLevel : public Base template using HashTableWithNullKey = AggregationDataWithNullKey>; +template +using StringHashTableWithNullKey = AggregationDataWithNullKey>; using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey; using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey; @@ -149,6 +162,10 @@ using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey, TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; + +using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelStringHashMap>; + using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< TwoLevelHashMapWithSavedHash, TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; @@ -216,6 +233,32 @@ struct AggregationMethodString }; +/// Same as above but without cache +template +struct AggregationMethodStringNoCache +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + + AggregationMethodStringNoCache() {} + + template + AggregationMethodStringNoCache(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodString; + + static const bool low_cardinality_optimization = false; + + static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &) + { + key_columns[0]->insertData(key.data, key.size); + } +}; + + /// For the case where there is one fixed-length string key. template struct AggregationMethodFixedString @@ -241,6 +284,32 @@ struct AggregationMethodFixedString } }; +/// Same as above but without cache +template +struct AggregationMethodFixedStringNoCache +{ + using Data = TData; + using Key = typename Data::key_type; + using Mapped = typename Data::mapped_type; + + Data data; + + AggregationMethodFixedStringNoCache() {} + + template + AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {} + + using State = ColumnsHashing::HashMethodFixedString; + + static const bool low_cardinality_optimization = false; + + static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &) + { + key_columns[0]->insertData(key.data, key.size); + } +}; + + /// Single low cardinality column. 
template struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod @@ -434,16 +503,16 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> key32; std::unique_ptr> key64; - std::unique_ptr> key_string; - std::unique_ptr> key_fixed_string; + std::unique_ptr> key_string; + std::unique_ptr> key_fixed_string; std::unique_ptr> keys128; std::unique_ptr> keys256; std::unique_ptr> serialized; std::unique_ptr> key32_two_level; std::unique_ptr> key64_two_level; - std::unique_ptr> key_string_two_level; - std::unique_ptr> key_fixed_string_two_level; + std::unique_ptr> key_string_two_level; + std::unique_ptr> key_fixed_string_two_level; std::unique_ptr> keys128_two_level; std::unique_ptr> keys256_two_level; std::unique_ptr> serialized_two_level; diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index e272525c7b7cbf3279b7d24089e906391ffabb92..da45c1a5153cb5cc06c0ebacc47787b3307681e0 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -37,6 +37,12 @@ add_executable (hash_map_string_small hash_map_string_small.cpp) target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) target_link_libraries (hash_map_string_small PRIVATE dbms) +add_executable (string_hash_map string_hash_map.cpp) +target_link_libraries (string_hash_map PRIVATE dbms) + +add_executable (string_hash_map_aggregation string_hash_map.cpp) +target_link_libraries (string_hash_map_aggregation PRIVATE dbms) + add_executable (two_level_hash_map two_level_hash_map.cpp) target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) target_link_libraries (two_level_hash_map PRIVATE dbms) diff --git a/dbms/src/Interpreters/tests/string_hash_map.cpp b/dbms/src/Interpreters/tests/string_hash_map.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b16e1a91aa58dd32941ace0be33c0c9f988a70dc --- /dev/null +++ b/dbms/src/Interpreters/tests/string_hash_map.cpp @@ -0,0 +1,246 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + +#include +#include + +using namespace std; + +int main() +{ + std::string s; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0, 25); + std::binomial_distribution binomial1(100, 0.01); + std::binomial_distribution binomial2(100, 0.02); + std::binomial_distribution binomial4(100, 0.04); + std::binomial_distribution binomial8(100, 0.08); + std::binomial_distribution binomial16(100, 0.16); + std::binomial_distribution binomial24(100, 0.24); + std::binomial_distribution binomial48(100, 0.48); + // 11GB + std::ofstream f("/tmp/terms.csv"); + size_t l1, l2, l4, l8, l16, l24, l48; + for (auto n = 0ul; n < 1e8; ++n) + { + l1 = binomial1(rng) + 1; + l2 = binomial2(rng) + l1 + 1; + l4 = binomial4(rng) + l2 + 1; + l8 = binomial8(rng) + l4 + 1; + l16 = binomial16(rng) + l8 + 1; + l24 = binomial24(rng) + l16 + 1; + l48 = binomial48(rng) + l24 + 1; + s.resize(l48); + for (auto i = 0ul; i < l48 - 1; ++i) + s[i] = 'a' + dist(rng); + s[l1 - 1] = ','; + s[l2 - 1] = ','; + s[l4 - 1] = ','; + s[l8 - 1] = ','; + s[l16 - 1] = ','; + s[l24 - 1] = ','; + s[l48 - 1] = '\n'; + f << s; + } + f.close(); + return 0; +} + +create table terms (term1 String, term2 String, term4 String, term8 String, term16 String, term24 String, term48 String) engine TinyLog; +insert into terms select * from 
file('/tmp/terms.csv', CSV, 'a String, b String, c String, d String, e String, f String, g String'); + +NOTE: for reliable test results, try isolating cpu cores and do python -m perf tune. Also bind numa nodes if any. +# isolate cpu 18 +dir=/home/amos/git/chorigin/data/data/default/terms +for file in term1 term2 term4 term8 term16 term24 term48; do + for size in 30000000 50000000 80000000 100000000; do + BEST_METHOD=0 + BEST_RESULT=0 + for method in {1..2}; do + echo -ne $file $size $method '' + numactl --membind=0 taskset -c 18 ./string_hash_map $size $method <"$dir"/"$file".bin 2>&1 | perl -nE 'say /([0-9\.]+) elem/g if /HashMap/' | tee /tmp/string_hash_map_res + CUR_RESULT=$(cat /tmp/string_hash_map_res | tr -d '.') + if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then + BEST_METHOD=$method + BEST_RESULT=$CUR_RESULT + fi + done + echo Best: $BEST_METHOD - $BEST_RESULT + done +done + +--------------------------- + +term1 30000000 1 68785770.85 term2 30000000 1 42531788.83 term4 30000000 1 14759901.41 term8 30000000 1 8072903.47 +term1 30000000 2 40812128.16 term2 30000000 2 21352402.10 term4 30000000 2 9008907.80 term8 30000000 2 5822641.82 +Best: 1 - 6878577085 Best: 1 - 4253178883 Best: 1 - 1475990141 Best: 1 - 807290347 +term1 50000000 1 68027542.41 term2 50000000 1 40493742.80 term4 50000000 1 16827650.85 term8 50000000 1 7405230.14 +term1 50000000 2 37589806.02 term2 50000000 2 19362975.09 term4 50000000 2 8278094.11 term8 50000000 2 5106810.80 +Best: 1 - 6802754241 Best: 1 - 4049374280 Best: 1 - 1682765085 Best: 1 - 740523014 +term1 80000000 1 68651875.88 term2 80000000 1 38253695.50 term4 80000000 1 15847177.93 term8 80000000 1 7536319.25 +term1 80000000 2 38092189.20 term2 80000000 2 20287003.01 term4 80000000 2 9322770.34 term8 80000000 2 4355572.15 +Best: 1 - 6865187588 Best: 1 - 3825369550 Best: 1 - 1584717793 Best: 1 - 753631925 +term1 100000000 1 68641941.59 term2 100000000 1 39120834.79 term4 100000000 1 16773904.90 term8 100000000 1 7147146.55 +term1 100000000 2 38358006.72 term2 100000000 2 20629363.17 term4 100000000 2 9665201.92 term8 100000000 2 4728255.07 +Best: 1 - 6864194159 Best: 1 - 3912083479 Best: 1 - 1677390490 Best: 1 - 714714655 + + +term16 30000000 1 6823029.35 term24 30000000 1 5706271.14 term48 30000000 1 4695716.47 +term16 30000000 2 5672283.33 term24 30000000 2 5498855.56 term48 30000000 2 4860537.26 +Best: 1 - 682302935 Best: 1 - 570627114 Best: 2 - 486053726 +term16 50000000 1 6214581.25 term24 50000000 1 5249785.66 term48 50000000 1 4282606.12 +term16 50000000 2 4990361.44 term24 50000000 2 4855552.24 term48 50000000 2 4348923.29 +Best: 1 - 621458125 Best: 1 - 524978566 Best: 2 - 434892329 +term16 80000000 1 5382855.70 term24 80000000 1 4580133.04 term48 80000000 1 3779436.15 +term16 80000000 2 4282192.79 term24 80000000 2 4178791.09 term48 80000000 2 3788409.72 +Best: 1 - 538285570 Best: 1 - 458013304 Best: 2 - 378840972 +term16 100000000 1 5930103.42 term24 100000000 1 5030621.52 term48 100000000 1 4084666.73 +term16 100000000 2 4621719.60 term24 100000000 2 4499866.83 term48 100000000 2 4067029.31 +Best: 1 - 593010342 Best: 1 - 503062152 Best: 1 - 408466673 + +*/ + + +using Value = uint64_t; + +template +void NO_INLINE bench(const std::vector & data, DB::Arena &, const char * name) +{ + // warm up + /* + { + Map map; + typename Map::LookupResult it; + bool inserted; + + for (size_t i = 0, size = data.size(); i < size; ++i) + { + auto key_holder = DB::ArenaKeyHolder{data[i], pool}; + map.emplace(key_holder, it, inserted); + if (inserted) + it->getSecond() = 
0; + ++it->getSecond(); + } + } + */ + + std::cerr << "method " << name << std::endl; + for (auto t = 0ul; t < 7; ++t) + { + DB::Arena pool(128 * 1024 * 1024); + Stopwatch watch; + Map map; + typename Map::LookupResult it; + bool inserted; + + for (size_t i = 0, size = data.size(); i < size; ++i) + { + map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted); + if (inserted) + *lookupResultGetMapped(it) = 0; + ++*lookupResultGetMapped(it); + } + watch.stop(); + + std::cerr << "arena-memory " << pool.size() + map.getBufferSizeInBytes() << std::endl; + std::cerr << "single-run " << std::setprecision(3) + << watch.elapsedSeconds() << std::endl; + } +} + +/* +template +runFromFile() +{ + DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); + DB::CompressedReadBuffer in2(in1); + + Map map; + DB::Arena pool(128 * 1024 * 1024); + for (size_t i = 0; i < n && !in2.eof(); ++i) + { + auto key = DB::readStringBinaryInto(pool, in2); + + bool inserted; + Map::LookupResult mapped; + map.emplaceKeyHolder(DB::SerializedKeyHolder(key, pool), mapped, inserted); + } +} + +template +benchFromFile() +{ + double best_time = -1.; + for (auto t = 0ul; t < 50; ++t) + { + Stopwatch watch; + runFromFile(); + watch.stop(); + + if (best_time < 0 || best_time > watch.elapsedSeconds()) + { + best_time = watch.elapsedSeconds(); + } + } + + std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Elapsed: " << best_time << " (" << data.size() / best_time + << " elem/sec.)" << std::endl; +} +*/ + + +int main(int argc, char ** argv) +{ + if (argc < 3) + { + std::cerr << "Usage: program n m\n"; + return 1; + } + + size_t n = atoi(argv[1]); + size_t m = atoi(argv[2]); + + DB::Arena pool(128 * 1024 * 1024); + std::vector data(n); + + std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl; + + { + Stopwatch watch; + DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); + DB::CompressedReadBuffer in2(in1); + + std::string tmp; + for (size_t i = 0; i < n && !in2.eof(); ++i) + { + DB::readStringBinary(tmp, in2); + data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size()); + } + + watch.stop(); + std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " (" + << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; + } + + if (!m || m == 1) + bench>(data, pool, "StringHashMap"); + if (!m || m == 2) + bench>(data, pool, "HashMapWithSavedHash"); + if (!m || m == 3) + bench>(data, pool, "HashMap"); + return 0; +}
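For reference, a minimal usage sketch of the StringHashMap interface exercised by the benchmark above (not part of the patch): keys are passed through a key holder so that only long keys are copied into the arena, the value is reached through LookupResult (a pointer to the mapped value in this patch), and forEachValue walks all sub-maps. The header paths and the DB::ArenaKeyHolder spelling are assumptions taken from the includes and calls visible in the benchmark and may differ in other revisions.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include <Common/Arena.h>
#include <Common/HashTable/HashTableKeyHolder.h>   // DB::ArenaKeyHolder (assumed path)
#include <Common/HashTable/StringHashMap.h>

int main()
{
    DB::Arena pool(4096);
    StringHashMap<uint64_t> counts;

    std::vector<std::string> words{"a", "aggregate", "a", "clickhouse"};
    for (const auto & w : words)
    {
        StringRef key(w.data(), w.size());
        StringHashMap<uint64_t>::LookupResult it;
        bool inserted;

        /// Keys of 0..24 bytes are packed into integers and never touch the arena;
        /// only keys dispatched to the fallback sub-map are copied by the holder.
        counts.emplace(DB::ArenaKeyHolder{key, pool}, it, inserted);
        if (inserted)
            *it = 0;
        ++*it;
    }

    /// Visits the empty-string cell (if any) and all four non-empty sub-maps.
    counts.forEachValue([](StringRef key, uint64_t & value)
    {
        std::cout << key.toString() << ": " << value << '\n';
    });
}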
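The core trick behind StringHashTable::dispatch and toStringRef also deserves a standalone illustration: a key of 1..24 bytes is loaded with full 8-byte copies, the unused high bytes are zeroed, and the length is later recovered from the position of the highest non-zero byte. Below is a minimal sketch of the 1..8 byte case under the same assumptions the patch states (little-endian layout, keys without zero bytes); the buffer and variable names are illustrative only.

#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    /// An 8-byte buffer stands in for the "always memcpy 8 bytes" load; the real
    /// code may read past the key and uses the half-page check to avoid crossing
    /// into an unmapped page.
    char buf[8] = "hello";                 // 5 key bytes, remainder zero-padded
    size_t sz = 5;

    uint64_t k8;
    std::memcpy(&k8, buf, 8);
    size_t s = (-sz & 7) * 8;              // bits to discard: (8 - sz) * 8 = 24
    k8 &= ~0ull >> s;                      // zero the 3 unused high bytes

    /// Inverse of the packing, as in toStringRef(StringKey8):
    /// the length is 8 minus the number of leading zero bytes.
    size_t len = 8 - (__builtin_clzll(k8) >> 3);
    assert(len == sz);
    (void)len;
}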