#pragma once #include template , size_t BITS_FOR_BUCKET = 8> class TwoLevelStringHashTable : private boost::noncopyable { protected: using HashValue = size_t; using Self = TwoLevelStringHashTable; public: using Key = StringRef; using Impl = ImplTable; static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; // TODO: currently hashing contains redundant computations when doing distributed or external aggregations size_t hash(const Key & x) const { return const_cast(*this).dispatch(x, [&](const auto &, const auto &, size_t hash) { return hash; }); } size_t operator()(const Key & x) const { return hash(x); } /// NOTE Bad for hash tables with more than 2^32 cells. static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; } public: using key_type = typename Impl::key_type; using value_type = typename Impl::value_type; using LookupResult = typename Impl::LookupResult; Impl impls[NUM_BUCKETS]; TwoLevelStringHashTable() {} template TwoLevelStringHashTable(const Source & src) { if (src.m0.hasZero()) impls[0].m0.setHasZero(*src.m0.zeroValue()); for (auto & v : src.m1) { size_t hash_value = v.getHash(src.m1); size_t buck = getBucketFromHash(hash_value); impls[buck].m1.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.m2) { size_t hash_value = v.getHash(src.m2); size_t buck = getBucketFromHash(hash_value); impls[buck].m2.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.m3) { size_t hash_value = v.getHash(src.m3); size_t buck = getBucketFromHash(hash_value); impls[buck].m3.insertUniqueNonZero(&v, hash_value); } for (auto & v : src.ms) { size_t hash_value = v.getHash(src.ms); size_t buck = getBucketFromHash(hash_value); impls[buck].ms.insertUniqueNonZero(&v, hash_value); } } // Dispatch is written in a way that maximizes the performance: // 1. Always memcpy 8 times bytes // 2. Use switch case extension to generate fast dispatching table // 3. Combine hash computation along with bucket computation and key loading // 4. Funcs are named callables that can be force_inlined // NOTE: It relies on Little Endianness and SSE4.2 template decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) { static constexpr StringKey0 key0{}; const StringRef & x = keyHolderGetKey(key_holder); size_t sz = x.size; const char * p = x.data; // pending bits that needs to be shifted out char s = (-sz & 7) * 8; size_t res = -1ULL; size_t buck; union { StringKey8 k8; StringKey16 k16; StringKey24 k24; UInt64 n[3]; }; StringHashTableHash hash; switch (sz) { case 0: keyHolderDiscardKey(key_holder); return func(impls[0].m0, key0, 0); CASE_1_8 : { // first half page if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); n[0] &= -1ul >> s; } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); n[0] >>= s; } res = hash(k8); buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(impls[buck].m1, k8, res); } CASE_9_16 : { memcpy(&n[0], p, 8); const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); n[1] >>= s; res = hash(k16); buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(impls[buck].m2, k16, res); } CASE_17_24 : { memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); n[2] >>= s; res = hash(k24); buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(impls[buck].m3, k24, res); } default: { res = hash(x); buck = getBucketFromHash(res); return func(impls[buck].ms, std::forward(key_holder), res); } } } template void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) { dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted}); } LookupResult ALWAYS_INLINE find(Key x) { return dispatch(x, typename Impl::FindCallable{}); } void write(DB::WriteBuffer & wb) const { for (size_t i = 0; i < NUM_BUCKETS; ++i) impls[i].write(wb); } void writeText(DB::WriteBuffer & wb) const { for (size_t i = 0; i < NUM_BUCKETS; ++i) { if (i != 0) DB::writeChar(',', wb); impls[i].writeText(wb); } } void read(DB::ReadBuffer & rb) { for (size_t i = 0; i < NUM_BUCKETS; ++i) impls[i].read(rb); } void readText(DB::ReadBuffer & rb) { for (size_t i = 0; i < NUM_BUCKETS; ++i) { if (i != 0) DB::assertChar(',', rb); impls[i].readText(rb); } } size_t size() const { size_t res = 0; for (size_t i = 0; i < NUM_BUCKETS; ++i) res += impls[i].size(); return res; } bool empty() const { for (size_t i = 0; i < NUM_BUCKETS; ++i) if (!impls[i].empty()) return false; return true; } size_t getBufferSizeInBytes() const { size_t res = 0; for (size_t i = 0; i < NUM_BUCKETS; ++i) res += impls[i].getBufferSizeInBytes(); return res; } };