Introduce String Hash Map.

It speeds up aggregation over short string keys. Use it as a default aggregation method for string keys.

Introduce String Hash Map.
It speeds up aggregation over short string keys. Use it as a default aggregation method for string keys.
a8644478 · Amos Bird · Alexander Kuzmenkov · 5286d0af · a8644478 · a8644478
8 changed file
--- a/dbms/src/Common/HashTable/HashTable.h
+++ b/dbms/src/Common/HashTable/HashTable.h
@@ -358,6 +358,12 @@ protected:
    template <typename, typename, typename, typename, typename, typename, size_t>
    friend class TwoLevelHashTable;

+    template <typename, typename, size_t>
+    friend class TwoLevelStringHashTable;
+
+    template <typename SubMaps>
+    friend class StringHashTable;
+
    using HashValue = size_t;
    using Self = HashTable;
    using cell_type = Cell;

--- a/dbms/src/Common/HashTable/StringHashMap.h
+++ b/dbms/src/Common/HashTable/StringHashMap.h
+#pragma once
+
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashTableAllocator.h>
+#include <Common/HashTable/StringHashTable.h>
+
+template <typename Key, typename TMapped>
+struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
+{
+    using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
+    using Base::Base;
+    static constexpr bool need_zero_value_storage = false;
+};
+
+template<typename Key, typename Mapped>
+auto lookupResultGetMapped(StringHashMapCell<Key, Mapped> * cell) { return &cell->getSecond(); }
+
+template <typename TMapped>
+struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
+{
+    using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
+    using Base::Base;
+    static constexpr bool need_zero_value_storage = false;
+    bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
+    // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
+    static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; }
+    void setZero() { this->value.first.low = 0; }
+};
+
+template <typename TMapped>
+struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
+{
+    using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
+    using Base::Base;
+    static constexpr bool need_zero_value_storage = false;
+    bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
+    // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
+    static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; }
+    void setZero() { this->value.first.a = 0; }
+};
+
+template <typename TMapped>
+struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
+{
+    using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
+    using Base::Base;
+    static constexpr bool need_zero_value_storage = false;
+};
+
+template <typename TMapped, typename Allocator>
+struct StringHashMapSubMaps
+{
+    using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
+    using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+    using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+    using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+    using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
+};
+
+template <typename TMapped, typename Allocator = HashTableAllocator>
+class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
+{
+public:
+    using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
+    using Self = StringHashMap;
+    using Key = StringRef;
+    using key_type = StringRef;
+    using mapped_type = TMapped;
+    using value_type = typename Base::Ts::value_type;
+    using LookupResult = mapped_type *;
+
+    using Base::Base;
+
+    /// Merge every cell's value of current map into the destination map.
+    ///  Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
+    ///  Each filled cell in current map will invoke func once. If that map doesn't
+    ///  have a key equals to the given cell, a new cell gets emplaced into that map,
+    ///  and func is invoked with the third argument emplaced set to true. Otherwise
+    ///  emplaced is set to false.
+    template <typename Func>
+    void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
+    {
+        if (this->m0.hasZero())
+        {
+            const bool emplace_new_zero = !that.m0.hasZero();
+            if (emplace_new_zero)
+            {
+                that.m0.setHasZero();
+            }
+
+            func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(),
+                 emplace_new_zero);
+        }
+
+        this->m1.mergeToViaEmplace(that.m1, func);
+        this->m2.mergeToViaEmplace(that.m2, func);
+        this->m3.mergeToViaEmplace(that.m3, func);
+        this->ms.mergeToViaEmplace(that.ms, func);
+    }
+
+    /// Merge every cell's value of current map into the destination map via find.
+    ///  Func should have signature void(Mapped & dst, Mapped & src, bool exist).
+    ///  Each filled cell in current map will invoke func once. If that map doesn't
+    ///  have a key equals to the given cell, func is invoked with the third argument
+    ///  exist set to false. Otherwise exist is set to true.
+    template <typename Func>
+    void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
+    {
+        if (this->m0.hasZero())
+        {
+            if (that.m0.hasZero())
+            {
+                func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), true);
+            }
+            else
+            {
+                func(this->m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), false);
+            }
+        }
+
+        this->m1.mergeToViaFind(that.m1, func);
+        this->m2.mergeToViaFind(that.m2, func);
+        this->m3.mergeToViaFind(that.m3, func);
+        this->ms.mergeToViaFind(that.ms, func);
+    }
+
+    mapped_type & ALWAYS_INLINE operator[](Key x)
+    {
+        bool inserted;
+        LookupResult it = nullptr;
+        emplace(x, it, inserted);
+        if (inserted)
+            new (it) mapped_type();
+        return *it;
+    }
+
+    template <typename Func>
+    void ALWAYS_INLINE forEachValue(Func && func)
+    {
+        if (this->m0.size())
+        {
+            func(StringRef{}, this->m0.zeroValue()->getSecond());
+        }
+
+        for (auto & v : this->m1)
+        {
+            func(toStringRef(v.getFirst()), v.getSecond());
+        }
+
+        for (auto & v : this->m2)
+        {
+            func(toStringRef(v.getFirst()), v.getSecond());
+        }
+
+        for (auto & v : this->m3)
+        {
+            func(toStringRef(v.getFirst()), v.getSecond());
+        }
+
+        for (auto & v : this->ms)
+        {
+            func(v.getFirst(), v.getSecond());
+        }
+    }
+
+    template <typename Func>
+    void ALWAYS_INLINE forEachMapped(Func && func)
+    {
+        if (this->m0.size())
+            func(this->m0.zeroValue()->getSecond());
+        for (auto & v : this->m1)
+            func(v.getSecond());
+        for (auto & v : this->m2)
+            func(v.getSecond());
+        for (auto & v : this->m3)
+            func(v.getSecond());
+        for (auto & v : this->ms)
+            func(v.getSecond());
+    }
+};
--- a/dbms/src/Common/HashTable/StringHashTable.h
+++ b/dbms/src/Common/HashTable/StringHashTable.h
+#pragma once
+
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashTable.h>
+
+#define CASE_1_8 \
+    case 1: \
+    case 2: \
+    case 3: \
+    case 4: \
+    case 5: \
+    case 6: \
+    case 7: \
+    case 8
+
+#define CASE_9_16 \
+    case 9: \
+    case 10: \
+    case 11: \
+    case 12: \
+    case 13: \
+    case 14: \
+    case 15: \
+    case 16
+
+#define CASE_17_24 \
+    case 17: \
+    case 18: \
+    case 19: \
+    case 20: \
+    case 21: \
+    case 22: \
+    case 23: \
+    case 24
+
+struct StringKey0
+{
+};
+
+using StringKey8 = UInt64;
+using StringKey16 = DB::UInt128;
+struct StringKey24
+{
+    UInt64 a;
+    UInt64 b;
+    UInt64 c;
+
+    bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
+    bool operator!=(const StringKey24 rhs) const { return !operator==(rhs); }
+    bool operator==(const UInt64 rhs) const { return a == rhs && b == 0 && c == 0; }
+    bool operator!=(const UInt64 rhs) const { return !operator==(rhs); }
+
+    StringKey24 & operator=(const UInt64 rhs)
+    {
+        a = rhs;
+        b = 0;
+        c = 0;
+        return *this;
+    }
+};
+
+inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
+{
+    return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)};
+}
+inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
+{
+    return {reinterpret_cast<const char *>(&n), 16ul - (__builtin_clzll(n.high) >> 3)};
+}
+inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
+{
+    return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
+}
+inline const StringRef & ALWAYS_INLINE toStringRef(const StringRef & s)
+{
+    return s;
+}
+
+struct StringHashTableHash
+{
+#if defined(__SSE4_2__)
+    size_t ALWAYS_INLINE operator()(StringKey8 key) const
+    {
+        size_t res = -1ULL;
+        res = _mm_crc32_u64(res, key);
+        return res;
+    }
+    size_t ALWAYS_INLINE operator()(StringKey16 key) const
+    {
+        size_t res = -1ULL;
+        res = _mm_crc32_u64(res, key.low);
+        res = _mm_crc32_u64(res, key.high);
+        return res;
+    }
+    size_t ALWAYS_INLINE operator()(StringKey24 key) const
+    {
+        size_t res = -1ULL;
+        res = _mm_crc32_u64(res, key.a);
+        res = _mm_crc32_u64(res, key.b);
+        res = _mm_crc32_u64(res, key.c);
+        return res;
+    }
+#else
+    size_t ALWAYS_INLINE operator()(StringKey8 key) const
+    {
+        return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8);
+    }
+    size_t ALWAYS_INLINE operator()(StringKey16 key) const
+    {
+        return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16);
+    }
+    size_t ALWAYS_INLINE operator()(StringKey24 key) const
+    {
+        return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24);
+    }
+#endif
+    size_t ALWAYS_INLINE operator()(StringRef key) const
+    {
+        return StringRefHash()(key);
+    }
+};
+
+template <typename Cell>
+struct StringHashTableEmpty
+{
+    using Self = StringHashTableEmpty;
+
+    bool has_zero = false;
+    std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key.
+
+public:
+    bool hasZero() const { return has_zero; }
+
+    void setHasZero()
+    {
+        has_zero = true;
+        new (zeroValue()) Cell();
+    }
+
+    void setHasZero(const Cell & other)
+    {
+        has_zero = true;
+        new (zeroValue()) Cell(other);
+    }
+
+    void clearHasZero()
+    {
+        has_zero = false;
+        if (!std::is_trivially_destructible_v<Cell>)
+            zeroValue()->~Cell();
+    }
+
+    Cell * zeroValue() { return reinterpret_cast<Cell *>(&zero_value_storage); }
+    const Cell * zeroValue() const { return reinterpret_cast<const Cell *>(&zero_value_storage); }
+
+    using LookupResult = Cell *;
+    using ConstLookupResult = const Cell *;
+
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t /* hash */)
+    {
+        if (!hasZero())
+        {
+            setHasZero();
+            inserted = true;
+        }
+        else
+            inserted = false;
+        it = zeroValue();
+    }
+
+    template <typename Key>
+    LookupResult ALWAYS_INLINE find(Key, size_t /* hash */)
+    {
+        return hasZero() ? zeroValue() : nullptr;
+    }
+
+
+    void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
+    void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
+    void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); }
+    void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); }
+    size_t size() const { return hasZero() ? 1 : 0; }
+    bool empty() const { return !hasZero(); }
+    size_t getBufferSizeInBytes() const { return sizeof(Cell); }
+    size_t getCollisions() const { return 0; }
+};
+
+template <size_t initial_size_degree = 8>
+struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
+{
+    // Smooth growing for string maps
+    void increaseSize() { this->size_degree += 1; }
+};
+
+template <typename SubMaps>
+class StringHashTable : private boost::noncopyable
+{
+protected:
+    static constexpr size_t NUM_MAPS = 5;
+    // Map for storing empty string
+    using T0 = typename SubMaps::T0;
+
+    // Short strings are stored as numbers
+    using T1 = typename SubMaps::T1;
+    using T2 = typename SubMaps::T2;
+    using T3 = typename SubMaps::T3;
+
+    // Long strings are stored as StringRef along with saved hash
+    using Ts = typename SubMaps::Ts;
+    using Self = StringHashTable;
+
+    template <typename, typename, size_t>
+    friend class TwoLevelStringHashTable;
+
+    T0 m0;
+    T1 m1;
+    T2 m2;
+    T3 m3;
+    Ts ms;
+
+public:
+    using Key = StringRef;
+    using key_type = Key;
+    using value_type = typename Ts::value_type;
+    using LookupResult = typename Ts::mapped_type *;
+
+    StringHashTable() {}
+
+    StringHashTable(size_t reserve_for_num_elements)
+        : m1{reserve_for_num_elements / 4}
+        , m2{reserve_for_num_elements / 4}
+        , m3{reserve_for_num_elements / 4}
+        , ms{reserve_for_num_elements / 4}
+    {
+    }
+
+    StringHashTable(StringHashTable && rhs) { *this = std::move(rhs); }
+    ~StringHashTable() {}
+
+public:
+    // Dispatch is written in a way that maximizes the performance:
+    // 1. Always memcpy 8 times bytes
+    // 2. Use switch case extension to generate fast dispatching table
+    // 3. Combine hash computation along with key loading
+    // 4. Funcs are named callables that can be force_inlined
+    // NOTE: It relies on Little Endianness and SSE4.2
+    template <typename KeyHolder, typename Func>
+    decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
+    {
+        static constexpr StringKey0 key0{};
+        const StringRef & x = keyHolderGetKey(key_holder);
+        size_t sz = x.size;
+        const char * p = x.data;
+        // pending bits that needs to be shifted out
+        char s = (-sz & 7) * 8;
+        union
+        {
+            StringKey8 k8;
+            StringKey16 k16;
+            StringKey24 k24;
+            UInt64 n[3];
+        };
+        StringHashTableHash hash;
+        switch (sz)
+        {
+            case 0:
+                keyHolderDiscardKey(key_holder);
+                return func(m0, key0, 0);
+            CASE_1_8 : {
+                // first half page
+                if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
+                {
+                    memcpy(&n[0], p, 8);
+                    n[0] &= -1ul >> s;
+                }
+                else
+                {
+                    const char * lp = x.data + x.size - 8;
+                    memcpy(&n[0], lp, 8);
+                    n[0] >>= s;
+                }
+                keyHolderDiscardKey(key_holder);
+                return func(m1, k8, hash(k8));
+            }
+            CASE_9_16 : {
+                memcpy(&n[0], p, 8);
+                const char * lp = x.data + x.size - 8;
+                memcpy(&n[1], lp, 8);
+                n[1] >>= s;
+                keyHolderDiscardKey(key_holder);
+                return func(m2, k16, hash(k16));
+            }
+            CASE_17_24 : {
+                memcpy(&n[0], p, 16);
+                const char * lp = x.data + x.size - 8;
+                memcpy(&n[2], lp, 8);
+                n[2] >>= s;
+                keyHolderDiscardKey(key_holder);
+                return func(m3, k24, hash(k24));
+            }
+            default: {
+                return func(ms, std::forward<KeyHolder>(key_holder), hash(x));
+            }
+        }
+    }
+
+    struct EmplaceCallable
+    {
+        LookupResult & mapped;
+        bool & inserted;
+
+        EmplaceCallable(LookupResult & mapped_, bool & inserted_)
+            : mapped(mapped_), inserted(inserted_) {}
+
+        template <typename Map, typename KeyHolder>
+        void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
+        {
+            typename Map::LookupResult result;
+            map.emplace(key_holder, result, inserted, hash);
+            mapped = lookupResultGetMapped(result);
+        }
+    };
+
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
+    {
+        this->dispatch(key_holder, EmplaceCallable(it, inserted));
+    }
+
+    struct FindCallable
+    {
+        template <typename Map, typename KeyHolder>
+        LookupResult ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
+        {
+            return lookupResultGetMapped(map.find(keyHolderGetKey(key_holder), hash));
+        }
+    };
+
+    LookupResult ALWAYS_INLINE find(Key x)
+    {
+        return dispatch(x, FindCallable{});
+    }
+
+    void write(DB::WriteBuffer & wb) const
+    {
+        m0.write(wb);
+        m1.write(wb);
+        m2.write(wb);
+        m3.write(wb);
+        ms.write(wb);
+    }
+
+    void writeText(DB::WriteBuffer & wb) const
+    {
+        m0.writeText(wb);
+        DB::writeChar(',', wb);
+        m1.writeText(wb);
+        DB::writeChar(',', wb);
+        m2.writeText(wb);
+        DB::writeChar(',', wb);
+        m3.writeText(wb);
+        DB::writeChar(',', wb);
+        ms.writeText(wb);
+    }
+
+    void read(DB::ReadBuffer & rb)
+    {
+        m0.read(rb);
+        m1.read(rb);
+        m2.read(rb);
+        m3.read(rb);
+        ms.read(rb);
+    }
+
+    void readText(DB::ReadBuffer & rb)
+    {
+        m0.readText(rb);
+        DB::assertChar(',', rb);
+        m1.readText(rb);
+        DB::assertChar(',', rb);
+        m2.readText(rb);
+        DB::assertChar(',', rb);
+        m3.readText(rb);
+        DB::assertChar(',', rb);
+        ms.readText(rb);
+    }
+
+    size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
+
+    bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); }
+
+    size_t getBufferSizeInBytes() const
+    {
+        return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes()
+            + ms.getBufferSizeInBytes();
+    }
+
+    void clearAndShrink()
+    {
+        m1.clearHasZero();
+        m1.clearAndShrink();
+        m2.clearAndShrink();
+        m3.clearAndShrink();
+        ms.clearAndShrink();
+    }
+};
--- a/dbms/src/Common/HashTable/TwoLevelStringHashMap.h
+++ b/dbms/src/Common/HashTable/TwoLevelStringHashMap.h
+#pragma once
+
+#include <Common/HashTable/StringHashMap.h>
+#include <Common/HashTable/TwoLevelStringHashTable.h>
+
+template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap>
+class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>
+{
+public:
+    using Key = StringRef;
+    using key_type = Key;
+    using Self = TwoLevelStringHashMap;
+    using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
+    using Base::Base;
+    using typename Base::Impl;
+    using mapped_type = TMapped;
+    using value_type = typename Base::value_type;
+
+    using LookupResult = typename Base::LookupResult;
+
+    template <typename Func>
+    void ALWAYS_INLINE forEachMapped(Func && func)
+    {
+        for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
+            return this->impls[i].forEachMapped(func);
+    }
+
+    mapped_type & ALWAYS_INLINE operator[](Key x)
+    {
+        bool inserted;
+        LookupResult it;
+        emplace(x, it, inserted);
+        if (inserted)
+            new (lookupResultGetMapped(it)) mapped_type();
+        return *lookupResultGetMapped(it);
+    }
+};
--- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h
+++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h
+#pragma once
+
+#include <Common/HashTable/StringHashTable.h>
+
+template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, size_t BITS_FOR_BUCKET = 8>
+class TwoLevelStringHashTable : private boost::noncopyable
+{
+protected:
+    using HashValue = size_t;
+    using Self = TwoLevelStringHashTable;
+
+public:
+    using Key = StringRef;
+    using Impl = ImplTable;
+
+    static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
+    static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
+
+    // TODO: currently hashing contains redundant computations when doing distributed or external aggregations
+    size_t hash(const Key & x) const
+    {
+        return const_cast<Self &>(*this).dispatch(x,
+            [&](const auto &, const auto &, size_t hash) { return hash; });
+    }
+
+    size_t operator()(const Key & x) const { return hash(x); }
+
+    /// NOTE Bad for hash tables with more than 2^32 cells.
+    static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
+
+public:
+    using key_type = typename Impl::key_type;
+    using value_type = typename Impl::value_type;
+    using LookupResult = typename Impl::LookupResult;
+
+    Impl impls[NUM_BUCKETS];
+
+    TwoLevelStringHashTable() {}
+
+    template <typename Source>
+    TwoLevelStringHashTable(const Source & src)
+    {
+        if (src.m0.hasZero())
+            impls[0].m0.setHasZero(*src.m0.zeroValue());
+
+        for (auto & v : src.m1)
+        {
+            size_t hash_value = v.getHash(src.m1);
+            size_t buck = getBucketFromHash(hash_value);
+            impls[buck].m1.insertUniqueNonZero(&v, hash_value);
+        }
+        for (auto & v : src.m2)
+        {
+            size_t hash_value = v.getHash(src.m2);
+            size_t buck = getBucketFromHash(hash_value);
+            impls[buck].m2.insertUniqueNonZero(&v, hash_value);
+        }
+        for (auto & v : src.m3)
+        {
+            size_t hash_value = v.getHash(src.m3);
+            size_t buck = getBucketFromHash(hash_value);
+            impls[buck].m3.insertUniqueNonZero(&v, hash_value);
+        }
+        for (auto & v : src.ms)
+        {
+            size_t hash_value = v.getHash(src.ms);
+            size_t buck = getBucketFromHash(hash_value);
+            impls[buck].ms.insertUniqueNonZero(&v, hash_value);
+        }
+    }
+
+    // Dispatch is written in a way that maximizes the performance:
+    // 1. Always memcpy 8 times bytes
+    // 2. Use switch case extension to generate fast dispatching table
+    // 3. Combine hash computation along with bucket computation and key loading
+    // 4. Funcs are named callables that can be force_inlined
+    // NOTE: It relies on Little Endianness and SSE4.2
+    template <typename Func, typename KeyHolder>
+    decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
+    {
+        static constexpr StringKey0 key0{};
+        const StringRef & x = keyHolderGetKey(key_holder);
+        size_t sz = x.size;
+        const char * p = x.data;
+        // pending bits that needs to be shifted out
+        char s = (-sz & 7) * 8;
+        size_t res = -1ULL;
+        size_t buck;
+        union
+        {
+            StringKey8 k8;
+            StringKey16 k16;
+            StringKey24 k24;
+            UInt64 n[3];
+        };
+        StringHashTableHash hash;
+        switch (sz)
+        {
+            case 0:
+                keyHolderDiscardKey(key_holder);
+                return func(impls[0].m0, key0, 0);
+            CASE_1_8 : {
+                // first half page
+                if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
+                {
+                    memcpy(&n[0], p, 8);
+                    n[0] &= -1ul >> s;
+                }
+                else
+                {
+                    const char * lp = x.data + x.size - 8;
+                    memcpy(&n[0], lp, 8);
+                    n[0] >>= s;
+                }
+                res = hash(k8);
+                buck = getBucketFromHash(res);
+                keyHolderDiscardKey(key_holder);
+                return func(impls[buck].m1, k8, res);
+            }
+            CASE_9_16 : {
+                memcpy(&n[0], p, 8);
+                const char * lp = x.data + x.size - 8;
+                memcpy(&n[1], lp, 8);
+                n[1] >>= s;
+                res = hash(k16);
+                buck = getBucketFromHash(res);
+                keyHolderDiscardKey(key_holder);
+                return func(impls[buck].m2, k16, res);
+            }
+            CASE_17_24 : {
+                memcpy(&n[0], p, 16);
+                const char * lp = x.data + x.size - 8;
+                memcpy(&n[2], lp, 8);
+                n[2] >>= s;
+                res = hash(k24);
+                buck = getBucketFromHash(res);
+                keyHolderDiscardKey(key_holder);
+                return func(impls[buck].m3, k24, res);
+            }
+            default: {
+                res = hash(x);
+                buck = getBucketFromHash(res);
+                return func(impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
+            }
+        }
+    }
+
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
+    {
+        dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted});
+    }
+
+    LookupResult ALWAYS_INLINE find(Key x)
+    {
+        return dispatch(x, typename Impl::FindCallable{});
+    }
+
+    void write(DB::WriteBuffer & wb) const
+    {
+        for (size_t i = 0; i < NUM_BUCKETS; ++i)
+            impls[i].write(wb);
+    }
+
+    void writeText(DB::WriteBuffer & wb) const
+    {
+        for (size_t i = 0; i < NUM_BUCKETS; ++i)
+        {
+            if (i != 0)
+                DB::writeChar(',', wb);
+            impls[i].writeText(wb);
+        }
+    }
+
+    void read(DB::ReadBuffer & rb)
+    {
+        for (size_t i = 0; i < NUM_BUCKETS; ++i)
+            impls[i].read(rb);
+    }
+
+    void readText(DB::ReadBuffer & rb)
+    {
+        for (size_t i = 0; i < NUM_BUCKETS; ++i)
+        {
+            if (i != 0)
+                DB::assertChar(',', rb);
+            impls[i].readText(rb);
+        }
+    }
+
+    size_t size() const
+    {
+        size_t res = 0;
+        for (size_t i = 0; i < NUM_BUCKETS; ++i)
+            res += impls[i].size();
+
+        return res;
+    }
+
+    bool empty() const
+    {
+        for (size_t i = 0; i < NUM_BUCKETS; ++i)
+            if (!impls[i].empty())
+                return false;
+
+        return true;
+    }
+
+    size_t getBufferSizeInBytes() const
+    {
+        size_t res = 0;
+        for (size_t i = 0; i < NUM_BUCKETS; ++i)
+            res += impls[i].getBufferSizeInBytes();
+
+        return res;
+    }
+};
--- a/dbms/src/Interpreters/Aggregator.h
+++ b/dbms/src/Interpreters/Aggregator.h
@@ -11,6 +11,9 @@
 #include <Common/HashTable/FixedHashMap.h>
 #include <Common/HashTable/HashMap.h>
 #include <Common/HashTable/TwoLevelHashMap.h>
+#include <Common/HashTable/StringHashMap.h>
+#include <Common/HashTable/TwoLevelStringHashMap.h>
+
 #include <Common/ThreadPool.h>
 #include <Common/UInt128.h>
 #include <Common/LRUCache.h>
@@ -69,12 +72,20 @@ using AggregatedDataWithUInt8Key = FixedHashMap<UInt8, AggregateDataPtr>;
 using AggregatedDataWithUInt16Key = FixedHashMap<UInt16, AggregateDataPtr>;

 using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
+
+using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>;
+
 using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
+
 using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
 using AggregatedDataWithKeys256 = HashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;

 using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
+
+using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>;
+
 using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>;
+
 using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
 using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;

@@ -139,6 +150,8 @@ struct AggregationDataWithNullKeyTwoLevel : public Base

 template <typename ... Types>
 using HashTableWithNullKey = AggregationDataWithNullKey<HashMapTable<Types ...>>;
+template <typename ... Types>
+using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Types ...>>;

 using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
 using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
@@ -149,6 +162,10 @@ using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<Aggregate
 using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
        TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>,
        TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
+
+using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
+        TwoLevelStringHashMap<AggregateDataPtr, HashTableAllocator, StringHashTableWithNullKey>>;
+
 using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
        TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
        TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
@@ -216,6 +233,32 @@ struct AggregationMethodString
 };


+/// Same as above but without cache
+template <typename TData>
+struct AggregationMethodStringNoCache
+{
+    using Data = TData;
+    using Key = typename Data::key_type;
+    using Mapped = typename Data::mapped_type;
+
+    Data data;
+
+    AggregationMethodStringNoCache() {}
+
+    template <typename Other>
+    AggregationMethodStringNoCache(const Other & other) : data(other.data) {}
+
+    using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>;
+
+    static const bool low_cardinality_optimization = false;
+
+    static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &)
+    {
+        key_columns[0]->insertData(key.data, key.size);
+    }
+};
+
+
 /// For the case where there is one fixed-length string key.
 template <typename TData>
 struct AggregationMethodFixedString
@@ -241,6 +284,32 @@ struct AggregationMethodFixedString
    }
 };

+/// Same as above but without cache
+template <typename TData>
+struct AggregationMethodFixedStringNoCache
+{
+    using Data = TData;
+    using Key = typename Data::key_type;
+    using Mapped = typename Data::mapped_type;
+
+    Data data;
+
+    AggregationMethodFixedStringNoCache() {}
+
+    template <typename Other>
+    AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {}
+
+    using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>;
+
+    static const bool low_cardinality_optimization = false;
+
+    static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &)
+    {
+        key_columns[0]->insertData(key.data, key.size);
+    }
+};
+
+
 /// Single low cardinality column.
 template <typename SingleColumnMethod>
 struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
@@ -434,16 +503,16 @@ struct AggregatedDataVariants : private boost::noncopyable

    std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64Key>>         key32;
    std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>>         key64;
-    std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKey>>                    key_string;
-    std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKey>>               key_fixed_string;
+    std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKey>>               key_string;
+    std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKey>>          key_fixed_string;
    std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128>>                   keys128;
    std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256>>                   keys256;
    std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKey>>                serialized;

    std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64KeyTwoLevel>> key32_two_level;
    std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyTwoLevel>> key64_two_level;
-    std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKeyTwoLevel>>            key_string_two_level;
-    std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKeyTwoLevel>>       key_fixed_string_two_level;
+    std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>>       key_string_two_level;
+    std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>>  key_fixed_string_two_level;
    std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel>>           keys128_two_level;
    std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel>>           keys256_two_level;
    std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>>        serialized_two_level;

--- a/dbms/src/Interpreters/tests/CMakeLists.txt
+++ b/dbms/src/Interpreters/tests/CMakeLists.txt
@@ -37,6 +37,12 @@ add_executable (hash_map_string_small hash_map_string_small.cpp)
 target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map_string_small PRIVATE dbms)

+add_executable (string_hash_map string_hash_map.cpp)
+target_link_libraries (string_hash_map PRIVATE dbms)
+
+add_executable (string_hash_map_aggregation string_hash_map.cpp)
+target_link_libraries (string_hash_map_aggregation PRIVATE dbms)
+
 add_executable (two_level_hash_map two_level_hash_map.cpp)
 target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
 target_link_libraries (two_level_hash_map PRIVATE dbms)

--- a/dbms/src/Interpreters/tests/string_hash_map.cpp
+++ b/dbms/src/Interpreters/tests/string_hash_map.cpp
+#include <iomanip>
+#include <iostream>
+#include <vector>
+#include <Compression/CompressedReadBuffer.h>
+#include <Core/Types.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/ReadHelpers.h>
+#include <Interpreters/AggregationCommon.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashTableKeyHolder.h>
+#include <Common/HashTable/StringHashMap.h>
+#include <Common/Stopwatch.h>
+#include <common/StringRef.h>
+
+/**
+
+#include <fstream>
+#include <random>
+
+using namespace std;
+
+int main()
+{
+    std::string s;
+    std::random_device dev;
+    std::mt19937 rng(dev());
+    std::uniform_int_distribution<std::mt19937::result_type> dist(0, 25);
+    std::binomial_distribution<std::mt19937::result_type> binomial1(100, 0.01);
+    std::binomial_distribution<std::mt19937::result_type> binomial2(100, 0.02);
+    std::binomial_distribution<std::mt19937::result_type> binomial4(100, 0.04);
+    std::binomial_distribution<std::mt19937::result_type> binomial8(100, 0.08);
+    std::binomial_distribution<std::mt19937::result_type> binomial16(100, 0.16);
+    std::binomial_distribution<std::mt19937::result_type> binomial24(100, 0.24);
+    std::binomial_distribution<std::mt19937::result_type> binomial48(100, 0.48);
+    // 11GB
+    std::ofstream f("/tmp/terms.csv");
+    size_t l1, l2, l4, l8, l16, l24, l48;
+    for (auto n = 0ul; n < 1e8; ++n)
+    {
+        l1 = binomial1(rng) + 1;
+        l2 = binomial2(rng) + l1 + 1;
+        l4 = binomial4(rng) + l2 + 1;
+        l8 = binomial8(rng) + l4 + 1;
+        l16 = binomial16(rng) + l8 + 1;
+        l24 = binomial24(rng) + l16 + 1;
+        l48 = binomial48(rng) + l24 + 1;
+        s.resize(l48);
+        for (auto i = 0ul; i < l48 - 1; ++i)
+            s[i] = 'a' + dist(rng);
+        s[l1 - 1] = ',';
+        s[l2 - 1] = ',';
+        s[l4 - 1] = ',';
+        s[l8 - 1] = ',';
+        s[l16 - 1] = ',';
+        s[l24 - 1] = ',';
+        s[l48 - 1] = '\n';
+        f << s;
+    }
+    f.close();
+    return 0;
+}
+
+create table terms (term1 String, term2 String, term4 String, term8 String, term16 String, term24 String, term48 String) engine TinyLog;
+insert into terms select * from file('/tmp/terms.csv', CSV, 'a String, b String, c String, d String, e String, f String, g String');
+
+NOTE: for reliable test results, try isolating cpu cores and do python -m perf tune. Also bind numa nodes if any.
+# isolate cpu 18
+dir=/home/amos/git/chorigin/data/data/default/terms
+for file in term1 term2 term4 term8 term16 term24 term48; do
+    for size in 30000000 50000000 80000000 100000000; do
+        BEST_METHOD=0
+        BEST_RESULT=0
+        for method in {1..2}; do
+            echo -ne $file $size $method ''
+            numactl --membind=0 taskset -c 18 ./string_hash_map $size $method <"$dir"/"$file".bin 2>&1 | perl -nE 'say /([0-9\.]+) elem/g if /HashMap/' | tee /tmp/string_hash_map_res
+            CUR_RESULT=$(cat /tmp/string_hash_map_res | tr -d '.')
+            if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then
+                BEST_METHOD=$method
+                BEST_RESULT=$CUR_RESULT
+            fi
+        done
+        echo Best: $BEST_METHOD - $BEST_RESULT
+    done
+done
+
+---------------------------
+
+term1 30000000 1 68785770.85   term2 30000000 1 42531788.83   term4 30000000 1 14759901.41   term8 30000000 1 8072903.47
+term1 30000000 2 40812128.16   term2 30000000 2 21352402.10   term4 30000000 2 9008907.80    term8 30000000 2 5822641.82
+Best: 1 - 6878577085           Best: 1 - 4253178883           Best: 1 - 1475990141           Best: 1 - 807290347
+term1 50000000 1 68027542.41   term2 50000000 1 40493742.80   term4 50000000 1 16827650.85   term8 50000000 1 7405230.14
+term1 50000000 2 37589806.02   term2 50000000 2 19362975.09   term4 50000000 2 8278094.11    term8 50000000 2 5106810.80
+Best: 1 - 6802754241           Best: 1 - 4049374280           Best: 1 - 1682765085           Best: 1 - 740523014
+term1 80000000 1 68651875.88   term2 80000000 1 38253695.50   term4 80000000 1 15847177.93   term8 80000000 1 7536319.25
+term1 80000000 2 38092189.20   term2 80000000 2 20287003.01   term4 80000000 2 9322770.34    term8 80000000 2 4355572.15
+Best: 1 - 6865187588           Best: 1 - 3825369550           Best: 1 - 1584717793           Best: 1 - 753631925
+term1 100000000 1 68641941.59  term2 100000000 1 39120834.79  term4 100000000 1 16773904.90  term8 100000000 1 7147146.55
+term1 100000000 2 38358006.72  term2 100000000 2 20629363.17  term4 100000000 2 9665201.92   term8 100000000 2 4728255.07
+Best: 1 - 6864194159           Best: 1 - 3912083479           Best: 1 - 1677390490           Best: 1 - 714714655
+
+
+term16 30000000 1 6823029.35        term24 30000000 1 5706271.14        term48 30000000 1 4695716.47
+term16 30000000 2 5672283.33        term24 30000000 2 5498855.56        term48 30000000 2 4860537.26
+Best: 1 - 682302935                 Best: 1 - 570627114                 Best: 2 - 486053726
+term16 50000000 1 6214581.25        term24 50000000 1 5249785.66        term48 50000000 1 4282606.12
+term16 50000000 2 4990361.44        term24 50000000 2 4855552.24        term48 50000000 2 4348923.29
+Best: 1 - 621458125                 Best: 1 - 524978566                 Best: 2 - 434892329
+term16 80000000 1 5382855.70        term24 80000000 1 4580133.04        term48 80000000 1 3779436.15
+term16 80000000 2 4282192.79        term24 80000000 2 4178791.09        term48 80000000 2 3788409.72
+Best: 1 - 538285570                 Best: 1 - 458013304                 Best: 2 - 378840972
+term16 100000000 1 5930103.42       term24 100000000 1 5030621.52       term48 100000000 1 4084666.73
+term16 100000000 2 4621719.60       term24 100000000 2 4499866.83       term48 100000000 2 4067029.31
+Best: 1 - 593010342                 Best: 1 - 503062152                 Best: 1 - 408466673
+
+*/
+
+
+using Value = uint64_t;
+
+template <typename Map>
+void NO_INLINE bench(const std::vector<StringRef> & data, DB::Arena &, const char * name)
+{
+    // warm up
+    /*
+    {
+        Map map;
+        typename Map::LookupResult it;
+        bool inserted;
+
+        for (size_t i = 0, size = data.size(); i < size; ++i)
+        {
+            auto key_holder = DB::ArenaKeyHolder{data[i], pool};
+            map.emplace(key_holder, it, inserted);
+            if (inserted)
+                it->getSecond() = 0;
+            ++it->getSecond();
+        }
+    }
+    */
+
+    std::cerr << "method " << name << std::endl;
+    for (auto t = 0ul; t < 7; ++t)
+    {
+        DB::Arena pool(128 * 1024 * 1024);
+        Stopwatch watch;
+        Map map;
+        typename Map::LookupResult it;
+        bool inserted;
+
+        for (size_t i = 0, size = data.size(); i < size; ++i)
+        {
+            map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted);
+            if (inserted)
+                *lookupResultGetMapped(it) = 0;
+            ++*lookupResultGetMapped(it);
+        }
+        watch.stop();
+
+        std::cerr << "arena-memory " << pool.size() + map.getBufferSizeInBytes() << std::endl;
+        std::cerr << "single-run " << std::setprecision(3)
+                  << watch.elapsedSeconds() << std::endl;
+    }
+}
+
+/*
+template <typename Map>
+runFromFile()
+{
+    DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
+    DB::CompressedReadBuffer in2(in1);
+
+    Map map;
+    DB::Arena pool(128 * 1024 * 1024);
+    for (size_t i = 0; i < n && !in2.eof(); ++i)
+    {
+        auto key = DB::readStringBinaryInto(pool, in2);
+
+        bool inserted;
+        Map::LookupResult mapped;
+        map.emplaceKeyHolder(DB::SerializedKeyHolder(key, pool), mapped, inserted);
+    }
+}
+
+template <typename Map>
+benchFromFile()
+{
+    double best_time = -1.;
+    for (auto t = 0ul; t < 50; ++t)
+    {
+        Stopwatch watch;
+        runFromFile();
+        watch.stop();
+
+        if (best_time < 0 || best_time > watch.elapsedSeconds())
+        {
+            best_time = watch.elapsedSeconds();
+        }
+    }
+
+    std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Elapsed: " << best_time << " (" << data.size() / best_time
+              << " elem/sec.)" << std::endl;
+}
+*/
+
+
+int main(int argc, char ** argv)
+{
+    if (argc < 3)
+    {
+        std::cerr << "Usage: program n m\n";
+        return 1;
+    }
+
+    size_t n = atoi(argv[1]);
+    size_t m = atoi(argv[2]);
+
+    DB::Arena pool(128 * 1024 * 1024);
+    std::vector<StringRef> data(n);
+
+    std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
+
+    {
+        Stopwatch watch;
+        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
+        DB::CompressedReadBuffer in2(in1);
+
+        std::string tmp;
+        for (size_t i = 0; i < n && !in2.eof(); ++i)
+        {
+            DB::readStringBinary(tmp, in2);
+            data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size());
+        }
+
+        watch.stop();
+        std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " ("
+                  << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl;
+    }
+
+    if (!m || m == 1)
+        bench<StringHashMap<Value>>(data, pool, "StringHashMap");
+    if (!m || m == 2)
+        bench<HashMapWithSavedHash<StringRef, Value>>(data, pool, "HashMapWithSavedHash");
+    if (!m || m == 3)
+        bench<HashMap<StringRef, Value>>(data, pool, "HashMap");
+    return 0;
+}