提交 a8644478 编写于 作者: A Amos Bird 提交者: Alexander Kuzmenkov

Introduce String Hash Map.

It speeds up aggregation over short string keys. Use it as a default
aggregation method for string keys.
上级 5286d0af
......@@ -358,6 +358,12 @@ protected:
template <typename, typename, typename, typename, typename, typename, size_t>
friend class TwoLevelHashTable;
template <typename, typename, size_t>
friend class TwoLevelStringHashTable;
template <typename SubMaps>
friend class StringHashTable;
using HashValue = size_t;
using Self = HashTable;
using cell_type = Cell;
......
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTableAllocator.h>
#include <Common/HashTable/StringHashTable.h>
template <typename Key, typename TMapped>
struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
};
template<typename Key, typename Mapped>
auto lookupResultGetMapped(StringHashMapCell<Key, Mapped> * cell) { return &cell->getSecond(); }
template <typename TMapped>
struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
// Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; }
void setZero() { this->value.first.low = 0; }
};
template <typename TMapped>
struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
// Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; }
void setZero() { this->value.first.a = 0; }
};
template <typename TMapped>
struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
};
template <typename TMapped, typename Allocator>
struct StringHashMapSubMaps
{
using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
};
template <typename TMapped, typename Allocator = HashTableAllocator>
class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
{
public:
using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
using Self = StringHashMap;
using Key = StringRef;
using key_type = StringRef;
using mapped_type = TMapped;
using value_type = typename Base::Ts::value_type;
using LookupResult = mapped_type *;
using Base::Base;
/// Merge every cell's value of current map into the destination map.
/// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
/// Each filled cell in current map will invoke func once. If that map doesn't
/// have a key equals to the given cell, a new cell gets emplaced into that map,
/// and func is invoked with the third argument emplaced set to true. Otherwise
/// emplaced is set to false.
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
if (this->m0.hasZero())
{
const bool emplace_new_zero = !that.m0.hasZero();
if (emplace_new_zero)
{
that.m0.setHasZero();
}
func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(),
emplace_new_zero);
}
this->m1.mergeToViaEmplace(that.m1, func);
this->m2.mergeToViaEmplace(that.m2, func);
this->m3.mergeToViaEmplace(that.m3, func);
this->ms.mergeToViaEmplace(that.ms, func);
}
/// Merge every cell's value of current map into the destination map via find.
/// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
/// Each filled cell in current map will invoke func once. If that map doesn't
/// have a key equals to the given cell, func is invoked with the third argument
/// exist set to false. Otherwise exist is set to true.
template <typename Func>
void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
{
if (this->m0.hasZero())
{
if (that.m0.hasZero())
{
func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), true);
}
else
{
func(this->m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), false);
}
}
this->m1.mergeToViaFind(that.m1, func);
this->m2.mergeToViaFind(that.m2, func);
this->m3.mergeToViaFind(that.m3, func);
this->ms.mergeToViaFind(that.ms, func);
}
mapped_type & ALWAYS_INLINE operator[](Key x)
{
bool inserted;
LookupResult it = nullptr;
emplace(x, it, inserted);
if (inserted)
new (it) mapped_type();
return *it;
}
template <typename Func>
void ALWAYS_INLINE forEachValue(Func && func)
{
if (this->m0.size())
{
func(StringRef{}, this->m0.zeroValue()->getSecond());
}
for (auto & v : this->m1)
{
func(toStringRef(v.getFirst()), v.getSecond());
}
for (auto & v : this->m2)
{
func(toStringRef(v.getFirst()), v.getSecond());
}
for (auto & v : this->m3)
{
func(toStringRef(v.getFirst()), v.getSecond());
}
for (auto & v : this->ms)
{
func(v.getFirst(), v.getSecond());
}
}
template <typename Func>
void ALWAYS_INLINE forEachMapped(Func && func)
{
if (this->m0.size())
func(this->m0.zeroValue()->getSecond());
for (auto & v : this->m1)
func(v.getSecond());
for (auto & v : this->m2)
func(v.getSecond());
for (auto & v : this->m3)
func(v.getSecond());
for (auto & v : this->ms)
func(v.getSecond());
}
};
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h>
#define CASE_1_8 \
case 1: \
case 2: \
case 3: \
case 4: \
case 5: \
case 6: \
case 7: \
case 8
#define CASE_9_16 \
case 9: \
case 10: \
case 11: \
case 12: \
case 13: \
case 14: \
case 15: \
case 16
#define CASE_17_24 \
case 17: \
case 18: \
case 19: \
case 20: \
case 21: \
case 22: \
case 23: \
case 24
struct StringKey0
{
};
using StringKey8 = UInt64;
using StringKey16 = DB::UInt128;
struct StringKey24
{
UInt64 a;
UInt64 b;
UInt64 c;
bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
bool operator!=(const StringKey24 rhs) const { return !operator==(rhs); }
bool operator==(const UInt64 rhs) const { return a == rhs && b == 0 && c == 0; }
bool operator!=(const UInt64 rhs) const { return !operator==(rhs); }
StringKey24 & operator=(const UInt64 rhs)
{
a = rhs;
b = 0;
c = 0;
return *this;
}
};
inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
{
return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)};
}
inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
{
return {reinterpret_cast<const char *>(&n), 16ul - (__builtin_clzll(n.high) >> 3)};
}
inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
{
return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
}
inline const StringRef & ALWAYS_INLINE toStringRef(const StringRef & s)
{
return s;
}
struct StringHashTableHash
{
#if defined(__SSE4_2__)
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{
size_t res = -1ULL;
res = _mm_crc32_u64(res, key);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const
{
size_t res = -1ULL;
res = _mm_crc32_u64(res, key.low);
res = _mm_crc32_u64(res, key.high);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const
{
size_t res = -1ULL;
res = _mm_crc32_u64(res, key.a);
res = _mm_crc32_u64(res, key.b);
res = _mm_crc32_u64(res, key.c);
return res;
}
#else
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{
return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8);
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const
{
return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16);
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const
{
return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24);
}
#endif
size_t ALWAYS_INLINE operator()(StringRef key) const
{
return StringRefHash()(key);
}
};
template <typename Cell>
struct StringHashTableEmpty
{
using Self = StringHashTableEmpty;
bool has_zero = false;
std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key.
public:
bool hasZero() const { return has_zero; }
void setHasZero()
{
has_zero = true;
new (zeroValue()) Cell();
}
void setHasZero(const Cell & other)
{
has_zero = true;
new (zeroValue()) Cell(other);
}
void clearHasZero()
{
has_zero = false;
if (!std::is_trivially_destructible_v<Cell>)
zeroValue()->~Cell();
}
Cell * zeroValue() { return reinterpret_cast<Cell *>(&zero_value_storage); }
const Cell * zeroValue() const { return reinterpret_cast<const Cell *>(&zero_value_storage); }
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t /* hash */)
{
if (!hasZero())
{
setHasZero();
inserted = true;
}
else
inserted = false;
it = zeroValue();
}
template <typename Key>
LookupResult ALWAYS_INLINE find(Key, size_t /* hash */)
{
return hasZero() ? zeroValue() : nullptr;
}
void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); }
void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); }
size_t size() const { return hasZero() ? 1 : 0; }
bool empty() const { return !hasZero(); }
size_t getBufferSizeInBytes() const { return sizeof(Cell); }
size_t getCollisions() const { return 0; }
};
template <size_t initial_size_degree = 8>
struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
{
// Smooth growing for string maps
void increaseSize() { this->size_degree += 1; }
};
template <typename SubMaps>
class StringHashTable : private boost::noncopyable
{
protected:
static constexpr size_t NUM_MAPS = 5;
// Map for storing empty string
using T0 = typename SubMaps::T0;
// Short strings are stored as numbers
using T1 = typename SubMaps::T1;
using T2 = typename SubMaps::T2;
using T3 = typename SubMaps::T3;
// Long strings are stored as StringRef along with saved hash
using Ts = typename SubMaps::Ts;
using Self = StringHashTable;
template <typename, typename, size_t>
friend class TwoLevelStringHashTable;
T0 m0;
T1 m1;
T2 m2;
T3 m3;
Ts ms;
public:
using Key = StringRef;
using key_type = Key;
using value_type = typename Ts::value_type;
using LookupResult = typename Ts::mapped_type *;
StringHashTable() {}
StringHashTable(size_t reserve_for_num_elements)
: m1{reserve_for_num_elements / 4}
, m2{reserve_for_num_elements / 4}
, m3{reserve_for_num_elements / 4}
, ms{reserve_for_num_elements / 4}
{
}
StringHashTable(StringHashTable && rhs) { *this = std::move(rhs); }
~StringHashTable() {}
public:
// Dispatch is written in a way that maximizes the performance:
// 1. Always memcpy 8 times bytes
// 2. Use switch case extension to generate fast dispatching table
// 3. Combine hash computation along with key loading
// 4. Funcs are named callables that can be force_inlined
// NOTE: It relies on Little Endianness and SSE4.2
template <typename KeyHolder, typename Func>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
{
static constexpr StringKey0 key0{};
const StringRef & x = keyHolderGetKey(key_holder);
size_t sz = x.size;
const char * p = x.data;
// pending bits that needs to be shifted out
char s = (-sz & 7) * 8;
union
{
StringKey8 k8;
StringKey16 k16;
StringKey24 k24;
UInt64 n[3];
};
StringHashTableHash hash;
switch (sz)
{
case 0:
keyHolderDiscardKey(key_holder);
return func(m0, key0, 0);
CASE_1_8 : {
// first half page
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
n[0] &= -1ul >> s;
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
n[0] >>= s;
}
keyHolderDiscardKey(key_holder);
return func(m1, k8, hash(k8));
}
CASE_9_16 : {
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
n[1] >>= s;
keyHolderDiscardKey(key_holder);
return func(m2, k16, hash(k16));
}
CASE_17_24 : {
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
n[2] >>= s;
keyHolderDiscardKey(key_holder);
return func(m3, k24, hash(k24));
}
default: {
return func(ms, std::forward<KeyHolder>(key_holder), hash(x));
}
}
}
struct EmplaceCallable
{
LookupResult & mapped;
bool & inserted;
EmplaceCallable(LookupResult & mapped_, bool & inserted_)
: mapped(mapped_), inserted(inserted_) {}
template <typename Map, typename KeyHolder>
void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
{
typename Map::LookupResult result;
map.emplace(key_holder, result, inserted, hash);
mapped = lookupResultGetMapped(result);
}
};
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{
this->dispatch(key_holder, EmplaceCallable(it, inserted));
}
struct FindCallable
{
template <typename Map, typename KeyHolder>
LookupResult ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
{
return lookupResultGetMapped(map.find(keyHolderGetKey(key_holder), hash));
}
};
LookupResult ALWAYS_INLINE find(Key x)
{
return dispatch(x, FindCallable{});
}
void write(DB::WriteBuffer & wb) const
{
m0.write(wb);
m1.write(wb);
m2.write(wb);
m3.write(wb);
ms.write(wb);
}
void writeText(DB::WriteBuffer & wb) const
{
m0.writeText(wb);
DB::writeChar(',', wb);
m1.writeText(wb);
DB::writeChar(',', wb);
m2.writeText(wb);
DB::writeChar(',', wb);
m3.writeText(wb);
DB::writeChar(',', wb);
ms.writeText(wb);
}
void read(DB::ReadBuffer & rb)
{
m0.read(rb);
m1.read(rb);
m2.read(rb);
m3.read(rb);
ms.read(rb);
}
void readText(DB::ReadBuffer & rb)
{
m0.readText(rb);
DB::assertChar(',', rb);
m1.readText(rb);
DB::assertChar(',', rb);
m2.readText(rb);
DB::assertChar(',', rb);
m3.readText(rb);
DB::assertChar(',', rb);
ms.readText(rb);
}
size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); }
size_t getBufferSizeInBytes() const
{
return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes()
+ ms.getBufferSizeInBytes();
}
void clearAndShrink()
{
m1.clearHasZero();
m1.clearAndShrink();
m2.clearAndShrink();
m3.clearAndShrink();
ms.clearAndShrink();
}
};
#pragma once
#include <Common/HashTable/StringHashMap.h>
#include <Common/HashTable/TwoLevelStringHashTable.h>
template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap>
class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>
{
public:
using Key = StringRef;
using key_type = Key;
using Self = TwoLevelStringHashMap;
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
using Base::Base;
using typename Base::Impl;
using mapped_type = TMapped;
using value_type = typename Base::value_type;
using LookupResult = typename Base::LookupResult;
template <typename Func>
void ALWAYS_INLINE forEachMapped(Func && func)
{
for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
return this->impls[i].forEachMapped(func);
}
mapped_type & ALWAYS_INLINE operator[](Key x)
{
bool inserted;
LookupResult it;
emplace(x, it, inserted);
if (inserted)
new (lookupResultGetMapped(it)) mapped_type();
return *lookupResultGetMapped(it);
}
};
#pragma once
#include <Common/HashTable/StringHashTable.h>
template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, size_t BITS_FOR_BUCKET = 8>
class TwoLevelStringHashTable : private boost::noncopyable
{
protected:
using HashValue = size_t;
using Self = TwoLevelStringHashTable;
public:
using Key = StringRef;
using Impl = ImplTable;
static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
// TODO: currently hashing contains redundant computations when doing distributed or external aggregations
size_t hash(const Key & x) const
{
return const_cast<Self &>(*this).dispatch(x,
[&](const auto &, const auto &, size_t hash) { return hash; });
}
size_t operator()(const Key & x) const { return hash(x); }
/// NOTE Bad for hash tables with more than 2^32 cells.
static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
public:
using key_type = typename Impl::key_type;
using value_type = typename Impl::value_type;
using LookupResult = typename Impl::LookupResult;
Impl impls[NUM_BUCKETS];
TwoLevelStringHashTable() {}
template <typename Source>
TwoLevelStringHashTable(const Source & src)
{
if (src.m0.hasZero())
impls[0].m0.setHasZero(*src.m0.zeroValue());
for (auto & v : src.m1)
{
size_t hash_value = v.getHash(src.m1);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m1.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.m2)
{
size_t hash_value = v.getHash(src.m2);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m2.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.m3)
{
size_t hash_value = v.getHash(src.m3);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m3.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.ms)
{
size_t hash_value = v.getHash(src.ms);
size_t buck = getBucketFromHash(hash_value);
impls[buck].ms.insertUniqueNonZero(&v, hash_value);
}
}
// Dispatch is written in a way that maximizes the performance:
// 1. Always memcpy 8 times bytes
// 2. Use switch case extension to generate fast dispatching table
// 3. Combine hash computation along with bucket computation and key loading
// 4. Funcs are named callables that can be force_inlined
// NOTE: It relies on Little Endianness and SSE4.2
template <typename Func, typename KeyHolder>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
{
static constexpr StringKey0 key0{};
const StringRef & x = keyHolderGetKey(key_holder);
size_t sz = x.size;
const char * p = x.data;
// pending bits that needs to be shifted out
char s = (-sz & 7) * 8;
size_t res = -1ULL;
size_t buck;
union
{
StringKey8 k8;
StringKey16 k16;
StringKey24 k24;
UInt64 n[3];
};
StringHashTableHash hash;
switch (sz)
{
case 0:
keyHolderDiscardKey(key_holder);
return func(impls[0].m0, key0, 0);
CASE_1_8 : {
// first half page
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
n[0] &= -1ul >> s;
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
n[0] >>= s;
}
res = hash(k8);
buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(impls[buck].m1, k8, res);
}
CASE_9_16 : {
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
n[1] >>= s;
res = hash(k16);
buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(impls[buck].m2, k16, res);
}
CASE_17_24 : {
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
n[2] >>= s;
res = hash(k24);
buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(impls[buck].m3, k24, res);
}
default: {
res = hash(x);
buck = getBucketFromHash(res);
return func(impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
}
}
}
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{
dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted});
}
LookupResult ALWAYS_INLINE find(Key x)
{
return dispatch(x, typename Impl::FindCallable{});
}
void write(DB::WriteBuffer & wb) const
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
impls[i].write(wb);
}
void writeText(DB::WriteBuffer & wb) const
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
{
if (i != 0)
DB::writeChar(',', wb);
impls[i].writeText(wb);
}
}
void read(DB::ReadBuffer & rb)
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
impls[i].read(rb);
}
void readText(DB::ReadBuffer & rb)
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
{
if (i != 0)
DB::assertChar(',', rb);
impls[i].readText(rb);
}
}
size_t size() const
{
size_t res = 0;
for (size_t i = 0; i < NUM_BUCKETS; ++i)
res += impls[i].size();
return res;
}
bool empty() const
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
if (!impls[i].empty())
return false;
return true;
}
size_t getBufferSizeInBytes() const
{
size_t res = 0;
for (size_t i = 0; i < NUM_BUCKETS; ++i)
res += impls[i].getBufferSizeInBytes();
return res;
}
};
......@@ -11,6 +11,9 @@
#include <Common/HashTable/FixedHashMap.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/TwoLevelHashMap.h>
#include <Common/HashTable/StringHashMap.h>
#include <Common/HashTable/TwoLevelStringHashMap.h>
#include <Common/ThreadPool.h>
#include <Common/UInt128.h>
#include <Common/LRUCache.h>
......@@ -69,12 +72,20 @@ using AggregatedDataWithUInt8Key = FixedHashMap<UInt8, AggregateDataPtr>;
using AggregatedDataWithUInt16Key = FixedHashMap<UInt16, AggregateDataPtr>;
using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>;
using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
using AggregatedDataWithKeys256 = HashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;
using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>;
using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>;
using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, UInt128HashCRC32>;
using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap<UInt256, AggregateDataPtr, UInt256HashCRC32>;
......@@ -139,6 +150,8 @@ struct AggregationDataWithNullKeyTwoLevel : public Base
template <typename ... Types>
using HashTableWithNullKey = AggregationDataWithNullKey<HashMapTable<Types ...>>;
template <typename ... Types>
using StringHashTableWithNullKey = AggregationDataWithNullKey<StringHashMap<Types ...>>;
using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
......@@ -149,6 +162,10 @@ using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey<Aggregate
using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>,
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
TwoLevelStringHashMap<AggregateDataPtr, HashTableAllocator, StringHashTableWithNullKey>>;
using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel<
TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr, DefaultHash<StringRef>,
TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>;
......@@ -216,6 +233,32 @@ struct AggregationMethodString
};
/// Same as above but without cache
template <typename TData>
struct AggregationMethodStringNoCache
{
using Data = TData;
using Key = typename Data::key_type;
using Mapped = typename Data::mapped_type;
Data data;
AggregationMethodStringNoCache() {}
template <typename Other>
AggregationMethodStringNoCache(const Other & other) : data(other.data) {}
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false>;
static const bool low_cardinality_optimization = false;
static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(key.data, key.size);
}
};
/// For the case where there is one fixed-length string key.
template <typename TData>
struct AggregationMethodFixedString
......@@ -241,6 +284,32 @@ struct AggregationMethodFixedString
}
};
/// Same as above but without cache
template <typename TData>
struct AggregationMethodFixedStringNoCache
{
using Data = TData;
using Key = typename Data::key_type;
using Mapped = typename Data::mapped_type;
Data data;
AggregationMethodFixedStringNoCache() {}
template <typename Other>
AggregationMethodFixedStringNoCache(const Other & other) : data(other.data) {}
using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, Mapped, true, false>;
static const bool low_cardinality_optimization = false;
static void insertKeyIntoColumns(const StringRef & key, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(key.data, key.size);
}
};
/// Single low cardinality column.
template <typename SingleColumnMethod>
struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
......@@ -434,16 +503,16 @@ struct AggregatedDataVariants : private boost::noncopyable
std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64Key>> key32;
std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>> key64;
std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKey>> key_string;
std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKey>> key_fixed_string;
std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKey>> key_string;
std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKey>> key_fixed_string;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128>> keys128;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256>> keys256;
std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKey>> serialized;
std::unique_ptr<AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt64KeyTwoLevel>> key32_two_level;
std::unique_ptr<AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64KeyTwoLevel>> key64_two_level;
std::unique_ptr<AggregationMethodString<AggregatedDataWithStringKeyTwoLevel>> key_string_two_level;
std::unique_ptr<AggregationMethodFixedString<AggregatedDataWithStringKeyTwoLevel>> key_fixed_string_two_level;
std::unique_ptr<AggregationMethodStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_string_two_level;
std::unique_ptr<AggregationMethodFixedStringNoCache<AggregatedDataWithShortStringKeyTwoLevel>> key_fixed_string_two_level;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys128TwoLevel>> keys128_two_level;
std::unique_ptr<AggregationMethodKeysFixed<AggregatedDataWithKeys256TwoLevel>> keys256_two_level;
std::unique_ptr<AggregationMethodSerialized<AggregatedDataWithStringKeyTwoLevel>> serialized_two_level;
......
......@@ -37,6 +37,12 @@ add_executable (hash_map_string_small hash_map_string_small.cpp)
target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
target_link_libraries (hash_map_string_small PRIVATE dbms)
add_executable (string_hash_map string_hash_map.cpp)
target_link_libraries (string_hash_map PRIVATE dbms)
add_executable (string_hash_map_aggregation string_hash_map.cpp)
target_link_libraries (string_hash_map_aggregation PRIVATE dbms)
add_executable (two_level_hash_map two_level_hash_map.cpp)
target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
target_link_libraries (two_level_hash_map PRIVATE dbms)
......
#include <iomanip>
#include <iostream>
#include <vector>
#include <Compression/CompressedReadBuffer.h>
#include <Core/Types.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <Interpreters/AggregationCommon.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTableKeyHolder.h>
#include <Common/HashTable/StringHashMap.h>
#include <Common/Stopwatch.h>
#include <common/StringRef.h>
/**
#include <fstream>
#include <random>
using namespace std;
int main()
{
std::string s;
std::random_device dev;
std::mt19937 rng(dev());
std::uniform_int_distribution<std::mt19937::result_type> dist(0, 25);
std::binomial_distribution<std::mt19937::result_type> binomial1(100, 0.01);
std::binomial_distribution<std::mt19937::result_type> binomial2(100, 0.02);
std::binomial_distribution<std::mt19937::result_type> binomial4(100, 0.04);
std::binomial_distribution<std::mt19937::result_type> binomial8(100, 0.08);
std::binomial_distribution<std::mt19937::result_type> binomial16(100, 0.16);
std::binomial_distribution<std::mt19937::result_type> binomial24(100, 0.24);
std::binomial_distribution<std::mt19937::result_type> binomial48(100, 0.48);
// 11GB
std::ofstream f("/tmp/terms.csv");
size_t l1, l2, l4, l8, l16, l24, l48;
for (auto n = 0ul; n < 1e8; ++n)
{
l1 = binomial1(rng) + 1;
l2 = binomial2(rng) + l1 + 1;
l4 = binomial4(rng) + l2 + 1;
l8 = binomial8(rng) + l4 + 1;
l16 = binomial16(rng) + l8 + 1;
l24 = binomial24(rng) + l16 + 1;
l48 = binomial48(rng) + l24 + 1;
s.resize(l48);
for (auto i = 0ul; i < l48 - 1; ++i)
s[i] = 'a' + dist(rng);
s[l1 - 1] = ',';
s[l2 - 1] = ',';
s[l4 - 1] = ',';
s[l8 - 1] = ',';
s[l16 - 1] = ',';
s[l24 - 1] = ',';
s[l48 - 1] = '\n';
f << s;
}
f.close();
return 0;
}
create table terms (term1 String, term2 String, term4 String, term8 String, term16 String, term24 String, term48 String) engine TinyLog;
insert into terms select * from file('/tmp/terms.csv', CSV, 'a String, b String, c String, d String, e String, f String, g String');
NOTE: for reliable test results, try isolating cpu cores and do python -m perf tune. Also bind numa nodes if any.
# isolate cpu 18
dir=/home/amos/git/chorigin/data/data/default/terms
for file in term1 term2 term4 term8 term16 term24 term48; do
for size in 30000000 50000000 80000000 100000000; do
BEST_METHOD=0
BEST_RESULT=0
for method in {1..2}; do
echo -ne $file $size $method ''
numactl --membind=0 taskset -c 18 ./string_hash_map $size $method <"$dir"/"$file".bin 2>&1 | perl -nE 'say /([0-9\.]+) elem/g if /HashMap/' | tee /tmp/string_hash_map_res
CUR_RESULT=$(cat /tmp/string_hash_map_res | tr -d '.')
if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then
BEST_METHOD=$method
BEST_RESULT=$CUR_RESULT
fi
done
echo Best: $BEST_METHOD - $BEST_RESULT
done
done
---------------------------
term1 30000000 1 68785770.85 term2 30000000 1 42531788.83 term4 30000000 1 14759901.41 term8 30000000 1 8072903.47
term1 30000000 2 40812128.16 term2 30000000 2 21352402.10 term4 30000000 2 9008907.80 term8 30000000 2 5822641.82
Best: 1 - 6878577085 Best: 1 - 4253178883 Best: 1 - 1475990141 Best: 1 - 807290347
term1 50000000 1 68027542.41 term2 50000000 1 40493742.80 term4 50000000 1 16827650.85 term8 50000000 1 7405230.14
term1 50000000 2 37589806.02 term2 50000000 2 19362975.09 term4 50000000 2 8278094.11 term8 50000000 2 5106810.80
Best: 1 - 6802754241 Best: 1 - 4049374280 Best: 1 - 1682765085 Best: 1 - 740523014
term1 80000000 1 68651875.88 term2 80000000 1 38253695.50 term4 80000000 1 15847177.93 term8 80000000 1 7536319.25
term1 80000000 2 38092189.20 term2 80000000 2 20287003.01 term4 80000000 2 9322770.34 term8 80000000 2 4355572.15
Best: 1 - 6865187588 Best: 1 - 3825369550 Best: 1 - 1584717793 Best: 1 - 753631925
term1 100000000 1 68641941.59 term2 100000000 1 39120834.79 term4 100000000 1 16773904.90 term8 100000000 1 7147146.55
term1 100000000 2 38358006.72 term2 100000000 2 20629363.17 term4 100000000 2 9665201.92 term8 100000000 2 4728255.07
Best: 1 - 6864194159 Best: 1 - 3912083479 Best: 1 - 1677390490 Best: 1 - 714714655
term16 30000000 1 6823029.35 term24 30000000 1 5706271.14 term48 30000000 1 4695716.47
term16 30000000 2 5672283.33 term24 30000000 2 5498855.56 term48 30000000 2 4860537.26
Best: 1 - 682302935 Best: 1 - 570627114 Best: 2 - 486053726
term16 50000000 1 6214581.25 term24 50000000 1 5249785.66 term48 50000000 1 4282606.12
term16 50000000 2 4990361.44 term24 50000000 2 4855552.24 term48 50000000 2 4348923.29
Best: 1 - 621458125 Best: 1 - 524978566 Best: 2 - 434892329
term16 80000000 1 5382855.70 term24 80000000 1 4580133.04 term48 80000000 1 3779436.15
term16 80000000 2 4282192.79 term24 80000000 2 4178791.09 term48 80000000 2 3788409.72
Best: 1 - 538285570 Best: 1 - 458013304 Best: 2 - 378840972
term16 100000000 1 5930103.42 term24 100000000 1 5030621.52 term48 100000000 1 4084666.73
term16 100000000 2 4621719.60 term24 100000000 2 4499866.83 term48 100000000 2 4067029.31
Best: 1 - 593010342 Best: 1 - 503062152 Best: 1 - 408466673
*/
using Value = uint64_t;
template <typename Map>
void NO_INLINE bench(const std::vector<StringRef> & data, DB::Arena &, const char * name)
{
// warm up
/*
{
Map map;
typename Map::LookupResult it;
bool inserted;
for (size_t i = 0, size = data.size(); i < size; ++i)
{
auto key_holder = DB::ArenaKeyHolder{data[i], pool};
map.emplace(key_holder, it, inserted);
if (inserted)
it->getSecond() = 0;
++it->getSecond();
}
}
*/
std::cerr << "method " << name << std::endl;
for (auto t = 0ul; t < 7; ++t)
{
DB::Arena pool(128 * 1024 * 1024);
Stopwatch watch;
Map map;
typename Map::LookupResult it;
bool inserted;
for (size_t i = 0, size = data.size(); i < size; ++i)
{
map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
}
watch.stop();
std::cerr << "arena-memory " << pool.size() + map.getBufferSizeInBytes() << std::endl;
std::cerr << "single-run " << std::setprecision(3)
<< watch.elapsedSeconds() << std::endl;
}
}
/*
template <typename Map>
runFromFile()
{
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
DB::CompressedReadBuffer in2(in1);
Map map;
DB::Arena pool(128 * 1024 * 1024);
for (size_t i = 0; i < n && !in2.eof(); ++i)
{
auto key = DB::readStringBinaryInto(pool, in2);
bool inserted;
Map::LookupResult mapped;
map.emplaceKeyHolder(DB::SerializedKeyHolder(key, pool), mapped, inserted);
}
}
template <typename Map>
benchFromFile()
{
double best_time = -1.;
for (auto t = 0ul; t < 50; ++t)
{
Stopwatch watch;
runFromFile();
watch.stop();
if (best_time < 0 || best_time > watch.elapsedSeconds())
{
best_time = watch.elapsedSeconds();
}
}
std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Elapsed: " << best_time << " (" << data.size() / best_time
<< " elem/sec.)" << std::endl;
}
*/
int main(int argc, char ** argv)
{
if (argc < 3)
{
std::cerr << "Usage: program n m\n";
return 1;
}
size_t n = atoi(argv[1]);
size_t m = atoi(argv[2]);
DB::Arena pool(128 * 1024 * 1024);
std::vector<StringRef> data(n);
std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
{
Stopwatch watch;
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
DB::CompressedReadBuffer in2(in1);
std::string tmp;
for (size_t i = 0; i < n && !in2.eof(); ++i)
{
DB::readStringBinary(tmp, in2);
data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size());
}
watch.stop();
std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " ("
<< n / watch.elapsedSeconds() << " elem/sec.)" << std::endl;
}
if (!m || m == 1)
bench<StringHashMap<Value>>(data, pool, "StringHashMap");
if (!m || m == 2)
bench<HashMapWithSavedHash<StringRef, Value>>(data, pool, "HashMapWithSavedHash");
if (!m || m == 3)
bench<HashMap<StringRef, Value>>(data, pool, "HashMap");
return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册