提交 dfd9f266 编写于 作者: A Alexander Kuzmenkov

Port the latest dispatch() improvements.

上级 d865aeba
...@@ -3,36 +3,6 @@ ...@@ -3,36 +3,6 @@
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h> #include <Common/HashTable/HashTable.h>
#define CASE_1_8 \
case 1: \
case 2: \
case 3: \
case 4: \
case 5: \
case 6: \
case 7: \
case 8
#define CASE_9_16 \
case 9: \
case 10: \
case 11: \
case 12: \
case 13: \
case 14: \
case 15: \
case 16
#define CASE_17_24 \
case 17: \
case 18: \
case 19: \
case 20: \
case 21: \
case 22: \
case 23: \
case 24
struct StringKey0 struct StringKey0
{ {
}; };
...@@ -46,17 +16,6 @@ struct StringKey24 ...@@ -46,17 +16,6 @@ struct StringKey24
UInt64 c; UInt64 c;
bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
bool operator!=(const StringKey24 rhs) const { return !operator==(rhs); }
bool operator==(const UInt64 rhs) const { return a == rhs && b == 0 && c == 0; }
bool operator!=(const UInt64 rhs) const { return !operator==(rhs); }
StringKey24 & operator=(const UInt64 rhs)
{
a = rhs;
b = 0;
c = 0;
return *this;
}
}; };
inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n) inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
...@@ -71,10 +30,6 @@ inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n) ...@@ -71,10 +30,6 @@ inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
{ {
return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
} }
inline const StringRef & ALWAYS_INLINE toStringRef(const StringRef & s)
{
return s;
}
struct StringHashTableHash struct StringHashTableHash
{ {
...@@ -242,18 +197,23 @@ public: ...@@ -242,18 +197,23 @@ public:
// Dispatch is written in a way that maximizes the performance: // Dispatch is written in a way that maximizes the performance:
// 1. Always memcpy a multiple of 8 bytes // 1. Always memcpy a multiple of 8 bytes
// 2. Use switch case extension to generate fast dispatching table // 2. Use switch case extension to generate fast dispatching table
// 3. Combine hash computation along with key loading // 3. Funcs are named callables that can be force_inlined
// 4. Funcs are named callables that can be force_inlined // NOTE: It relies on Little Endianness
// NOTE: It relies on Little Endianness and SSE4.2
template <typename KeyHolder, typename Func> template <typename KeyHolder, typename Func>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
{ {
static constexpr StringKey0 key0{};
const StringRef & x = keyHolderGetKey(key_holder); const StringRef & x = keyHolderGetKey(key_holder);
size_t sz = x.size; const size_t sz = x.size;
if (sz == 0)
{
static constexpr StringKey0 key0{};
keyHolderDiscardKey(key_holder);
return func(m0, key0, 0);
}
const char * p = x.data; const char * p = x.data;
// pending bits that need to be shifted out // pending bits that need to be shifted out
char s = (-sz & 7) * 8; const char s = (-sz & 7) * 8;
union union
{ {
StringKey8 k8; StringKey8 k8;
...@@ -262,12 +222,10 @@ public: ...@@ -262,12 +222,10 @@ public:
UInt64 n[3]; UInt64 n[3];
}; };
StringHashTableHash hash; StringHashTableHash hash;
switch (sz) switch ((sz - 1) >> 3)
{ {
case 0: case 0: // 1..8 bytes
keyHolderDiscardKey(key_holder); {
return func(m0, key0, 0);
CASE_1_8 : {
// first half page // first half page
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0) if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{ {
...@@ -283,7 +241,8 @@ public: ...@@ -283,7 +241,8 @@ public:
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(m1, k8, hash(k8)); return func(m1, k8, hash(k8));
} }
CASE_9_16 : { case 1: // 9..16 bytes
{
memcpy(&n[0], p, 8); memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8; const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8); memcpy(&n[1], lp, 8);
...@@ -291,7 +250,8 @@ public: ...@@ -291,7 +250,8 @@ public:
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(m2, k16, hash(k16)); return func(m2, k16, hash(k16));
} }
CASE_17_24 : { case 2: // 17..24 bytes
{
memcpy(&n[0], p, 16); memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8; const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8); memcpy(&n[2], lp, 8);
...@@ -299,7 +259,8 @@ public: ...@@ -299,7 +259,8 @@ public:
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(m3, k24, hash(k24)); return func(m3, k24, hash(k24));
} }
default: { default: // >= 25 bytes
{
return func(ms, std::forward<KeyHolder>(key_holder), hash(x)); return func(ms, std::forward<KeyHolder>(key_holder), hash(x));
} }
} }
......
...@@ -69,23 +69,23 @@ public: ...@@ -69,23 +69,23 @@ public:
} }
} }
// Dispatch is written in a way that maximizes the performance: // This function is mostly the same as StringHashTable::dispatch, but with
// 1. Always memcpy 8 times bytes // added bucket computation. See the comments there.
// 2. Use switch case extension to generate fast dispatching table
// 3. Combine hash computation along with bucket computation and key loading
// 4. Funcs are named callables that can be force_inlined
// NOTE: It relies on Little Endianness and SSE4.2
template <typename Func, typename KeyHolder> template <typename Func, typename KeyHolder>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
{ {
static constexpr StringKey0 key0{};
const StringRef & x = keyHolderGetKey(key_holder); const StringRef & x = keyHolderGetKey(key_holder);
size_t sz = x.size; const size_t sz = x.size;
if (sz == 0)
{
static constexpr StringKey0 key0{};
keyHolderDiscardKey(key_holder);
return func(impls[0].m0, key0, 0);
}
const char * p = x.data; const char * p = x.data;
// pending bits that need to be shifted out // pending bits that need to be shifted out
char s = (-sz & 7) * 8; const char s = (-sz & 7) * 8;
size_t res = -1ULL;
size_t buck;
union union
{ {
StringKey8 k8; StringKey8 k8;
...@@ -94,12 +94,10 @@ public: ...@@ -94,12 +94,10 @@ public:
UInt64 n[3]; UInt64 n[3];
}; };
StringHashTableHash hash; StringHashTableHash hash;
switch (sz) switch ((sz - 1) >> 3)
{ {
case 0: case 0:
keyHolderDiscardKey(key_holder); {
return func(impls[0].m0, key0, 0);
CASE_1_8 : {
// first half page // first half page
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0) if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{ {
...@@ -112,34 +110,37 @@ public: ...@@ -112,34 +110,37 @@ public:
memcpy(&n[0], lp, 8); memcpy(&n[0], lp, 8);
n[0] >>= s; n[0] >>= s;
} }
res = hash(k8); auto res = hash(k8);
buck = getBucketFromHash(res); auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(impls[buck].m1, k8, res); return func(impls[buck].m1, k8, res);
} }
CASE_9_16 : { case 1:
{
memcpy(&n[0], p, 8); memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8; const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8); memcpy(&n[1], lp, 8);
n[1] >>= s; n[1] >>= s;
res = hash(k16); auto res = hash(k16);
buck = getBucketFromHash(res); auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(impls[buck].m2, k16, res); return func(impls[buck].m2, k16, res);
} }
CASE_17_24 : { case 2:
{
memcpy(&n[0], p, 16); memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8; const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8); memcpy(&n[2], lp, 8);
n[2] >>= s; n[2] >>= s;
res = hash(k24); auto res = hash(k24);
buck = getBucketFromHash(res); auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder); keyHolderDiscardKey(key_holder);
return func(impls[buck].m3, k24, res); return func(impls[buck].m3, k24, res);
} }
default: { default:
res = hash(x); {
buck = getBucketFromHash(res); auto res = hash(x);
auto buck = getBucketFromHash(res);
return func(impls[buck].ms, std::forward<KeyHolder>(key_holder), res); return func(impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册