Port the latest dispatch() improvements.

dfd9f266 · Alexander Kuzmenkov · d865aeba · dfd9f266 · dfd9f266
Showing with 47 additions and 85 deletions

dbms/src/Common/HashTable/StringHashTable.h dbms/src/Common/HashTable/StringHashTable.h +20 -59

dbms/src/Common/HashTable/TwoLevelStringHashTable.h dbms/src/Common/HashTable/TwoLevelStringHashTable.h +27 -26

File not found.
--- a/dbms/src/Common/HashTable/StringHashTable.h
+++ b/dbms/src/Common/HashTable/StringHashTable.h
@@ -3,36 +3,6 @@
 #include <Common/HashTable/HashMap.h>
 #include <Common/HashTable/HashTable.h>
-#define CASE_1_8 \
-    case 1: \
-    case 2: \
-    case 3: \
-    case 4: \
-    case 5: \
-    case 6: \
-    case 7: \
-    case 8
-#define CASE_9_16 \
-    case 9: \
-    case 10: \
-    case 11: \
-    case 12: \
-    case 13: \
-    case 14: \
-    case 15: \
-    case 16
-#define CASE_17_24 \
-    case 17: \
-    case 18: \
-    case 19: \
-    case 20: \
-    case 21: \
-    case 22: \
-    case 23: \
-    case 24
 struct StringKey0
 {
 };
@@ -46,17 +16,6 @@ struct StringKey24
    UInt64 c;
    bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
-    bool operator!=(const StringKey24 rhs) const { return !operator==(rhs); }
-    bool operator==(const UInt64 rhs) const { return a == rhs && b == 0 && c == 0; }
-    bool operator!=(const UInt64 rhs) const { return !operator==(rhs); }
-    StringKey24 & operator=(const UInt64 rhs)
-    {
-        a = rhs;
-        b = 0;
-        c = 0;
-        return *this;
-    }
 };
 inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
@@ -71,10 +30,6 @@ inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
 {
    return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
 }
-inline const StringRef & ALWAYS_INLINE toStringRef(const StringRef & s)
-{
-    return s;
-}
 struct StringHashTableHash
 {
@@ -242,18 +197,23 @@ public:
    // Dispatch is written in a way that maximizes the performance:
    // 1. Always memcpy 8 times bytes
    // 2. Use switch case extension to generate fast dispatching table
-    // 3. Combine hash computation along with key loading
+    // 3. Funcs are named callables that can be force_inlined
-    // 4. Funcs are named callables that can be force_inlined
+    // NOTE: It relies on Little Endianness
-    // NOTE: It relies on Little Endianness and SSE4.2
    template <typename KeyHolder, typename Func>
    decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
    {
-        static constexpr StringKey0 key0{};
        const StringRef & x = keyHolderGetKey(key_holder);
-        size_t sz = x.size;
+        const size_t sz = x.size;
+        if (sz == 0)
+        {
+            static constexpr StringKey0 key0{};
+            keyHolderDiscardKey(key_holder);
+            return func(m0, key0, 0);
+        }
        const char * p = x.data;
        // pending bits that needs to be shifted out
-        char s = (-sz & 7) * 8;
+        const char s = (-sz & 7) * 8;
        union
        {
            StringKey8 k8;
@@ -262,12 +222,10 @@ public:
            UInt64 n[3];
        };
        StringHashTableHash hash;
-        switch (sz)
+        switch ((sz - 1) >> 3)
        {
-            case 0:
+            case 0: // 1..8 bytes
-                keyHolderDiscardKey(key_holder);
+            {
-                return func(m0, key0, 0);
-            CASE_1_8 : {
                // first half page
                if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
                {
@@ -283,7 +241,8 @@ public:
                keyHolderDiscardKey(key_holder);
                return func(m1, k8, hash(k8));
            }
-            CASE_9_16 : {
+            case 1: // 9..16 bytes
+            {
                memcpy(&n[0], p, 8);
                const char * lp = x.data + x.size - 8;
                memcpy(&n[1], lp, 8);
@@ -291,7 +250,8 @@ public:
                keyHolderDiscardKey(key_holder);
                return func(m2, k16, hash(k16));
            }
-            CASE_17_24 : {
+            case 2: // 17..24 bytes
+            {
                memcpy(&n[0], p, 16);
                const char * lp = x.data + x.size - 8;
                memcpy(&n[2], lp, 8);
@@ -299,7 +259,8 @@ public:
                keyHolderDiscardKey(key_holder);
                return func(m3, k24, hash(k24));
            }
-            default: {
+            default: // >= 25 bytes
+            {
                return func(ms, std::forward<KeyHolder>(key_holder), hash(x));
            }
        }

--- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h
+++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h
@@ -69,23 +69,23 @@ public:
        }
    }
-    // Dispatch is written in a way that maximizes the performance:
+    // This function is mostly the same as StringHashTable::dispatch, but with
-    // 1. Always memcpy 8 times bytes
+    // added bucket computation. See the comments there.
-    // 2. Use switch case extension to generate fast dispatching table
-    // 3. Combine hash computation along with bucket computation and key loading
-    // 4. Funcs are named callables that can be force_inlined
-    // NOTE: It relies on Little Endianness and SSE4.2
    template <typename Func, typename KeyHolder>
    decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
    {
-        static constexpr StringKey0 key0{};
        const StringRef & x = keyHolderGetKey(key_holder);
-        size_t sz = x.size;
+        const size_t sz = x.size;
+        if (sz == 0)
+        {
+            static constexpr StringKey0 key0{};
+            keyHolderDiscardKey(key_holder);
+            return func(impls[0].m0, key0, 0);
+        }
        const char * p = x.data;
        // pending bits that needs to be shifted out
-        char s = (-sz & 7) * 8;
+        const char s = (-sz & 7) * 8;
-        size_t res = -1ULL;
-        size_t buck;
        union
        {
            StringKey8 k8;
@@ -94,12 +94,10 @@ public:
            UInt64 n[3];
        };
        StringHashTableHash hash;
-        switch (sz)
+        switch ((sz - 1) >> 3)
        {
            case 0:
-                keyHolderDiscardKey(key_holder);
+            {
-                return func(impls[0].m0, key0, 0);
-            CASE_1_8 : {
                // first half page
                if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
                {
@@ -112,34 +110,37 @@ public:
                    memcpy(&n[0], lp, 8);
                    n[0] >>= s;
                }
-                res = hash(k8);
+                auto res = hash(k8);
-                buck = getBucketFromHash(res);
+                auto buck = getBucketFromHash(res);
                keyHolderDiscardKey(key_holder);
                return func(impls[buck].m1, k8, res);
            }
-            CASE_9_16 : {
+            case 1:
+            {
                memcpy(&n[0], p, 8);
                const char * lp = x.data + x.size - 8;
                memcpy(&n[1], lp, 8);
                n[1] >>= s;
-                res = hash(k16);
+                auto res = hash(k16);
-                buck = getBucketFromHash(res);
+                auto buck = getBucketFromHash(res);
                keyHolderDiscardKey(key_holder);
                return func(impls[buck].m2, k16, res);
            }
-            CASE_17_24 : {
+            case 2:
+            {
                memcpy(&n[0], p, 16);
                const char * lp = x.data + x.size - 8;
                memcpy(&n[2], lp, 8);
                n[2] >>= s;
-                res = hash(k24);
+                auto res = hash(k24);
-                buck = getBucketFromHash(res);
+                auto buck = getBucketFromHash(res);
                keyHolderDiscardKey(key_holder);
                return func(impls[buck].m3, k24, res);
            }
-            default: {
+            default:
-                res = hash(x);
+            {
-                buck = getBucketFromHash(res);
+                auto res = hash(x);
+                auto buck = getBucketFromHash(res);
                return func(impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
            }
        }