Commit c8633d8b authored by Igor Chevdar, committed by Vasily Levchenko

[runtime] Vectorized polynomial hash for strings

This is for consistency with Kotlin/JVM
Parent 95a92d86
......@@ -28,6 +28,8 @@
#include "utf8.h"
#include "polyhash/PolyHash.h"
namespace {
typedef std::back_insert_iterator<KStdString> KStdStringInserter;
......@@ -1165,10 +1167,7 @@ KInt Kotlin_String_lastIndexOfString(KString thiz, KString other, KInt fromIndex
KInt Kotlin_String_hashCode(KString thiz) {
  // TODO: consider caching strings hashes.
  // Polynomial hash with base 31 over the UTF-16 code units, matching
  // java.lang.String::hashCode, for consistency with Kotlin/JVM.
  // (The unreachable CityHash64-based return preceding this call was stale
  // dead code from the previous implementation and is removed.)
  return polyHash(thiz->count_, CharArrayAddressOfElementAt(thiz, 0));
}
const KChar* Kotlin_String_utf16pointer(KString message) {
......
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#include "polyhash/PolyHash.h"
#include "polyhash/naive.h"
#include "polyhash/x86.h"
#include "polyhash/arm.h"
// Public entry point: route to the fastest implementation available for the
// compilation target. The per-architecture variants do their own runtime
// CPU-feature detection and fall back to the naive version when needed.
int polyHash(int length, uint16_t const* str) {
#if defined(__i386__) || defined(__x86_64__)
    return polyHash_x86(length, str);
#elif defined(__aarch64__) || defined(__arm__)
    return polyHash_arm(length, str);
#else
    // No vectorized implementation for this architecture.
    return polyHash_naive(length, str);
#endif
}
\ No newline at end of file
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#ifndef RUNTIME_POLYHASH_H
#define RUNTIME_POLYHASH_H

#include <stdint.h>

// Computes polynomial hash with base = 31 over `length` UTF-16 code units,
// i.e. hash = str[0]*31^(length-1) + ... + str[length-1], in wrapping 32-bit
// arithmetic (same scheme as java.lang.String::hashCode).
// Internally dispatches to a SIMD implementation (SSE/AVX2/NEON) when the
// target architecture and CPU support one.
int polyHash(int length, uint16_t const* str);

#endif // RUNTIME_POLYHASH_H
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#include "polyhash/PolyHash.h"
#include "polyhash/naive.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {

// Cross-checks the dispatching polyHash against the scalar reference
// implementation on strings of every length up to maxLength, shifting the
// start pointer to exercise unaligned vector loads.
TEST(PolyHashTest, Correctness) {
    const int maxLength = 10000;
    uint16_t buffer[maxLength + 100];
    for (int len = 1; len <= maxLength; ++len) {
        // Fill with a length-dependent pattern (values truncate to 16 bits).
        for (int pos = 0; pos < len; ++pos)
            buffer[pos] = len * maxLength + pos;
        buffer[len] = 0;
        for (int shift = 0; shift < 8 && len - shift > 0; ++shift)
            EXPECT_EQ(polyHash_naive(len - shift, buffer + shift), polyHash(len - shift, buffer + shift));
    }
}

}
\ No newline at end of file
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#include "polyhash/common.h"
#include "polyhash/arm.h"
#if defined(__arm__) or defined(__aarch64__)

#ifndef __ARM_NEON

// Compiled without NEON support: always use the scalar reference version.
int polyHash_arm(int length, uint16_t const* str) {
    return polyHash_naive(length, str);
}

#else

#include <arm_neon.h>

namespace {

// Precomputed powers of the base (31), 32-byte aligned so the generic loops
// in polyhash/common.h can load them straight into vector registers.
alignas(32) constexpr auto p32 = DecreasingPowers<32>(31);   // [base^31, base^30, .., base^2, base, 1]
alignas(32) constexpr auto b32 = RepeatingPowers<8>(31, 32); // [base^32, base^32, .., base^32] (8)
alignas(32) constexpr auto b16 = RepeatingPowers<8>(31, 16); // [base^16, base^16, .., base^16] (8)
alignas(32) constexpr auto b8 = RepeatingPowers<8>(31, 8);   // [base^8, base^8, .., base^8 ] (8)
alignas(32) constexpr auto b4 = RepeatingPowers<8>(31, 4);   // [base^4, base^4, .., base^4 ] (8)

// Adapter mapping the generic trait interface of polyHashUnroll*/polyHashTail
// (polyhash/common.h) onto 128-bit NEON intrinsics.
struct NeonTraits {
    using VecType = uint32x4_t;    // working vector: 4 x u32 lanes
    using Vec128Type = uint32x4_t; // 128-bit hash accumulator
    using U16VecType = uint16x4_t; // raw load unit: 4 x u16 characters

    ALWAYS_INLINE static VecType initVec() { return vdupq_n_u32(0); }
    ALWAYS_INLINE static Vec128Type initVec128() { return vdupq_n_u32(0); }
    ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return vgetq_lane_u32(x, 0); }
    // Widen 4 u16 characters to 4 u32 lanes.
    ALWAYS_INLINE static VecType u16Load(U16VecType x) { return vmovl_u16(x); }
    ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return vmulq_u32(x, y); }
    ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return vaddq_u32(x, y); }
    ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return vmulq_u32(x, y); }
    ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return vaddq_u32(x, y); }

    // Horizontal sum of two vectors, broadcast to all lanes of the result.
    ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) {
        return squash1(vaddq_u32(x, y)); // [x0 + y0, x1 + y1, x2 + y2, x3 + y3]
    }

    // Horizontal sum of one vector, broadcast to all lanes of the result.
    ALWAYS_INLINE static uint32x4_t squash1(uint32x4_t z) {
#ifdef __aarch64__
        return vdupq_n_u32(vaddvq_u32(z)); // [z0..3, same, same, same]
#else
        // 32-bit ARM has no vaddvq: fold halves pairwise instead.
        uint32x2_t lo = vget_low_u32(z);   // [z0, z1]
        uint32x2_t hi = vget_high_u32(z);  // [z2, z3]
        uint32x2_t sum = vadd_u32(lo, hi); // [z0 + z2, z1 + z3]
        sum = vpadd_u32(sum, sum);         // [z0..3, same]
        return vcombine_u32(sum, sum);     // [z0..3, same, same, same]
#endif
    };

    // n counts remaining 4-character groups (callers pass length / 4).
    static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
        Vec128Type res = initVec128();
        polyHashUnroll4<NeonTraits>(n, str, res, &b16[0], &p32[16]);
        polyHashUnroll2<NeonTraits>(n, str, res, &b8[0], &p32[24]);
        polyHashTail<NeonTraits>(n, str, res, &b4[0], &p32[28]);
        return vec128toInt(res);
    }

    static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) {
        Vec128Type res = initVec128();
        polyHashUnroll8<NeonTraits>(n, str, res, &b32[0], &p32[0]);
        polyHashUnroll4<NeonTraits>(n, str, res, &b16[0], &p32[16]);
        polyHashUnroll2<NeonTraits>(n, str, res, &b8[0], &p32[24]);
        polyHashTail<NeonTraits>(n, str, res, &b4[0], &p32[28]);
        return vec128toInt(res);
    }
};

// Compile-time or runtime detection of NEON availability.
// NOTE(review): the #include directives below sit inside an anonymous
// namespace — fragile if those headers declare anything; consider hoisting
// them to the top of the file.
#if defined(__aarch64__)
const bool neonSupported = true; // AArch64 always supports Neon.
#elif defined(__ANDROID__)
#include <cpu-features.h>
const bool neonSupported = android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON;
#elif defined(__APPLE__)
const bool neonSupported = true; // It is supported starting from iPhone 3GS.
#elif defined(__linux__) or defined(__unix__)
#include <sys/auxv.h>
#include <asm/hwcap.h>
const bool neonSupported = getauxval(AT_HWCAP) & HWCAP_NEON;
#else
#error "Not supported"
#endif

}

// Polynomial hash (base 31) over `length` UTF-16 code units; same result as
// polyHash_naive for every input.
int polyHash_arm(int length, uint16_t const* str) {
    if (!neonSupported) {
        // Vectorization is not supported.
        return polyHash_naive(length, str);
    }
    int res;
    // 488 is the cutover between unroll variants — presumably chosen by
    // benchmarking; TODO confirm.
    if (length < 488)
        res = NeonTraits::polyHashUnalignedUnrollUpTo16(length / 4, str);
    else
        res = NeonTraits::polyHashUnalignedUnrollUpTo32(length / 4, str);
    // Handle the tail (the length % 4 trailing characters) naively.
    for (int i = length & 0xFFFFFFFC; i < length; ++i)
        res = res * 31 + str[i];
    return res;
}

#endif // __ARM_NEON

#endif // defined(__arm__) or defined(__aarch64__)
\ No newline at end of file
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#ifndef RUNTIME_POLYHASH_ARM_H
#define RUNTIME_POLYHASH_ARM_H

// Needed for uint16_t below; previously this header did not compile
// standalone because it relied on the includer providing <cstdint>.
#include <cstdint>

// Polynomial string hash (base 31) over UTF-16 code units, NEON-accelerated
// when the CPU supports it; falls back to the naive implementation otherwise.
int polyHash_arm(int length, uint16_t const* str);

#endif // RUNTIME_POLYHASH_ARM_H
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#ifndef RUNTIME_POLYHASH_COMMON_H
#define RUNTIME_POLYHASH_COMMON_H
#include <array>
#include <cstdint>
#include "polyhash/naive.h"
#include "../Common.h"
// Returns base^exponent in wrapping 32-bit arithmetic.
constexpr uint32_t Power(uint32_t base, uint8_t exponent) {
    uint32_t product = 1;
    for (uint8_t step = exponent; step > 0; --step)
        product *= base;
    return product;
}
// Builds [base^(Exponent-1), base^(Exponent-2), ..., base, 1]:
// highest power first, 1 last.
template <uint8_t Exponent>
constexpr std::array<uint32_t, Exponent> DecreasingPowers(uint32_t base) {
    std::array<uint32_t, Exponent> powers = {};
    uint32_t acc = 1;
    // Fill from the back so each slot holds one more factor of base.
    for (int idx = Exponent - 1; idx >= 0; --idx) {
        powers[idx] = acc;
        acc *= base;
    }
    return powers;
}
template <size_t Count>
constexpr std::array<uint32_t, Count> RepeatingPowers(uint32_t base, uint8_t exponent) {
std::array<uint32_t, Count> result = {};
uint32_t value = Power(base, exponent);
for (auto& element : result)
element = value;
return result;
}
#if defined(__x86_64__) or defined(__i386__)
// The templates below may be instantiated with AVX2 trait types on x86; the
// runtime dispatcher guarantees they only execute on capable CPUs.
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif

// Consumes one final vector's worth of characters, if enough remain.
//   n   - remaining input in 4-character groups (updated in place)
//   str - current read position (updated in place)
//   p   - tail of the decreasing-powers table matching one vector
//   b   - per-lane constant base^(vecLength): rescales res for the characters
//         consumed here. Runs at most once (no loop).
// Effect: res = res * base^vecLength + sum(str[i] * p[i]).
template<typename Traits>
ALWAYS_INLINE void polyHashTail(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) {
    using VecType = typename Traits::VecType;
    using Vec128Type = typename Traits::Vec128Type;
    using U16VecType = typename Traits::U16VecType;
    const int vecLength = sizeof(VecType) / 4; // u32 lanes per working vector

    if (n < vecLength / 4) return; // fewer characters left than one vector

    // NOTE(review): unaligned dereference via reinterpret_cast — relies on
    // the target tolerating unaligned vector loads ("Unaligned" callers).
    VecType x = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
    res = Traits::vec128Mul(res, *reinterpret_cast<Vec128Type const*>(b));
    VecType z = Traits::vecMul(x, *reinterpret_cast<VecType const*>(p));
    res = Traits::vec128Add(res, Traits::squash1(z));
    str += vecLength;
    n -= vecLength / 4;
}
// 2-way unrolled vector loop: consumes 2 vectors (2 * vecLength characters)
// per iteration using two independent accumulators so the multiply/add
// dependency chains can overlap. Runs while at least vecLength / 2
// four-character groups remain; folds the partial sums into res on exit.
// NOTE: res is rescaled by b only ONCE, on entry. This is correct because
// either res == 0 here (this is the first stage that runs), or the caller's
// length-based dispatch guarantees at most one loop iteration.
template<typename Traits>
ALWAYS_INLINE void polyHashUnroll2(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) {
    using VecType = typename Traits::VecType;
    using Vec128Type = typename Traits::Vec128Type;
    using U16VecType = typename Traits::U16VecType;
    const int vecLength = sizeof(VecType) / 4; // u32 lanes per working vector

    if (n < vecLength / 2) return; // too little input for this unrolling

    res = Traits::vec128Mul(res, *reinterpret_cast<Vec128Type const*>(b));
    VecType res0 = Traits::initVec();
    VecType res1 = Traits::initVec();
    do {
        // Load and widen 2 vectors of characters.
        VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
        VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength));
        // Scale previous partial sums by base^(2 * vecLength).
        res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b));
        res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b));
        // Weight the new characters by their decreasing powers of the base.
        VecType z0 = Traits::vecMul(x0, *reinterpret_cast<VecType const*>(p));
        VecType z1 = Traits::vecMul(x1, *reinterpret_cast<VecType const*>(p + vecLength));
        res0 = Traits::vecAdd(res0, z0);
        res1 = Traits::vecAdd(res1, z1);
        str += vecLength * 2;
        n -= vecLength / 2;
    } while (n >= vecLength / 2);
    res = Traits::vec128Add(res, Traits::squash2(res0, res1));
}
// 4-way unrolled vector loop: consumes 4 vectors (4 * vecLength characters)
// per iteration with four independent accumulators. Runs while at least
// vecLength four-character groups remain; folds the partials into res.
// NOTE: res is rescaled by b only ONCE, on entry — correct because either
// res == 0 here (first active stage) or the caller's dispatch guarantees at
// most one loop iteration (it runs after polyHashUnroll8 leaves
// n < 2 * vecLength).
template<typename Traits>
ALWAYS_INLINE void polyHashUnroll4(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) {
    using VecType = typename Traits::VecType;
    using Vec128Type = typename Traits::Vec128Type;
    using U16VecType = typename Traits::U16VecType;
    const int vecLength = sizeof(VecType) / 4; // u32 lanes per working vector

    if (n < vecLength) return; // too little input for this unrolling

    res = Traits::vec128Mul(res, *reinterpret_cast<Vec128Type const*>(b));
    VecType res0 = Traits::initVec();
    VecType res1 = Traits::initVec();
    VecType res2 = Traits::initVec();
    VecType res3 = Traits::initVec();
    do {
        // Load and widen 4 vectors of characters.
        VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
        VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength));
        VecType x2 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 2));
        VecType x3 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 3));
        // Scale previous partial sums by base^(4 * vecLength).
        res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b));
        res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b));
        res2 = Traits::vecMul(res2, *reinterpret_cast<VecType const*>(b));
        res3 = Traits::vecMul(res3, *reinterpret_cast<VecType const*>(b));
        // Weight the new characters by their decreasing powers of the base.
        VecType z0 = Traits::vecMul(x0, *reinterpret_cast<VecType const*>(p));
        VecType z1 = Traits::vecMul(x1, *reinterpret_cast<VecType const*>(p + vecLength));
        VecType z2 = Traits::vecMul(x2, *reinterpret_cast<VecType const*>(p + vecLength * 2));
        VecType z3 = Traits::vecMul(x3, *reinterpret_cast<VecType const*>(p + vecLength * 3));
        res0 = Traits::vecAdd(res0, z0);
        res1 = Traits::vecAdd(res1, z1);
        res2 = Traits::vecAdd(res2, z2);
        res3 = Traits::vecAdd(res3, z3);
        str += vecLength * 4;
        n -= vecLength;
    } while (n >= vecLength);
    res = Traits::vec128Add(res, Traits::vec128Add(Traits::squash2(res0, res1), Traits::squash2(res2, res3)));
}
// 8-way unrolled vector loop: consumes 8 vectors (8 * vecLength characters)
// per iteration with eight independent accumulators. Only ever used as the
// FIRST stage of a pipeline — note that, unlike the other loops, it never
// rescales res on entry, so res must be zero when it is called. The callers
// gate it so large unrolling only runs where enough vector registers exist
// (see the 64-bit-mode comment in polyHash_x86).
template<typename Traits>
ALWAYS_INLINE void polyHashUnroll8(int& n, uint16_t const*& str, typename Traits::Vec128Type& res, uint32_t const* b, uint32_t const* p) {
    using VecType = typename Traits::VecType;
    using Vec128Type = typename Traits::Vec128Type;
    using U16VecType = typename Traits::U16VecType;
    const int vecLength = sizeof(VecType) / 4; // u32 lanes per working vector

    if (n < vecLength * 2) return; // too little input for this unrolling

    VecType res0 = Traits::initVec();
    VecType res1 = Traits::initVec();
    VecType res2 = Traits::initVec();
    VecType res3 = Traits::initVec();
    VecType res4 = Traits::initVec();
    VecType res5 = Traits::initVec();
    VecType res6 = Traits::initVec();
    VecType res7 = Traits::initVec();
    do {
        // Load and widen 8 vectors of characters.
        VecType x0 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str));
        VecType x1 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength));
        VecType x2 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 2));
        VecType x3 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 3));
        VecType x4 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 4));
        VecType x5 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 5));
        VecType x6 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 6));
        VecType x7 = Traits::u16Load(*reinterpret_cast<U16VecType const*>(str + vecLength * 7));
        // Scale previous partial sums by base^(8 * vecLength).
        res0 = Traits::vecMul(res0, *reinterpret_cast<VecType const*>(b));
        res1 = Traits::vecMul(res1, *reinterpret_cast<VecType const*>(b));
        res2 = Traits::vecMul(res2, *reinterpret_cast<VecType const*>(b));
        res3 = Traits::vecMul(res3, *reinterpret_cast<VecType const*>(b));
        res4 = Traits::vecMul(res4, *reinterpret_cast<VecType const*>(b));
        res5 = Traits::vecMul(res5, *reinterpret_cast<VecType const*>(b));
        res6 = Traits::vecMul(res6, *reinterpret_cast<VecType const*>(b));
        res7 = Traits::vecMul(res7, *reinterpret_cast<VecType const*>(b));
        // Weight the new characters by their decreasing powers of the base.
        VecType z0 = Traits::vecMul(x0, *reinterpret_cast<VecType const*>(p));
        VecType z1 = Traits::vecMul(x1, *reinterpret_cast<VecType const*>(p + vecLength));
        VecType z2 = Traits::vecMul(x2, *reinterpret_cast<VecType const*>(p + vecLength * 2));
        VecType z3 = Traits::vecMul(x3, *reinterpret_cast<VecType const*>(p + vecLength * 3));
        VecType z4 = Traits::vecMul(x4, *reinterpret_cast<VecType const*>(p + vecLength * 4));
        VecType z5 = Traits::vecMul(x5, *reinterpret_cast<VecType const*>(p + vecLength * 5));
        VecType z6 = Traits::vecMul(x6, *reinterpret_cast<VecType const*>(p + vecLength * 6));
        VecType z7 = Traits::vecMul(x7, *reinterpret_cast<VecType const*>(p + vecLength * 7));
        res0 = Traits::vecAdd(res0, z0);
        res1 = Traits::vecAdd(res1, z1);
        res2 = Traits::vecAdd(res2, z2);
        res3 = Traits::vecAdd(res3, z3);
        res4 = Traits::vecAdd(res4, z4);
        res5 = Traits::vecAdd(res5, z5);
        res6 = Traits::vecAdd(res6, z6);
        res7 = Traits::vecAdd(res7, z7);
        str += vecLength * 8;
        n -= vecLength * 2;
    } while (n >= vecLength * 2);
    // Pairwise fold the eight accumulators into res.
    Vec128Type sum1 = Traits::vec128Add(Traits::squash2(res0, res1), Traits::squash2(res2, res3));
    Vec128Type sum2 = Traits::vec128Add(Traits::squash2(res4, res5), Traits::squash2(res6, res7));
    res = Traits::vec128Add(res, Traits::vec128Add(sum1, sum2));
}
#if defined(__x86_64__) or defined(__i386__)
#pragma clang attribute pop
#endif
#endif // RUNTIME_POLYHASH_COMMON_H
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#ifndef RUNTIME_POLYHASH_NAIVE_H
#define RUNTIME_POLYHASH_NAIVE_H
#include <cstdint>
// Scalar reference implementation of the base-31 polynomial hash over UTF-16
// code units (the value java.lang.String::hashCode produces). Used directly
// on platforms without SIMD and as the correctness oracle for the vectorized
// versions.
inline int polyHash_naive(int length, uint16_t const* str) {
    int hash = 0;
    for (int idx = 0; idx < length; ++idx) {
        hash = 31 * hash + str[idx];
    }
    return hash;
}
#endif // RUNTIME_POLYHASH_NAIVE_H
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#include "polyhash/common.h"
#include "polyhash/x86.h"
#if defined(__x86_64__) or defined(__i386__)

#include <immintrin.h>

// Everything below may emit AVX2 instructions; polyHash_x86 dispatches at
// runtime so these paths are only reached on CPUs that support them.
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)

namespace {

// Precomputed powers of the base (31), 32-byte aligned for vector loads.
alignas(32) constexpr auto p64 = DecreasingPowers<64>(31);   // [base^63, base^62, .., base^2, base, 1]
alignas(32) constexpr auto b64 = RepeatingPowers<8>(31, 64); // [base^64, base^64, .., base^64] (8)
alignas(32) constexpr auto b32 = RepeatingPowers<8>(31, 32); // [base^32, base^32, .., base^32] (8)
alignas(32) constexpr auto b16 = RepeatingPowers<8>(31, 16); // [base^16, base^16, .., base^16] (8)
alignas(32) constexpr auto b8 = RepeatingPowers<8>(31, 8);   // [base^8, base^8, .., base^8 ] (8)
alignas(32) constexpr auto b4 = RepeatingPowers<8>(31, 4);   // [base^4, base^4, .., base^4 ] (8)

// 128-bit adapter for the generic loops in polyhash/common.h.
// Requires SSE4.1 (_mm_cvtepu16_epi32, _mm_mullo_epi32); the dispatcher
// checks for it before calling.
struct SSETraits {
    using VecType = __m128i;    // working vector: 4 x u32 lanes
    using Vec128Type = __m128i; // 128-bit hash accumulator
    using U16VecType = __m128i; // raw character load unit

    ALWAYS_INLINE static VecType initVec() { return _mm_setzero_si128(); }
    ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); }
    ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
    // Widens the low 4 u16 characters to 4 u32 lanes.
    // NOTE(review): the 128-bit source load reads 8 characters though only 4
    // are consumed — presumably relies on readable slack past the string
    // (the unit test allocates +100 extra elements); confirm allocator
    // guarantees.
    ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm_cvtepu16_epi32(x); }
    ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
    ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
    ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm_mullo_epi32(x, y); }
    ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm_add_epi32(x, y); }

    // Horizontal sum of two vectors; total ends up in lane 0 (all lanes).
    ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) {
        return squash1(_mm_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3]
    }

    // Horizontal sum of one vector.
    ALWAYS_INLINE static Vec128Type squash1(VecType z) {
        VecType sum = _mm_hadd_epi32(z, z); // [z0 + z1, z2 + z3, z0 + z1, z2 + z3]
        return _mm_hadd_epi32(sum, sum);    // [z0..3, same, same, same]
    }

    // n counts remaining 4-character groups (callers pass length / 4).
    static int polyHashUnalignedUnrollUpTo8(int n, uint16_t const* str) {
        Vec128Type res = initVec128();
        polyHashUnroll2<SSETraits>(n, str, res, &b8[0], &p64[56]);
        polyHashTail<SSETraits>(n, str, res, &b4[0], &p64[60]);
        return vec128toInt(res);
    }

    static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
        Vec128Type res = initVec128();
        polyHashUnroll4<SSETraits>(n, str, res, &b16[0], &p64[48]);
        polyHashUnroll2<SSETraits>(n, str, res, &b8[0], &p64[56]);
        polyHashTail<SSETraits>(n, str, res, &b4[0], &p64[60]);
        return vec128toInt(res);
    }
};

// 256-bit AVX2 adapter: 8 u32 lanes per working vector, 128-bit accumulator.
struct AVX2Traits {
    using VecType = __m256i;    // working vector: 8 x u32 lanes
    using Vec128Type = __m128i; // 128-bit hash accumulator
    using U16VecType = __m128i; // raw character load unit (8 x u16)

    ALWAYS_INLINE static VecType initVec() { return _mm256_setzero_si256(); }
    ALWAYS_INLINE static Vec128Type initVec128() { return _mm_setzero_si128(); }
    ALWAYS_INLINE static int vec128toInt(Vec128Type x) { return _mm_cvtsi128_si32(x); }
    // Widens 8 u16 characters to 8 u32 lanes.
    ALWAYS_INLINE static VecType u16Load(U16VecType x) { return _mm256_cvtepu16_epi32(x); }
    ALWAYS_INLINE static Vec128Type vec128Mul(Vec128Type x, Vec128Type y) { return _mm_mullo_epi32(x, y); }
    ALWAYS_INLINE static Vec128Type vec128Add(Vec128Type x, Vec128Type y) { return _mm_add_epi32(x, y); }
    ALWAYS_INLINE static VecType vecMul(VecType x, VecType y) { return _mm256_mullo_epi32(x, y); }
    ALWAYS_INLINE static VecType vecAdd(VecType x, VecType y) { return _mm256_add_epi32(x, y); }

    // Horizontal sum of two 256-bit vectors. _mm256_hadd_epi32 adds within
    // 128-bit halves, hence the cross-lane fixup in squash1.
    ALWAYS_INLINE static Vec128Type squash2(VecType x, VecType y) {
        return squash1(_mm256_hadd_epi32(x, y)); // [x0 + x1, x2 + x3, y0 + y1, y2 + y3, x4 + x5, x6 + x7, y4 + y5, y6 + y7]
    }

    // Horizontal sum of one 256-bit vector into a 128-bit result.
    ALWAYS_INLINE static Vec128Type squash1(VecType z) {
        VecType sum = _mm256_hadd_epi32(z, z);            // [z0 + z1, z2 + z3, z0 + z1, z2 + z3, z4 + z5, z6 + z7, z4 + z5, z6 + z7]
        sum = _mm256_hadd_epi32(sum, sum);                // [z0..3, z0..3, z0..3, z0..3, z4..7, z4..7, z4..7, z4..7]
        Vec128Type lo = _mm256_extracti128_si256(sum, 0); // [z0..3, same, same, same]
        Vec128Type hi = _mm256_extracti128_si256(sum, 1); // [z4..7, same, same, same]
        return _mm_add_epi32(lo, hi);                     // [z0..7, same, same, same]
    }

    // n counts remaining 4-character groups (callers pass length / 4).
    static int polyHashUnalignedUnrollUpTo16(int n, uint16_t const* str) {
        Vec128Type res = initVec128();
        polyHashUnroll2<AVX2Traits>(n, str, res, &b16[0], &p64[48]);
        polyHashTail<AVX2Traits>(n, str, res, &b8[0], &p64[56]);
        polyHashTail<SSETraits>(n, str, res, &b4[0], &p64[60]);
        return vec128toInt(res);
    }

    static int polyHashUnalignedUnrollUpTo32(int n, uint16_t const* str) {
        Vec128Type res = initVec128();
        polyHashUnroll4<AVX2Traits>(n, str, res, &b32[0], &p64[32]);
        polyHashUnroll2<AVX2Traits>(n, str, res, &b16[0], &p64[48]);
        polyHashTail<AVX2Traits>(n, str, res, &b8[0], &p64[56]);
        polyHashTail<SSETraits>(n, str, res, &b4[0], &p64[60]);
        return vec128toInt(res);
    }

    static int polyHashUnalignedUnrollUpTo64(int n, uint16_t const* str) {
        Vec128Type res = initVec128();
        polyHashUnroll8<AVX2Traits>(n, str, res, &b64[0], &p64[0]);
        polyHashUnroll4<AVX2Traits>(n, str, res, &b32[0], &p64[32]);
        polyHashUnroll2<AVX2Traits>(n, str, res, &b16[0], &p64[48]);
        polyHashTail<AVX2Traits>(n, str, res, &b8[0], &p64[56]);
        polyHashTail<SSETraits>(n, str, res, &b4[0], &p64[60]);
        return vec128toInt(res);
    }
};

// True when compiled for 64-bit x86: twice as many vector registers, which
// the largest unrolling needs.
#if defined(__x86_64__)
const bool x64 = true;
#else
const bool x64 = false;
#endif
bool initialized = false;
bool sseSupported;
bool avx2Supported;
}
// Polynomial hash (base 31) over `length` UTF-16 code units, dispatched at
// runtime to the widest vector ISA the CPU supports. Produces exactly the
// same value as polyHash_naive for every input.
int polyHash_x86(int length, uint16_t const* str) {
    // Magic statics make the one-time CPU-feature probe thread-safe. The
    // previous lazily-assigned namespace globals (guarded by a plain bool)
    // were a data race when the first calls came from several threads.
    static const bool sse41Supported = __builtin_cpu_supports("sse4.1");
    static const bool avx2Supported = __builtin_cpu_supports("avx2");

    if (length < 16 || (!sse41Supported && !avx2Supported)) {
        // Either vectorization is not supported or the string is too short to gain from it.
        return polyHash_naive(length, str);
    }
    // The vector kernels consume length / 4 four-character groups; thresholds
    // pick the unrolling depth that pays off for the given length.
    int res;
    if (length < 32)
        res = SSETraits::polyHashUnalignedUnrollUpTo8(length / 4, str);
    else if (!avx2Supported)
        res = SSETraits::polyHashUnalignedUnrollUpTo16(length / 4, str);
    else if (length < 128)
        res = AVX2Traits::polyHashUnalignedUnrollUpTo16(length / 4, str);
    else if (!x64 || length < 576)
        res = AVX2Traits::polyHashUnalignedUnrollUpTo32(length / 4, str);
    else // Such big unrolling requires 64-bit mode (in 32-bit mode there are only 8 vector registers)
        res = AVX2Traits::polyHashUnalignedUnrollUpTo64(length / 4, str);
    // Handle the tail (the length % 4 trailing characters) naively.
    for (int i = length & 0xFFFFFFFC; i < length; ++i)
        res = res * 31 + str[i];
    return res;
}
#pragma clang attribute pop
#endif
/*
* Copyright 2010-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license
* that can be found in the LICENSE file.
*/
#ifndef RUNTIME_POLYHASH_X86_H
#define RUNTIME_POLYHASH_X86_H

// Needed for uint16_t below; previously this header did not compile
// standalone because it relied on the includer providing <cstdint>.
#include <cstdint>

// Polynomial string hash (base 31) over UTF-16 code units, accelerated with
// SSE4.1/AVX2 when the CPU supports them; falls back to the naive
// implementation otherwise.
int polyHash_x86(int length, uint16_t const* str);

#endif // RUNTIME_POLYHASH_X86_H
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment