提交 1cc27117 编写于 作者: A Alexey Milovidov

dbms: porting to aarch64 [#METR-19609].

上级 bffc6ce8
#pragma once
#if !defined(__x86_64__)
/// Stand-in for the x86 `_bit_scan_reverse` intrinsic on non-x86_64 targets:
/// returns the zero-based index of the most significant set bit of `x`.
/// `x` must be non-zero, because `__builtin_clz(0)` has an undefined result.
inline unsigned int _bit_scan_reverse(unsigned int x)
{
    /// Index of the top bit of an `unsigned int` (31 when it is 32 bits wide).
    const auto top_bit_index = sizeof(unsigned int) * 8 - 1;
    const auto leading_zeros = __builtin_clz(x);
    return top_bit_index - leading_zeros;
}
#endif
#pragma once
#include <DB/Common/Arena.h>
#if !defined(__x86_64__)
/// Fallback for targets other than x86_64, where the `_bit_scan_reverse`
/// intrinsic is not provided: position of the highest set bit of `x`, counted from 0.
/// The result is undefined for `x == 0` (as is `__builtin_clz(0)`).
inline unsigned int _bit_scan_reverse(unsigned int x)
{
    const auto highest_index = sizeof(unsigned int) * 8 - 1;
    const auto zeros_above = __builtin_clz(x);
    return highest_index - zeros_above;
}
#endif
#include <DB/Common/ARMHelpers.h>
namespace DB
......
......@@ -7,6 +7,10 @@
#include <stdint.h>
#include <string.h>
#if defined(__x86_64__)
#include <smmintrin.h>
#endif
namespace DB
{
......@@ -18,18 +22,30 @@ namespace ErrorCodes
}
/// Common base of the StringSearcher specializations.
/// On x86_64 it carries what the SSE code paths need to decide whether a
/// 16-byte unaligned load may be issued at a given address; on other
/// targets it is empty.
struct StringSearcherBase
{
#if defined(__x86_64__)
    /// Size in bytes of one SSE register (`__m128i`).
    static constexpr auto n = sizeof(__m128i);

    /// Memory page size, queried once per object.
    const int page_size = getpagesize();

    /// True when the `n` bytes starting at `ptr` lie within a single memory page,
    /// i.e. the offset of `ptr` inside its page is at most `page_size - n`.
    /// The mask below relies on `page_size` being a power of two.
    bool page_safe(const void * const ptr) const
    {
        const auto offset_in_page = (page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr);
        const auto last_safe_offset = page_size - n;
        return offset_in_page <= last_safe_offset;
    }
#endif
};
/// Performs case-sensitive and case-insensitive search of ASCII and UTF-8 strings.
/// The primary template is only declared; the behaviour for each
/// (CaseSensitive, ASCII) combination is provided by a specialization.
template <bool CaseSensitive, bool ASCII> class StringSearcher;
/// Case-insensitive UTF-8 searcher
template <> class StringSearcher<false, false>
template <>
class StringSearcher<false, false> : private StringSearcherBase
{
private:
using UTF8SequenceBuffer = UInt8[6];
static constexpr auto n = sizeof(__m128i);
const int page_size = getpagesize();
/// string to be searched for
const UInt8 * const needle;
const std::size_t needle_size;
......@@ -38,6 +54,8 @@ template <> class StringSearcher<false, false>
bool first_needle_symbol_is_ascii{};
UInt8 l{};
UInt8 u{};
#if defined(__x86_64__)
/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
__m128i patl, patu;
/// lower and uppercase vectors of first 16 characters of `needle`
......@@ -45,11 +63,7 @@ template <> class StringSearcher<false, false>
int cachemask{};
std::size_t cache_valid_len{};
std::size_t cache_actual_len{};
bool page_safe(const void * const ptr) const
{
return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
}
#endif
public:
StringSearcher(const char * const needle_, const std::size_t needle_size)
......@@ -80,6 +94,7 @@ public:
u = u_seq[0];
}
#if defined(__x86_64__)
/// for detecting leftmost position of the first symbol
patl = _mm_set1_epi8(l);
patu = _mm_set1_epi8(u);
......@@ -133,12 +148,14 @@ public:
}
}
}
#endif
}
bool compare(const UInt8 * pos) const
{
static const Poco::UTF8Encoding utf8;
#if defined(__x86_64__)
if (page_safe(pos))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
......@@ -172,6 +189,7 @@ public:
return false;
}
#endif
if (*pos == l || *pos == u)
{
......@@ -202,6 +220,7 @@ public:
while (haystack < haystack_end)
{
#if defined(__x86_64__)
if (haystack + n <= haystack_end && page_safe(haystack))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
......@@ -257,6 +276,7 @@ public:
continue;
}
}
#endif
if (haystack == haystack_end)
return haystack_end;
......@@ -286,13 +306,12 @@ public:
}
};
/// Case-insensitive ASCII searcher
template <> class StringSearcher<false, true>
template <>
class StringSearcher<false, true> : private StringSearcherBase
{
static constexpr auto n = sizeof(__m128i);
const int page_size = getpagesize();
private:
/// string to be searched for
const UInt8 * const needle;
const std::size_t needle_size;
......@@ -300,16 +319,14 @@ template <> class StringSearcher<false, true>
/// lower and uppercase variants of the first character in `needle`
UInt8 l{};
UInt8 u{};
#if defined(__x86_64__)
/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
__m128i patl, patu;
/// lower and uppercase vectors of first 16 characters of `needle`
__m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128();
int cachemask{};
bool page_safe(const void * const ptr) const
{
return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
}
#endif
public:
StringSearcher(const char * const needle_, const std::size_t needle_size)
......@@ -321,6 +338,7 @@ public:
l = static_cast<UInt8>(std::tolower(*needle));
u = static_cast<UInt8>(std::toupper(*needle));
#if defined(__x86_64__)
patl = _mm_set1_epi8(l);
patu = _mm_set1_epi8(u);
......@@ -339,10 +357,12 @@ public:
++needle_pos;
}
}
#endif
}
bool compare(const UInt8 * pos) const
{
#if defined(__x86_64__)
if (page_safe(pos))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
......@@ -370,6 +390,7 @@ public:
return false;
}
#endif
if (*pos == l || *pos == u)
{
......@@ -393,6 +414,7 @@ public:
while (haystack < haystack_end)
{
#if defined(__x86_64__)
if (haystack + n <= haystack_end && page_safe(haystack))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
......@@ -441,6 +463,7 @@ public:
continue;
}
}
#endif
if (haystack == haystack_end)
return haystack_end;
......@@ -465,29 +488,26 @@ public:
}
};
/// Case-sensitive searcher (both ASCII and UTF-8)
template <bool ASCII> class StringSearcher<true, ASCII>
template <bool ASCII>
class StringSearcher<true, ASCII> : private StringSearcherBase
{
static constexpr auto n = sizeof(__m128i);
const int page_size = getpagesize();
private:
/// string to be searched for
const UInt8 * const needle;
const std::size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
/// first character in `needle`
UInt8 first{};
#if defined(__x86_64__)
/// vector filled `first` for determining leftmost position of the first symbol
__m128i pattern;
/// vector of first 16 characters of `needle`
__m128i cache = _mm_setzero_si128();
int cachemask{};
bool page_safe(const void * const ptr) const
{
return ((page_size - 1) & reinterpret_cast<std::uintptr_t>(ptr)) <= page_size - n;
}
#endif
public:
StringSearcher(const char * const needle_, const std::size_t needle_size)
......@@ -497,6 +517,8 @@ public:
return;
first = *needle;
#if defined(__x86_64__)
pattern = _mm_set1_epi8(first);
auto needle_pos = needle;
......@@ -512,10 +534,12 @@ public:
++needle_pos;
}
}
#endif
}
bool compare(const UInt8 * pos) const
{
#if defined(__x86_64__)
if (page_safe(pos))
{
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
......@@ -541,6 +565,7 @@ public:
return false;
}
#endif
if (*pos == first)
{
......@@ -564,6 +589,7 @@ public:
while (haystack < haystack_end)
{
#if defined(__x86_64__)
if (haystack + n <= haystack_end && page_safe(haystack))
{
/// find first character
......@@ -611,6 +637,7 @@ public:
continue;
}
}
#endif
if (haystack == haystack_end)
return haystack_end;
......
#pragma once
#include <DB/Core/Types.h>
#include <x86intrin.h>
#if defined(__x86_64__)
#include <x86intrin.h>
#else
#include <DB/Common/ARMHelpers.h>
#endif
namespace DB
......
......@@ -4,7 +4,6 @@
#include <Poco/UTF8Encoding.h>
#include <Poco/Unicode.h>
#include <ext/range.hpp>
#include <x86intrin.h>
#include <stdint.h>
#include <string.h>
......
......@@ -5,6 +5,10 @@
#include <type_traits>
#include <array>
#if defined(__x86_64__)
#include <smmintrin.h>
#endif
namespace DB
{
......@@ -13,6 +17,7 @@ namespace DB
* roundToExp2 - вниз до ближайшей степени двойки;
* roundDuration - вниз до ближайшего из: 0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000;
* roundAge - вниз до ближайшего из: 0, 18, 25, 35, 45.
*
* round(x, N) - арифметическое округление (N = 0 по умолчанию).
* ceil(x, N) - наименьшее число, которое не меньше x (N = 0 по умолчанию).
* floor(x, N) - наибольшее число, которое не больше x (N = 0 по умолчанию).
......@@ -257,10 +262,11 @@ namespace DB
}
};
template<typename T>
#if defined(__x86_64__)
template <typename T>
class BaseFloatRoundingComputation;
template<>
template <>
class BaseFloatRoundingComputation<Float32>
{
public:
......@@ -298,7 +304,7 @@ namespace DB
}
};
template<>
template <>
class BaseFloatRoundingComputation<Float64>
{
public:
......@@ -522,6 +528,85 @@ namespace DB
_mm_storeu_pd(out, val);
}
};
#else
/// Implementation for ARM. Not vectorized. Does not fix up negative zeros.

/// Rounding-mode selectors. They reuse the `_MM_FROUND_*` names that the
/// x86_64 branch takes from <smmintrin.h>, so code outside this `#if` can
/// pass the same identifiers as template arguments on both platforms.
#define _MM_FROUND_NINT 0
#define _MM_FROUND_FLOOR 1
#define _MM_FROUND_CEIL 2

/// Rounds a single `float` to an integral value according to `mode`:
///   _MM_FROUND_NINT  -> roundf (nearest, halfway cases away from zero),
///   _MM_FROUND_FLOOR -> floorf,
///   _MM_FROUND_CEIL  -> ceilf.
/// Any other `mode` is rejected at compile time.
/// NOTE(review): the SSE4.1 meaning of _MM_FROUND_NINT is round-half-to-even, so
/// exact halfway inputs may round differently than on x86_64 - confirm this is intended.
template <int mode>
float roundWithMode(float x)
{
    static_assert(mode == _MM_FROUND_NINT || mode == _MM_FROUND_FLOOR || mode == _MM_FROUND_CEIL,
        "roundWithMode: unsupported rounding mode");

    if (mode == _MM_FROUND_NINT) return roundf(x);
    if (mode == _MM_FROUND_FLOOR) return floorf(x);
    if (mode == _MM_FROUND_CEIL) return ceilf(x);

    /// Not reachable: the static_assert above admits only the three modes handled.
    __builtin_unreachable();
}

/// `double` counterpart of the overload above; same mode mapping using
/// round / floor / ceil. Any other `mode` is rejected at compile time.
template <int mode>
double roundWithMode(double x)
{
    static_assert(mode == _MM_FROUND_NINT || mode == _MM_FROUND_FLOOR || mode == _MM_FROUND_CEIL,
        "roundWithMode: unsupported rounding mode");

    if (mode == _MM_FROUND_NINT) return round(x);
    if (mode == _MM_FROUND_FLOOR) return floor(x);
    if (mode == _MM_FROUND_CEIL) return ceil(x);

    /// Not reachable: the static_assert above admits only the three modes handled.
    __builtin_unreachable();
}
/// Non-vectorized base for the floating-point rounding kernels:
/// the scale is stored as a plain value of the element type `T`.
template <typename T>
class BaseFloatRoundingComputation
{
public:
    /// Representation of the prepared scale factor.
    typedef T Scale;

    /// Presumably the number of elements handled per compute() call
    /// (the derived classes visible here touch only index 0).
    static const size_t data_count = 1;

    /// Converts the integral `scale` into the element type and stores it in `mm_scale`.
    static inline void prepare(size_t scale, Scale & mm_scale)
    {
        mm_scale = static_cast<Scale>(scale);
    }
};
/// Rounding kernel parameterized by element type, rounding mode and scale mode.
/// Only declared here; compute() is supplied by the partial specializations
/// for each ScaleMode value.
template <typename T, int rounding_mode, ScaleMode scale_mode>
class FloatRoundingComputation;
/// PositiveScale case: multiply by the scale, round, then divide the scale back out.
template <typename T, int rounding_mode>
class FloatRoundingComputation<T, rounding_mode, PositiveScale>
    : public BaseFloatRoundingComputation<T>
{
public:
    /// Processes one element: reads in[0], writes out[0].
    static inline void compute(const T * __restrict in, const T & scale, T * __restrict out)
    {
        const T scaled_up = in[0] * scale;
        out[0] = roundWithMode<rounding_mode>(scaled_up) / scale;
    }
};
/// NegativeScale case: divide by the scale, round, then multiply the scale back in.
template <typename T, int rounding_mode>
class FloatRoundingComputation<T, rounding_mode, NegativeScale>
    : public BaseFloatRoundingComputation<T>
{
public:
    /// Processes one element: reads in[0], writes out[0].
    static inline void compute(const T * __restrict in, const T & scale, T * __restrict out)
    {
        const T scaled_down = in[0] / scale;
        out[0] = roundWithMode<rounding_mode>(scaled_down) * scale;
    }
};
/// ZeroScale case: the value is rounded directly, no scaling is applied.
template <typename T, int rounding_mode>
class FloatRoundingComputation<T, rounding_mode, ZeroScale>
    : public BaseFloatRoundingComputation<T>
{
public:
    /// Intentionally does nothing: hides the base-class prepare(), so `mm_scale` is left untouched.
    static inline void prepare(size_t scale, T & mm_scale)
    {
        (void)scale;
        (void)mm_scale;
    }

    /// Processes one element: reads in[0], writes out[0]; `scale` is not used.
    static inline void compute(const T * __restrict in, const T & scale, T * __restrict out)
    {
        const T value = in[0];
        out[0] = roundWithMode<rounding_mode>(value);
    }
};
#endif
/** Реализация высокоуровневых функций округления.
*/
......@@ -906,7 +991,7 @@ namespace
/** Выбрать подходящий алгоритм обработки в зависимости от масштаба.
*/
template<typename T, template<typename> class U, int rounding_mode>
template<typename T, template <typename> class U, int rounding_mode>
struct Dispatcher
{
static inline void apply(Block & block, U<T> * col, const ColumnNumbers & arguments, size_t result)
......@@ -1053,9 +1138,10 @@ namespace
typedef FunctionUnaryArithmetic<RoundToExp2Impl, NameRoundToExp2> FunctionRoundToExp2;
typedef FunctionUnaryArithmetic<RoundDurationImpl, NameRoundDuration> FunctionRoundDuration;
typedef FunctionUnaryArithmetic<RoundAgeImpl, NameRoundAge> FunctionRoundAge;
typedef FunctionRounding<NameRound, _MM_FROUND_NINT> FunctionRound;
typedef FunctionRounding<NameCeil, _MM_FROUND_CEIL> FunctionCeil;
typedef FunctionRounding<NameFloor, _MM_FROUND_FLOOR> FunctionFloor;
typedef FunctionRounding<NameCeil, _MM_FROUND_CEIL> FunctionCeil;
struct PositiveMonotonicity
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册