From 1cc27117b923daea37486e50c311ea3edbb877f1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Jan 2016 03:45:19 +0300 Subject: [PATCH] dbms: porting to aarch64 [#METR-19609]. --- dbms/include/DB/Common/ARMHelpers.h | 11 +++ dbms/include/DB/Common/ArenaWithFreeLists.h | 10 +-- dbms/include/DB/Common/StringSearcher.h | 87 ++++++++++++------- dbms/include/DB/Common/UTF8Helpers.h | 7 +- dbms/include/DB/Common/Volnitsky.h | 1 - dbms/include/DB/Functions/FunctionsRound.h | 96 +++++++++++++++++++-- 6 files changed, 166 insertions(+), 46 deletions(-) create mode 100644 dbms/include/DB/Common/ARMHelpers.h diff --git a/dbms/include/DB/Common/ARMHelpers.h b/dbms/include/DB/Common/ARMHelpers.h new file mode 100644 index 0000000000..268855f788 --- /dev/null +++ b/dbms/include/DB/Common/ARMHelpers.h @@ -0,0 +1,11 @@ +#pragma once + + +#if !defined(__x86_64__) + +inline unsigned int _bit_scan_reverse(unsigned int x) +{ + return sizeof(unsigned int) * 8 - 1 - __builtin_clz(x); +} + +#endif diff --git a/dbms/include/DB/Common/ArenaWithFreeLists.h b/dbms/include/DB/Common/ArenaWithFreeLists.h index 135a9007e1..b4c30899e6 100644 --- a/dbms/include/DB/Common/ArenaWithFreeLists.h +++ b/dbms/include/DB/Common/ArenaWithFreeLists.h @@ -1,15 +1,7 @@ #pragma once #include - -#if !defined(__x86_64__) - -inline unsigned int _bit_scan_reverse(unsigned int x) -{ - return sizeof(unsigned int) * 8 - 1 - __builtin_clz(x); -} - -#endif +#include namespace DB diff --git a/dbms/include/DB/Common/StringSearcher.h b/dbms/include/DB/Common/StringSearcher.h index b6c0a376d0..8930a7bd65 100644 --- a/dbms/include/DB/Common/StringSearcher.h +++ b/dbms/include/DB/Common/StringSearcher.h @@ -7,6 +7,10 @@ #include #include +#if defined(__x86_64__) + #include +#endif + namespace DB { @@ -18,18 +22,30 @@ namespace ErrorCodes } +struct StringSearcherBase +{ +#if defined(__x86_64__) + static constexpr auto n = sizeof(__m128i); + const int page_size = getpagesize(); + + bool page_safe(const void * const ptr) const + { + return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - n; + } +#endif +}; + + /// Performs case-sensitive and case-insensitive search of UTF-8 strings template class StringSearcher; /// Case-insensitive UTF-8 searcher -template <> class StringSearcher +template <> +class StringSearcher : private StringSearcherBase { +private: using UTF8SequenceBuffer = UInt8[6]; - static constexpr auto n = sizeof(__m128i); - - const int page_size = getpagesize(); - /// string to be searched for const UInt8 * const needle; const std::size_t needle_size; @@ -38,6 +54,8 @@ template <> class StringSearcher bool first_needle_symbol_is_ascii{}; UInt8 l{}; UInt8 u{}; + +#if defined(__x86_64__) /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol __m128i patl, patu; /// lower and uppercase vectors of first 16 characters of `needle` @@ -45,11 +63,7 @@ template <> class StringSearcher int cachemask{}; std::size_t cache_valid_len{}; std::size_t cache_actual_len{}; - - bool page_safe(const void * const ptr) const - { - return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - n; - } +#endif public: StringSearcher(const char * const needle_, const std::size_t needle_size) @@ -80,6 +94,7 @@ public: u = u_seq[0]; } +#if defined(__x86_64__) /// for detecting leftmost position of the first symbol patl = _mm_set1_epi8(l); patu = _mm_set1_epi8(u); @@ -133,12 +148,14 @@ public: } } } +#endif } bool compare(const UInt8 * pos) const { static const Poco::UTF8Encoding utf8; +#if defined(__x86_64__) if (page_safe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); @@ -172,6 +189,7 @@ public: return false; } +#endif if (*pos == l || *pos == u) { @@ -202,6 +220,7 @@ public: while (haystack < haystack_end) { +#if defined(__x86_64__) if (haystack + n <= haystack_end && page_safe(haystack)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); @@ -257,6 +276,7 @@ public: continue; } } +#endif if (haystack == haystack_end) return haystack_end; @@ -286,13 +306,12 @@ public: } }; + /// Case-insensitive ASCII searcher -template <> class StringSearcher +template <> +class StringSearcher : private StringSearcherBase { - static constexpr auto n = sizeof(__m128i); - - const int page_size = getpagesize(); - +private: /// string to be searched for const UInt8 * const needle; const std::size_t needle_size; @@ -300,16 +319,14 @@ template <> class StringSearcher /// lower and uppercase variants of the first character in `needle` UInt8 l{}; UInt8 u{}; + +#if defined(__x86_64__) /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol __m128i patl, patu; /// lower and uppercase vectors of first 16 characters of `needle` __m128i cachel = _mm_setzero_si128(), cacheu = _mm_setzero_si128(); int cachemask{}; - - bool page_safe(const void * const ptr) const - { - return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - n; - } +#endif public: StringSearcher(const char * const needle_, const std::size_t needle_size) @@ -321,6 +338,7 @@ public: l = static_cast(std::tolower(*needle)); u = static_cast(std::toupper(*needle)); +#if defined(__x86_64__) patl = _mm_set1_epi8(l); patu = _mm_set1_epi8(u); @@ -339,10 +357,12 @@ public: ++needle_pos; } } +#endif } bool compare(const UInt8 * pos) const { +#if defined(__x86_64__) if (page_safe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); @@ -370,6 +390,7 @@ public: return false; } +#endif if (*pos == l || *pos == u) { @@ -393,6 +414,7 @@ public: while (haystack < haystack_end) { +#if defined(__x86_64__) if (haystack + n <= haystack_end && page_safe(haystack)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); @@ -441,6 +463,7 @@ public: continue; } } +#endif if (haystack == haystack_end) return haystack_end; @@ -465,29 +488,26 @@ public: } }; + /// Case-sensitive searcher (both ASCII and UTF-8) -template class StringSearcher +template +class StringSearcher : private StringSearcherBase { - static constexpr auto n = sizeof(__m128i); - - const int page_size = getpagesize(); - +private: /// string to be searched for const UInt8 * const needle; const std::size_t needle_size; const UInt8 * const needle_end = needle + needle_size; /// first character in `needle` UInt8 first{}; + +#if defined(__x86_64__) /// vector filled `first` for determining leftmost position of the first symbol __m128i pattern; /// vector of first 16 characters of `needle` __m128i cache = _mm_setzero_si128(); int cachemask{}; - - bool page_safe(const void * const ptr) const - { - return ((page_size - 1) & reinterpret_cast(ptr)) <= page_size - n; - } +#endif public: StringSearcher(const char * const needle_, const std::size_t needle_size) @@ -497,6 +517,8 @@ public: return; first = *needle; + +#if defined(__x86_64__) pattern = _mm_set1_epi8(first); auto needle_pos = needle; @@ -512,10 +534,12 @@ public: ++needle_pos; } } +#endif } bool compare(const UInt8 * pos) const { +#if defined(__x86_64__) if (page_safe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); @@ -541,6 +565,7 @@ public: return false; } +#endif if (*pos == first) { @@ -564,6 +589,7 @@ public: while (haystack < haystack_end) { +#if defined(__x86_64__) if (haystack + n <= haystack_end && page_safe(haystack)) { /// find first character @@ -611,6 +637,7 @@ public: continue; } } +#endif if (haystack == haystack_end) return haystack_end; diff --git a/dbms/include/DB/Common/UTF8Helpers.h b/dbms/include/DB/Common/UTF8Helpers.h index 40b201a9ba..d75de2d815 100644 --- a/dbms/include/DB/Common/UTF8Helpers.h +++ b/dbms/include/DB/Common/UTF8Helpers.h @@ -1,7 +1,12 @@ #pragma once #include -#include + +#if defined(__x86_64__) + #include +#else + #include +#endif namespace DB diff --git a/dbms/include/DB/Common/Volnitsky.h b/dbms/include/DB/Common/Volnitsky.h index 7e40469df1..8b35d1956c 100644 --- a/dbms/include/DB/Common/Volnitsky.h +++ b/dbms/include/DB/Common/Volnitsky.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/dbms/include/DB/Functions/FunctionsRound.h b/dbms/include/DB/Functions/FunctionsRound.h index 72c413d690..5be7bbb82c 100644 --- a/dbms/include/DB/Functions/FunctionsRound.h +++ b/dbms/include/DB/Functions/FunctionsRound.h @@ -5,6 +5,10 @@ #include #include +#if defined(__x86_64__) + #include +#endif + namespace DB { @@ -13,6 +17,7 @@ namespace DB * roundToExp2 - вниз до ближайшей степени двойки; * roundDuration - вниз до ближайшего из: 0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000; * roundAge - вниз до ближайшего из: 0, 18, 25, 35, 45. + * * round(x, N) - арифметическое округление (N = 0 по умолчанию). * ceil(x, N) - наименьшее число, которое не меньше x (N = 0 по умолчанию). * floor(x, N) - наибольшее число, которое не больше x (N = 0 по умолчанию). @@ -257,10 +262,11 @@ namespace DB } }; - template +#if defined(__x86_64__) + template class BaseFloatRoundingComputation; - template<> + template <> class BaseFloatRoundingComputation { public: @@ -298,7 +304,7 @@ namespace DB } }; - template<> + template <> class BaseFloatRoundingComputation { public: @@ -522,6 +528,85 @@ namespace DB _mm_storeu_pd(out, val); } }; +#else + /// Реализация для ARM. Не векторизована. Не исправляет отрицательные нули. + + #define _MM_FROUND_NINT 0 + #define _MM_FROUND_FLOOR 1 + #define _MM_FROUND_CEIL 2 + + template + float roundWithMode(float x) + { + if (mode == _MM_FROUND_NINT) return roundf(x); + if (mode == _MM_FROUND_FLOOR) return floorf(x); + if (mode == _MM_FROUND_CEIL) return ceilf(x); + __builtin_unreachable(); + } + + template + double roundWithMode(double x) + { + if (mode == _MM_FROUND_NINT) return round(x); + if (mode == _MM_FROUND_FLOOR) return floor(x); + if (mode == _MM_FROUND_CEIL) return ceil(x); + __builtin_unreachable(); + } + + template + class BaseFloatRoundingComputation + { + public: + using Scale = T; + static const size_t data_count = 1; + + static inline void prepare(size_t scale, Scale & mm_scale) + { + mm_scale = static_cast(scale); + } + }; + + template + class FloatRoundingComputation; + + template + class FloatRoundingComputation + : public BaseFloatRoundingComputation + { + public: + static inline void compute(const T * __restrict in, const T & scale, T * __restrict out) + { + out[0] = roundWithMode(in[0] * scale) / scale; + } + }; + + template + class FloatRoundingComputation + : public BaseFloatRoundingComputation + { + public: + static inline void compute(const T * __restrict in, const T & scale, T * __restrict out) + { + out[0] = roundWithMode(in[0] / scale) * scale; + } + }; + + template + class FloatRoundingComputation + : public BaseFloatRoundingComputation + { + public: + static inline void prepare(size_t scale, T & mm_scale) + { + } + + static inline void compute(const T * __restrict in, const T & scale, T * __restrict out) + { + out[0] = roundWithMode(in[0]); + } + }; +#endif + /** Реализация высокоуровневых функций округления. */ @@ -906,7 +991,7 @@ namespace /** Выбрать подходящий алгоритм обработки в зависимости от масштаба. */ - template class U, int rounding_mode> + template class U, int rounding_mode> struct Dispatcher { static inline void apply(Block & block, U * col, const ColumnNumbers & arguments, size_t result) @@ -1053,9 +1138,10 @@ namespace typedef FunctionUnaryArithmetic FunctionRoundToExp2; typedef FunctionUnaryArithmetic FunctionRoundDuration; typedef FunctionUnaryArithmetic FunctionRoundAge; + typedef FunctionRounding FunctionRound; - typedef FunctionRounding FunctionCeil; typedef FunctionRounding FunctionFloor; + typedef FunctionRounding FunctionCeil; struct PositiveMonotonicity -- GitLab