Merge pull request #5304 from 4ertus2/funcs

Support quantile(s) and median for Decimal

Merge pull request #5304 from 4ertus2/funcs
Support quantile(s) and median for Decimal
34f36e28 · alexey-milovidov · GitHub · e0b90de4 · fdcbf509 · 34f36e28
6 changed files
--- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp
@@ -43,8 +43,12 @@ template <typename Value, bool FloatReturn> using FuncQuantilesTDigestWeighted =
 template <template <typename, bool> class Function>
 static constexpr bool supportDecimal()
 {
-    return std::is_same_v<Function<Float32, false>, FuncQuantileExact<Float32, false>> ||
-        std::is_same_v<Function<Float32, false>, FuncQuantilesExact<Float32, false>>;
+    return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
+        std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||
+        std::is_same_v<Function<Float32, false>, FuncQuantileExact<Float32, false>> ||
+        std::is_same_v<Function<Float32, false>, FuncQuantilesExact<Float32, false>> ||
+        std::is_same_v<Function<Float32, false>, FuncQuantileExactWeighted<Float32, false>> ||
+        std::is_same_v<Function<Float32, false>, FuncQuantilesExactWeighted<Float32, false>>;
 }


@@ -66,9 +70,9 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c

    if constexpr (supportDecimal<Function>())
    {
-        if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, true>>(argument_type, params);
-        if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, true>>(argument_type, params);
-        if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, true>>(argument_type, params);
+        if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_type, params);
+        if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_type, params);
+        if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_type, params);
    }

    throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,

--- a/dbms/src/AggregateFunctions/QuantileExactWeighted.h
+++ b/dbms/src/AggregateFunctions/QuantileExactWeighted.h
@@ -20,12 +20,22 @@ namespace ErrorCodes
 template <typename Value>
 struct QuantileExactWeighted
 {
+    struct Int128Hash
+    {
+        size_t operator()(Int128 x) const
+        {
+            return CityHash_v1_0_2::Hash128to64({x >> 64, x & 0xffffffffffffffffll});
+        }
+    };
+
    using Weight = UInt64;
+    using UnderlyingType = typename NativeType<Value>::Type;
+    using Hasher = std::conditional_t<std::is_same_v<Value, Decimal128>, Int128Hash, HashCRC32<UnderlyingType>>;

    /// When creating, the hash table must be small.
    using Map = HashMap<
-        Value, Weight,
-        HashCRC32<Value>,
+        UnderlyingType, Weight,
+        Hasher,
        HashTableGrower<4>,
        HashTableAllocatorWithStackMemory<sizeof(std::pair<Value, Weight>) * (1 << 3)>
    >;
@@ -39,7 +49,7 @@ struct QuantileExactWeighted
            ++map[x];
    }

-    void add(const Value & x, const Weight & weight)
+    void add(const Value & x, Weight weight)
    {
        if (!isNaN(x))
            map[x] += weight;

--- a/dbms/src/Core/Types.h
+++ b/dbms/src/Core/Types.h
@@ -165,6 +165,11 @@ template <> constexpr bool IsDecimalNumber<Decimal32> = true;
 template <> constexpr bool IsDecimalNumber<Decimal64> = true;
 template <> constexpr bool IsDecimalNumber<Decimal128> = true;

+template <typename T> struct NativeType { using Type = T; };
+template <> struct NativeType<Decimal32> { using Type = Int32; };
+template <> struct NativeType<Decimal64> { using Type = Int64; };
+template <> struct NativeType<Decimal128> { using Type = Int128; };
+
 }

 /// Specialization of `std::hash` for the Decimal<T> types.

--- a/dbms/src/Functions/FunctionBinaryArithmetic.h
+++ b/dbms/src/Functions/FunctionBinaryArithmetic.h
@@ -98,11 +98,6 @@ template <typename, typename> struct GreatestBaseImpl;
 template <typename, typename> struct ModuloImpl;


-template <typename T> struct NativeType { using Type = T; };
-template <> struct NativeType<Decimal32> { using Type = Int32; };
-template <> struct NativeType<Decimal64> { using Type = Int64; };
-template <> struct NativeType<Decimal128> { using Type = Int128; };
-
 /// Binary operations for Decimals need scale args
 /// +|- scale one of args (which scale factor is not 1). ScaleR = oneof(Scale1, Scale2);
 /// *   no agrs scale. ScaleR = Scale1 + Scale2;

--- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference
+++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.reference
@@ -15,8 +15,30 @@
 1.0000	1.0000	0.33333333	0.33333333	0.20000000	0.20000000
 50.0000	50.0000	16.66666666	16.66666666	10.00000000	10.00000000
 -1.0000	-1.0000	-0.33333333	-0.33333333	-0.20000000	-0.20000000
+0.0000	0.00000000	0.00000000	Decimal(38, 8)
+-25.5000	-8.49999999	-5.10000000	Decimal(38, 8)
 0.0000	0.00000000	0.00000000
-25.0000	-8.33333333	-5.00000000
+10.0000	3.33333333	2.00000000
+20.0000	6.66666666	4.00000000
+30.0000	10.00000000	6.00000000
+40.0000	13.33333333	8.00000000
+50.0000	16.66666666	10.00000000
+[-50.0000,-40.0000,-30.0000,-20.0000,-10.0000,0.0000,10.0000,20.0000,30.0000,40.0000,50.0000]
+[-16.66666666,-13.33333333,-10.00000000,-6.66666666,-3.33333333,0.00000000,3.33333333,6.66666666,10.00000000,13.33333333,16.66666666]
+[-10.00000000,-8.00000000,-6.00000000,-4.00000000,-2.00000000,0.00000000,2.00000000,4.00000000,6.00000000,8.00000000,10.00000000]
+0.0000	0.00000000	0.00000000	Decimal(38, 8)
+-25.0000	-8.33333333	-5.00000000	Decimal(38, 8)
+0.0000	0.00000000	0.00000000
+10.0000	3.33333333	2.00000000
+20.0000	6.66666666	4.00000000
+30.0000	10.00000000	6.00000000
+40.0000	13.33333333	8.00000000
+50.0000	16.66666666	10.00000000
+[-50.0000,-40.0000,-30.0000,-20.0000,-10.0000,0.0000,10.0000,20.0000,30.0000,40.0000,50.0000]
+[-16.66666666,-13.33333333,-10.00000000,-6.66666666,-3.33333333,0.00000000,3.33333333,6.66666666,10.00000000,13.33333333,16.66666666]
+[-10.00000000,-8.00000000,-6.00000000,-4.00000000,-2.00000000,0.00000000,2.00000000,4.00000000,6.00000000,8.00000000,10.00000000]
+0.0000	0.00000000	0.00000000	Decimal(38, 8)
+-26.0000	-8.66666666	-5.20000000	Decimal(38, 8)
 0.0000	0.00000000	0.00000000
 10.0000	3.33333333	2.00000000
 20.0000	6.66666666	4.00000000

--- a/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql
+++ b/dbms/tests/queries/0_stateless/00700_decimal_aggregates.sql
@@ -39,8 +39,20 @@ SELECT argMin(a, b), argMin(a, c), argMin(b, a), argMin(b, c), argMin(c, a), arg
 SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM decimal;
 SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM decimal WHERE a < 0;

-SELECT medianExact(a), medianExact(b), medianExact(c) FROM decimal;
-SELECT quantileExact(a), quantileExact(b), quantileExact(c) FROM decimal WHERE a < 0;
+SELECT median(a), median(b), median(c) as x, toTypeName(x) FROM decimal;
+SELECT quantile(a), quantile(b), quantile(c) as x, toTypeName(x) FROM decimal WHERE a < 0;
+SELECT quantile(0.0)(a), quantile(0.0)(b), quantile(0.0)(c) FROM decimal WHERE a >= 0;
+SELECT quantile(0.2)(a), quantile(0.2)(b), quantile(0.2)(c) FROM decimal WHERE a >= 0;
+SELECT quantile(0.4)(a), quantile(0.4)(b), quantile(0.4)(c) FROM decimal WHERE a >= 0;
+SELECT quantile(0.6)(a), quantile(0.6)(b), quantile(0.6)(c) FROM decimal WHERE a >= 0;
+SELECT quantile(0.8)(a), quantile(0.8)(b), quantile(0.8)(c) FROM decimal WHERE a >= 0;
+SELECT quantile(1.0)(a), quantile(1.0)(b), quantile(1.0)(c) FROM decimal WHERE a >= 0;
+SELECT quantiles(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(a) FROM decimal;
+SELECT quantiles(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(b) FROM decimal;
+SELECT quantiles(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(c) FROM decimal;
+
+SELECT medianExact(a), medianExact(b), medianExact(c) as x, toTypeName(x) FROM decimal;
+SELECT quantileExact(a), quantileExact(b), quantileExact(c) as x, toTypeName(x) FROM decimal WHERE a < 0;
 SELECT quantileExact(0.0)(a), quantileExact(0.0)(b), quantileExact(0.0)(c) FROM decimal WHERE a >= 0;
 SELECT quantileExact(0.2)(a), quantileExact(0.2)(b), quantileExact(0.2)(c) FROM decimal WHERE a >= 0;
 SELECT quantileExact(0.4)(a), quantileExact(0.4)(b), quantileExact(0.4)(c) FROM decimal WHERE a >= 0;
@@ -51,6 +63,18 @@ SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(a)
 SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(b) FROM decimal;
 SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(c) FROM decimal;

+SELECT medianExactWeighted(a, 1), medianExactWeighted(b, 2), medianExactWeighted(c, 3) as x, toTypeName(x) FROM decimal;
+SELECT quantileExactWeighted(a, 1), quantileExactWeighted(b, 2), quantileExactWeighted(c, 3) as x, toTypeName(x) FROM decimal WHERE a < 0;
+SELECT quantileExactWeighted(0.0)(a, 1), quantileExactWeighted(0.0)(b, 2), quantileExactWeighted(0.0)(c, 3) FROM decimal WHERE a >= 0;
+SELECT quantileExactWeighted(0.2)(a, 1), quantileExactWeighted(0.2)(b, 2), quantileExactWeighted(0.2)(c, 3) FROM decimal WHERE a >= 0;
+SELECT quantileExactWeighted(0.4)(a, 1), quantileExactWeighted(0.4)(b, 2), quantileExactWeighted(0.4)(c, 3) FROM decimal WHERE a >= 0;
+SELECT quantileExactWeighted(0.6)(a, 1), quantileExactWeighted(0.6)(b, 2), quantileExactWeighted(0.6)(c, 3) FROM decimal WHERE a >= 0;
+SELECT quantileExactWeighted(0.8)(a, 1), quantileExactWeighted(0.8)(b, 2), quantileExactWeighted(0.8)(c, 3) FROM decimal WHERE a >= 0;
+SELECT quantileExactWeighted(1.0)(a, 1), quantileExactWeighted(1.0)(b, 2), quantileExactWeighted(1.0)(c, 3) FROM decimal WHERE a >= 0;
+SELECT quantilesExactWeighted(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(a, 1) FROM decimal;
+SELECT quantilesExactWeighted(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(b, 2) FROM decimal;
+SELECT quantilesExactWeighted(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(c, 3) FROM decimal;
+
 SELECT varPop(a) AS va, varPop(b) AS vb, varPop(c) AS vc, toTypeName(va), toTypeName(vb), toTypeName(vc) FROM decimal;
 SELECT varPop(toFloat64(a)), varPop(toFloat64(b)), varPop(toFloat64(c)) FROM decimal;
 SELECT varSamp(a) AS va, varSamp(b) AS vb, varSamp(c) AS vc, toTypeName(va), toTypeName(vb), toTypeName(vc) FROM decimal;