Removed tons of garbage from "greatCircleDistance" function. But algorithm is still unclear.

2474cdfa · Alexey Milovidov · f5e88f8c · 2474cdfa · 2474cdfa · 2474cdfa
7 changed files
--- a/dbms/src/Columns/ColumnConst.h
+++ b/dbms/src/Columns/ColumnConst.h
@@ -105,6 +105,11 @@ public:
        return data->getFloat64(0);
    }

+    Float32 getFloat32(size_t) const override
+    {
+        return data->getFloat32(0);  /// The row index is ignored; element 0 of the nested `data` column is returned for every row.
+    }
+
    bool isNullAt(size_t) const override
    {
        return data->isNullAt(0);

--- a/dbms/src/Columns/ColumnLowCardinality.h
+++ b/dbms/src/Columns/ColumnLowCardinality.h
@@ -59,6 +59,7 @@ public:
    UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); }
    Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); }
    Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); }
+    Float32 getFloat32(size_t n) const override { return getDictionary().getFloat32(getIndexes().getUInt(n)); } /// position from indexes, value from dictionary
    bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); }
    bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); }
    ColumnPtr cut(size_t start, size_t length) const override

--- a/dbms/src/Columns/ColumnUnique.h
+++ b/dbms/src/Columns/ColumnUnique.h
@@ -66,6 +66,7 @@ public:
    UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); }
    Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); }
    Float64 getFloat64(size_t n) const override { return getNestedColumn()->getFloat64(n); }
+    Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
    bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
    bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
    StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;

--- a/dbms/src/Columns/ColumnVector.cpp
+++ b/dbms/src/Columns/ColumnVector.cpp
@@ -222,6 +222,12 @@ Float64 ColumnVector<T>::getFloat64(size_t n) const
    return static_cast<Float64>(data[n]);
 }

+template <typename T>
+Float32 ColumnVector<T>::getFloat32(size_t n) const
+{
+    return static_cast<Float32>(data[n]);  /// Plain static_cast of the n-th element, same shape as getFloat64 above; no bounds check is done here.
+}
+
 template <typename T>
 void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
 {

--- a/dbms/src/Columns/ColumnVector.h
+++ b/dbms/src/Columns/ColumnVector.h
@@ -205,6 +205,7 @@ public:
    UInt64 get64(size_t n) const override;

    Float64 getFloat64(size_t n) const override;
+    Float32 getFloat32(size_t n) const override;

    UInt64 getUInt(size_t n) const override
    {

--- a/dbms/src/Columns/IColumn.h
+++ b/dbms/src/Columns/IColumn.h
@@ -100,6 +100,11 @@ public:
        throw Exception("Method getFloat64 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
    }

+    virtual Float32 getFloat32(size_t /*n*/) const  /// Base implementation: always throws NOT_IMPLEMENTED.
+    {
+        throw Exception("Method getFloat32 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
+    }
+
    /** If column is numeric, return value of n-th element, casted to UInt64.
      * For NULL values of Nullable column it is allowed to return arbitrary value.
      * Otherwise throw an exception.

--- a/dbms/src/Functions/greatCircleDistance.cpp
+++ b/dbms/src/Functions/greatCircleDistance.cpp
@@ -7,7 +7,7 @@
 #include <Functions/FunctionHelpers.h>
 #include <Functions/FunctionFactory.h>
 #include <ext/range.h>
-#include <math.h>
+#include <cmath>
 #include <array>


@@ -21,19 +21,32 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
 }

+/** https://en.wikipedia.org/wiki/Great-circle_distance
+ *
+ *  The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees.
+ *  The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance .
+ *  Throws exception when one or several input values are not within reasonable bounds.
+ *  Latitude must be in [-90, 90], longitude must be [-180, 180].
+ *  The original code of this implementation is here: https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825 .
+ *  Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
+ *  Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
+ *  The main idea of this implementation is optimisation based on Taylor series, trigonometric identities and constants that are calculated once and stored in lookup tables (cosine, arcsine(sqrt) and the flat ellipsoid coefficients).
+ */
+
 namespace
 {
-const double PI = 3.14159265358979323846;
-const float TO_RADF = static_cast<float>(PI / 180.0);
-const float TO_RADF2 = static_cast<float>(PI / 360.0);

-const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
-const int GEODIST_TABLE_ASIN = 512;
-const int GEODIST_TABLE_K = 1024;
+constexpr double PI = 3.14159265358979323846;
+constexpr float TO_RADF = static_cast<float>(PI / 180.0);
+constexpr float TO_RADF2 = static_cast<float>(PI / 360.0);
+
+constexpr size_t GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
+constexpr size_t GEODIST_TABLE_ASIN = 512;
+constexpr size_t GEODIST_TABLE_K = 1024;

 float g_GeoCos[GEODIST_TABLE_COS + 1];        /// cos(x) table
 float g_GeoAsin[GEODIST_TABLE_ASIN + 1];    /// asin(sqrt(x)) table
-float g_GeoFlatK[GEODIST_TABLE_K + 1][2];    /// geodistAdaptive() flat ellipsoid method k1,k2 coeffs table
+float g_GeoFlatK[GEODIST_TABLE_K + 1][2];    /// geodistAdaptive() flat ellipsoid method k1, k2 coeffs table

 inline double sqr(double v)
 {
@@ -48,7 +61,7 @@ inline float fsqr(float v)
 void geodistInit()
 {
    for (size_t i = 0; i <= GEODIST_TABLE_COS; ++i)
-        g_GeoCos[i] = static_cast<float>(cos(2 * PI * i / GEODIST_TABLE_COS)); // [0, 2pi] -> [0, COSTABLE]
+        g_GeoCos[i] = static_cast<float>(cos(2 * PI * i / GEODIST_TABLE_COS)); // [0, 2 * pi] -> [0, COSTABLE]

    for (size_t i = 0; i <= GEODIST_TABLE_ASIN; ++i)
        g_GeoAsin[i] = static_cast<float>(asin(
@@ -56,7 +69,7 @@ void geodistInit()

    for (size_t i = 0; i <= GEODIST_TABLE_K; ++i)
    {
-        double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi/2, pi/2] -> [0, KTABLE]
+        double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi / 2, pi / 2] -> [0, KTABLE]
        g_GeoFlatK[i][0] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * x) + 1.20 * cos(4 * x)));
        g_GeoFlatK[i][1] = static_cast<float>(sqr(111415.13 * cos(x) - 94.55 * cos(3 * x) + 0.12 * cos(5 * x)));
    }
@@ -86,11 +99,10 @@ inline float geodistFastSin(float x)
    float y = static_cast<float>(fabs(x) * GEODIST_TABLE_COS / PI / 2);
    int i = static_cast<int>(y);
    y -= i;
-    i = (i - GEODIST_TABLE_COS / 4) & (GEODIST_TABLE_COS - 1); // cos(x-pi/2)=sin(x), costable/4=pi/2
+    i = (i - GEODIST_TABLE_COS / 4) & (GEODIST_TABLE_COS - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
    return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y;
 }

-
 /// fast implementation of asin(sqrt(x))
 /// max error in floats 0.00369%, in doubles 0.00072%
 inline float geodistFastAsinSqrt(float x)
@@ -110,17 +122,10 @@ inline float geodistFastAsinSqrt(float x)
    }
    return static_cast<float>(asin(sqrt(x))); // distance over 17083km, just compute honestly
 }
+
 }
-/**
- *  The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees.
- *  The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance .
- *  Throws exception when one or several input values are not within reasonable bounds.
- *  Latitude must be in [-90, 90], longitude must be [-180, 180].
- *  Original code of this implementation of this function is here https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
- *  Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
- *  Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
- *  The main idea of this implementation is optimisations based on Taylor series, trigonometric identity and calculated constants once for cosine, arcsine(sqrt) and look up table.
- */
+
+
 class FunctionGreatCircleDistance : public IFunction
 {
 public:
@@ -128,133 +133,75 @@ public:
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionGreatCircleDistance>(); }

 private:
-
-    enum class instr_type : uint8_t
-    {
-        get_float_64,
-        get_const_float_64
-    };
-
-    using instr_t = std::pair<instr_type, const IColumn *>;
-    using instrs_t = std::array<instr_t, 4>;
-
    String getName() const override { return name; }
-
    size_t getNumberOfArguments() const override { return 4; }

+    bool useDefaultImplementationForConstants() const override { return true; }
+
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        for (const auto arg_idx : ext::range(0, arguments.size()))
        {
            const auto arg = arguments[arg_idx].get();
-            if (!WhichDataType(arg).isFloat64())
+            if (!WhichDataType(arg).isFloat()) /// any floating-point argument type is accepted now, not only Float64
                throw Exception(
-                    "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be Float64",
+                    "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be Float32 or Float64",
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }

-        return std::make_shared<DataTypeFloat64>();
+        return std::make_shared<DataTypeFloat32>(); /// the result type is Float32 whatever the argument types are
    }

-    instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const)
+    Float32 greatCircleDistance(Float32 lon1deg, Float32 lat1deg, Float32 lon2deg, Float32 lat2deg)
    {
-        instrs_t result;
-        out_const = true;
-
-        for (const auto arg_idx : ext::range(0, arguments.size()))
-        {
-            const auto column = block.getByPosition(arguments[arg_idx]).column.get();
-
-            if (const auto col = checkAndGetColumn<ColumnVector<Float64>>(column))
-            {
-                out_const = false;
-                result[arg_idx] = instr_t{instr_type::get_float_64, col};
-            }
-            else if (const auto col_const = checkAndGetColumnConst<ColumnVector<Float64>>(column))
-            {
-                result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const};
-            }
-            else
-                throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(),
-                    ErrorCodes::ILLEGAL_COLUMN);
-        }
-
-        return result;
-    }
-
-    /// https://en.wikipedia.org/wiki/Great-circle_distance
-    Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg)
-    {
-        if (lon1Deg < -180 || lon1Deg > 180 ||
-            lon2Deg < -180 || lon2Deg > 180 ||
-            lat1Deg < -90 || lat1Deg > 90 ||
-            lat2Deg < -90 || lat2Deg > 90)
+        if (lon1deg < -180 || lon1deg > 180 ||
+            lon2deg < -180 || lon2deg > 180 ||
+            lat1deg < -90 || lat1deg > 90 ||
+            lat2deg < -90 || lat2deg > 90)
        {
            throw Exception("Arguments values out of bounds for function " + getName(),
                            ErrorCodes::ARGUMENT_OUT_OF_BOUND);
        }

-        float dlat = geodistDegDiff(lat1Deg - lat2Deg);
-        float dlon = geodistDegDiff(lon1Deg - lon2Deg);
+        float lat_diff = geodistDegDiff(lat1deg - lat2deg);
+        float lon_diff = geodistDegDiff(lon1deg - lon2deg);

-        if (dlon < 13)
+        if (lon_diff < 13)
        {
            // points are close enough; use flat ellipsoid model
            // interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint
-            float m = (lat1Deg + lat2Deg + 180) * GEODIST_TABLE_K / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
-            int i = static_cast<int>(m);
-            i &= (GEODIST_TABLE_K - 1);
+            float m = (lat1deg + lat2deg + 180) * GEODIST_TABLE_K / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
+            size_t i = m < GEODIST_TABLE_K ? static_cast<size_t>(m) : GEODIST_TABLE_K - 1; // clamp, not mask: m == KTABLE (both latitudes are 90) must use the last cell, not wrap to 0
            float kk1 = g_GeoFlatK[i][0] + (g_GeoFlatK[i + 1][0] - g_GeoFlatK[i][0]) * (m - i);
            float kk2 = g_GeoFlatK[i][1] + (g_GeoFlatK[i + 1][1] - g_GeoFlatK[i][1]) * (m - i);
-            return static_cast<float>(sqrt(kk1 * dlat * dlat + kk2 * dlon * dlon));
+            return static_cast<float>(sqrt(kk1 * lat_diff * lat_diff + kk2 * lon_diff * lon_diff));
+        }
+        else
+        {
+            // points too far away; use haversine
+            static const float d = 2 * 6371000;
+            float a = fsqr(geodistFastSin(lat_diff * TO_RADF2)) +
+                geodistFastCos(lat1deg * TO_RADF) * geodistFastCos(lat2deg * TO_RADF) *
+                fsqr(geodistFastSin(lon_diff * TO_RADF2));
+            return static_cast<float>(d * geodistFastAsinSqrt(a));
        }
-        // points too far away; use haversine
-        static const float D = 2 * 6371000;
-        float a = fsqr(geodistFastSin(dlat * TO_RADF2)) +
-                  geodistFastCos(lat1Deg * TO_RADF) * geodistFastCos(lat2Deg * TO_RADF) *
-                  fsqr(geodistFastSin(dlon * TO_RADF2));
-        return static_cast<float>(D * geodistFastAsinSqrt(a));
    }

-
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
    {
-        const auto size = input_rows_count;
+        auto dst = ColumnVector<Float32>::create();
+        auto & dst_data = dst->getData();
+        dst_data.resize(input_rows_count);

-        bool result_is_const{};
-        auto instrs = getInstructions(block, arguments, result_is_const);
+        const IColumn & col_lon1 = *block.getByPosition(arguments[0]).column;
+        const IColumn & col_lat1 = *block.getByPosition(arguments[1]).column;
+        const IColumn & col_lon2 = *block.getByPosition(arguments[2]).column;
+        const IColumn & col_lat2 = *block.getByPosition(arguments[3]).column;

-        if (result_is_const)
-        {
-            const auto & colLon1 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[0]).column.get())->getValue<Float64>();
-            const auto & colLat1 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[1]).column.get())->getValue<Float64>();
-            const auto & colLon2 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[2]).column.get())->getValue<Float64>();
-            const auto & colLat2 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[3]).column.get())->getValue<Float64>();
+        for (size_t row_num = 0; row_num < input_rows_count; ++row_num) /// every argument is read through the virtual IColumn::getFloat32 accessor, one row at a time
+            dst_data[row_num] = greatCircleDistance(col_lon1.getFloat32(row_num), col_lat1.getFloat32(row_num), col_lon2.getFloat32(row_num), col_lat2.getFloat32(row_num));

-            Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2);
-            block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res);
-        }
-        else
-        {
-            auto dst = ColumnVector<Float64>::create();
-            auto & dst_data = dst->getData();
-            dst_data.resize(size);
-            Float64 vals[instrs.size()];
-            for (const auto row : ext::range(0, size))
-            {
-                for (const auto idx : ext::range(0, instrs.size()))
-                {
-                    if (instr_type::get_float_64 == instrs[idx].first)
-                        vals[idx] = assert_cast<const ColumnVector<Float64> *>(instrs[idx].second)->getData()[row];
-                    else if (instr_type::get_const_float_64 == instrs[idx].first)
-                        vals[idx] = assert_cast<const ColumnConst *>(instrs[idx].second)->getValue<Float64>();
-                    else
-                        throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR};
-                }
-                dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]);
-            }
-            block.getByPosition(result).column = std::move(dst);
-        }
+        block.getByPosition(result).column = std::move(dst);
    }
 };