提交 2474cdfa 编写于 作者: A Alexey Milovidov

Removed tons of garbage from "greatCircleDistance" function. But algorithm is still unclear.

上级 f5e88f8c
......@@ -105,6 +105,11 @@ public:
return data->getFloat64(0);
}
Float32 getFloat32(size_t) const override
{
return data->getFloat32(0);
}
bool isNullAt(size_t) const override
{
return data->isNullAt(0);
......
......@@ -59,6 +59,7 @@ public:
UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); }
Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); }
Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); }
Float32 getFloat32(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat32(n)); }
bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); }
bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); }
ColumnPtr cut(size_t start, size_t length) const override
......
......@@ -66,6 +66,7 @@ public:
UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); }
Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); }
Float64 getFloat64(size_t n) const override { return getNestedColumn()->getFloat64(n); }
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
......
......@@ -222,6 +222,12 @@ Float64 ColumnVector<T>::getFloat64(size_t n) const
return static_cast<Float64>(data[n]);
}
template <typename T>
Float32 ColumnVector<T>::getFloat32(size_t n) const
{
return static_cast<Float32>(data[n]);
}
template <typename T>
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
......
......@@ -205,6 +205,7 @@ public:
UInt64 get64(size_t n) const override;
Float64 getFloat64(size_t n) const override;
Float32 getFloat32(size_t n) const override;
UInt64 getUInt(size_t n) const override
{
......
......@@ -100,6 +100,11 @@ public:
throw Exception("Method getFloat64 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual Float32 getFloat32(size_t /*n*/) const
{
throw Exception("Method getFloat32 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/** If column is numeric, return value of n-th element, casted to UInt64.
* For NULL values of Nullable column it is allowed to return arbitrary value.
* Otherwise throw an exception.
......
......@@ -7,7 +7,7 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <ext/range.h>
#include <math.h>
#include <cmath>
#include <array>
......@@ -21,19 +21,32 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
/** https://en.wikipedia.org/wiki/Great-circle_distance
*
* The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees.
* The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance .
* Throws exception when one or several input values are not within reasonable bounds.
* Latitude must be in [-90, 90], longitude must be [-180, 180].
* Original code of this implementation of this function is here https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
* Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
* Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
* The main idea of this implementation is optimisations based on Taylor series, trigonometric identity and calculated constants once for cosine, arcsine(sqrt) and look up table.
*/
namespace
{
const double PI = 3.14159265358979323846;
const float TO_RADF = static_cast<float>(PI / 180.0);
const float TO_RADF2 = static_cast<float>(PI / 360.0);
const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
const int GEODIST_TABLE_ASIN = 512;
const int GEODIST_TABLE_K = 1024;
constexpr double PI = 3.14159265358979323846;
constexpr float TO_RADF = static_cast<float>(PI / 180.0);
constexpr float TO_RADF2 = static_cast<float>(PI / 360.0);
constexpr size_t GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
constexpr size_t GEODIST_TABLE_ASIN = 512;
constexpr size_t GEODIST_TABLE_K = 1024;
float g_GeoCos[GEODIST_TABLE_COS + 1]; /// cos(x) table
float g_GeoAsin[GEODIST_TABLE_ASIN + 1]; /// asin(sqrt(x)) table
float g_GeoFlatK[GEODIST_TABLE_K + 1][2]; /// geodistAdaptive() flat ellipsoid method k1,k2 coeffs table
float g_GeoFlatK[GEODIST_TABLE_K + 1][2]; /// geodistAdaptive() flat ellipsoid method k1, k2 coeffs table
inline double sqr(double v)
{
......@@ -48,7 +61,7 @@ inline float fsqr(float v)
void geodistInit()
{
for (size_t i = 0; i <= GEODIST_TABLE_COS; ++i)
g_GeoCos[i] = static_cast<float>(cos(2 * PI * i / GEODIST_TABLE_COS)); // [0, 2pi] -> [0, COSTABLE]
g_GeoCos[i] = static_cast<float>(cos(2 * PI * i / GEODIST_TABLE_COS)); // [0, 2 * pi] -> [0, COSTABLE]
for (size_t i = 0; i <= GEODIST_TABLE_ASIN; ++i)
g_GeoAsin[i] = static_cast<float>(asin(
......@@ -56,7 +69,7 @@ void geodistInit()
for (size_t i = 0; i <= GEODIST_TABLE_K; ++i)
{
double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi/2, pi/2] -> [0, KTABLE]
double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi / 2, pi / 2] -> [0, KTABLE]
g_GeoFlatK[i][0] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * x) + 1.20 * cos(4 * x)));
g_GeoFlatK[i][1] = static_cast<float>(sqr(111415.13 * cos(x) - 94.55 * cos(3 * x) + 0.12 * cos(5 * x)));
}
......@@ -86,11 +99,10 @@ inline float geodistFastSin(float x)
float y = static_cast<float>(fabs(x) * GEODIST_TABLE_COS / PI / 2);
int i = static_cast<int>(y);
y -= i;
i = (i - GEODIST_TABLE_COS / 4) & (GEODIST_TABLE_COS - 1); // cos(x-pi/2)=sin(x), costable/4=pi/2
i = (i - GEODIST_TABLE_COS / 4) & (GEODIST_TABLE_COS - 1); // cos(x - pi / 2) = sin(x), costable / 4 = pi / 2
return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y;
}
/// fast implementation of asin(sqrt(x))
/// max error in floats 0.00369%, in doubles 0.00072%
inline float geodistFastAsinSqrt(float x)
......@@ -110,17 +122,10 @@ inline float geodistFastAsinSqrt(float x)
}
return static_cast<float>(asin(sqrt(x))); // distance over 17083km, just compute honestly
}
}
/**
* The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees.
* The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance .
* Throws exception when one or several input values are not within reasonable bounds.
* Latitude must be in [-90, 90], longitude must be [-180, 180].
* Original code of this implementation of this function is here https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
* Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
* Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
* The main idea of this implementation is optimisations based on Taylor series, trigonometric identity and calculated constants once for cosine, arcsine(sqrt) and look up table.
*/
class FunctionGreatCircleDistance : public IFunction
{
public:
......@@ -128,133 +133,75 @@ public:
static FunctionPtr create(const Context &) { return std::make_shared<FunctionGreatCircleDistance>(); }
private:
enum class instr_type : uint8_t
{
get_float_64,
get_const_float_64
};
using instr_t = std::pair<instr_type, const IColumn *>;
using instrs_t = std::array<instr_t, 4>;
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 4; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
for (const auto arg_idx : ext::range(0, arguments.size()))
{
const auto arg = arguments[arg_idx].get();
if (!WhichDataType(arg).isFloat64())
if (!WhichDataType(arg).isFloat())
throw Exception(
"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be Float64",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
return std::make_shared<DataTypeFloat64>();
return std::make_shared<DataTypeFloat32>();
}
instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const)
Float32 greatCircleDistance(Float32 lon1deg, Float32 lat1deg, Float32 lon2deg, Float32 lat2deg)
{
instrs_t result;
out_const = true;
for (const auto arg_idx : ext::range(0, arguments.size()))
{
const auto column = block.getByPosition(arguments[arg_idx]).column.get();
if (const auto col = checkAndGetColumn<ColumnVector<Float64>>(column))
{
out_const = false;
result[arg_idx] = instr_t{instr_type::get_float_64, col};
}
else if (const auto col_const = checkAndGetColumnConst<ColumnVector<Float64>>(column))
{
result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const};
}
else
throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
return result;
}
/// https://en.wikipedia.org/wiki/Great-circle_distance
Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg)
{
if (lon1Deg < -180 || lon1Deg > 180 ||
lon2Deg < -180 || lon2Deg > 180 ||
lat1Deg < -90 || lat1Deg > 90 ||
lat2Deg < -90 || lat2Deg > 90)
if (lon1deg < -180 || lon1deg > 180 ||
lon2deg < -180 || lon2deg > 180 ||
lat1deg < -90 || lat1deg > 90 ||
lat2deg < -90 || lat2deg > 90)
{
throw Exception("Arguments values out of bounds for function " + getName(),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
float dlat = geodistDegDiff(lat1Deg - lat2Deg);
float dlon = geodistDegDiff(lon1Deg - lon2Deg);
float lat_diff = geodistDegDiff(lat1deg - lat2deg);
float lon_diff = geodistDegDiff(lon1deg - lon2deg);
if (dlon < 13)
if (lon_diff < 13)
{
// points are close enough; use flat ellipsoid model
// interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint
float m = (lat1Deg + lat2Deg + 180) * GEODIST_TABLE_K / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
int i = static_cast<int>(m);
i &= (GEODIST_TABLE_K - 1);
float m = (lat1deg + lat2deg + 180) * GEODIST_TABLE_K / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
size_t i = static_cast<size_t>(m) & (GEODIST_TABLE_K - 1);
float kk1 = g_GeoFlatK[i][0] + (g_GeoFlatK[i + 1][0] - g_GeoFlatK[i][0]) * (m - i);
float kk2 = g_GeoFlatK[i][1] + (g_GeoFlatK[i + 1][1] - g_GeoFlatK[i][1]) * (m - i);
return static_cast<float>(sqrt(kk1 * dlat * dlat + kk2 * dlon * dlon));
return static_cast<float>(sqrt(kk1 * lat_diff * lat_diff + kk2 * lon_diff * lon_diff));
}
else
{
// points too far away; use haversine
static const float d = 2 * 6371000;
float a = fsqr(geodistFastSin(lat_diff * TO_RADF2)) +
geodistFastCos(lat1deg * TO_RADF) * geodistFastCos(lat2deg * TO_RADF) *
fsqr(geodistFastSin(lon_diff * TO_RADF2));
return static_cast<float>(d * geodistFastAsinSqrt(a));
}
// points too far away; use haversine
static const float D = 2 * 6371000;
float a = fsqr(geodistFastSin(dlat * TO_RADF2)) +
geodistFastCos(lat1Deg * TO_RADF) * geodistFastCos(lat2Deg * TO_RADF) *
fsqr(geodistFastSin(dlon * TO_RADF2));
return static_cast<float>(D * geodistFastAsinSqrt(a));
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
const auto size = input_rows_count;
auto dst = ColumnVector<Float32>::create();
auto & dst_data = dst->getData();
dst_data.resize(input_rows_count);
bool result_is_const{};
auto instrs = getInstructions(block, arguments, result_is_const);
const IColumn & col_lon1 = *block.getByPosition(arguments[0]).column;
const IColumn & col_lat1 = *block.getByPosition(arguments[1]).column;
const IColumn & col_lon2 = *block.getByPosition(arguments[2]).column;
const IColumn & col_lat2 = *block.getByPosition(arguments[3]).column;
if (result_is_const)
{
const auto & colLon1 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[0]).column.get())->getValue<Float64>();
const auto & colLat1 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[1]).column.get())->getValue<Float64>();
const auto & colLon2 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[2]).column.get())->getValue<Float64>();
const auto & colLat2 = assert_cast<const ColumnConst *>(block.getByPosition(arguments[3]).column.get())->getValue<Float64>();
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
dst_data[row_num] = greatCircleDistance(col_lon1.getFloat32(), col_lat1.getFloat32(), col_lon2.getFloat32(), col_lat2.getFloat32());
Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2);
block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res);
}
else
{
auto dst = ColumnVector<Float64>::create();
auto & dst_data = dst->getData();
dst_data.resize(size);
Float64 vals[instrs.size()];
for (const auto row : ext::range(0, size))
{
for (const auto idx : ext::range(0, instrs.size()))
{
if (instr_type::get_float_64 == instrs[idx].first)
vals[idx] = assert_cast<const ColumnVector<Float64> *>(instrs[idx].second)->getData()[row];
else if (instr_type::get_const_float_64 == instrs[idx].first)
vals[idx] = assert_cast<const ColumnConst *>(instrs[idx].second)->getValue<Float64>();
else
throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR};
}
dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]);
}
block.getByPosition(result).column = std::move(dst);
}
block.getByPosition(result).column = std::move(dst);
}
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册