未验证 提交 f5e88f8c 编写于 作者: A alexey-milovidov 提交者: GitHub

Merge pull request #7307 from ClickHouse/new-branch-for-new-geodist

Speed up greatCircleDistance function with test
......@@ -10,9 +10,6 @@
#include <math.h>
#include <array>
#define DEGREES_IN_RADIANS (M_PI / 180.0)
#define EARTH_RADIUS_IN_METERS 6372797.560856
namespace DB
{
......@@ -24,19 +21,109 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; }
namespace
{
const double PI = 3.14159265358979323846;
const float TO_RADF = static_cast<float>(PI / 180.0);
const float TO_RADF2 = static_cast<float>(PI / 360.0);
const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
const int GEODIST_TABLE_ASIN = 512;
const int GEODIST_TABLE_K = 1024;
float g_GeoCos[GEODIST_TABLE_COS + 1]; /// cos(x) table
float g_GeoAsin[GEODIST_TABLE_ASIN + 1]; /// asin(sqrt(x)) table
float g_GeoFlatK[GEODIST_TABLE_K + 1][2]; /// geodistAdaptive() flat ellipsoid method k1,k2 coeffs table
inline double sqr(double v)
{
return v * v;
}
inline float fsqr(float v)
{
return v * v;
}
void geodistInit()
{
for (size_t i = 0; i <= GEODIST_TABLE_COS; ++i)
g_GeoCos[i] = static_cast<float>(cos(2 * PI * i / GEODIST_TABLE_COS)); // [0, 2pi] -> [0, COSTABLE]
for (size_t i = 0; i <= GEODIST_TABLE_ASIN; ++i)
g_GeoAsin[i] = static_cast<float>(asin(
sqrt(static_cast<double>(i) / GEODIST_TABLE_ASIN))); // [0, 1] -> [0, ASINTABLE]
for (size_t i = 0; i <= GEODIST_TABLE_K; ++i)
{
double x = PI * i / GEODIST_TABLE_K - PI * 0.5; // [-pi/2, pi/2] -> [0, KTABLE]
g_GeoFlatK[i][0] = static_cast<float>(sqr(111132.09 - 566.05 * cos(2 * x) + 1.20 * cos(4 * x)));
g_GeoFlatK[i][1] = static_cast<float>(sqr(111415.13 * cos(x) - 94.55 * cos(3 * x) + 0.12 * cos(5 * x)));
}
}
inline float geodistDegDiff(float f)
{
f = static_cast<float>(fabs(f));
while (f > 360)
f -= 360;
if (f > 180)
f = 360 - f;
return f;
}
inline float geodistFastCos(float x)
{
float y = static_cast<float>(fabs(x) * GEODIST_TABLE_COS / PI / 2);
int i = static_cast<int>(y);
y -= i;
i &= (GEODIST_TABLE_COS - 1);
return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y;
}
inline float geodistFastSin(float x)
{
float y = static_cast<float>(fabs(x) * GEODIST_TABLE_COS / PI / 2);
int i = static_cast<int>(y);
y -= i;
i = (i - GEODIST_TABLE_COS / 4) & (GEODIST_TABLE_COS - 1); // cos(x-pi/2)=sin(x), costable/4=pi/2
return g_GeoCos[i] + (g_GeoCos[i + 1] - g_GeoCos[i]) * y;
}
/// fast implementation of asin(sqrt(x))
/// max error in floats 0.00369%, in doubles 0.00072%
inline float geodistFastAsinSqrt(float x)
{
if (x < 0.122)
{
// distance under 4546km, Taylor error under 0.00072%
float y = static_cast<float>(sqrt(x));
return y + x * y * 0.166666666666666f + x * x * y * 0.075f + x * x * x * y * 0.044642857142857f;
}
if (x < 0.948)
{
// distance under 17083km, 512-entry LUT error under 0.00072%
x *= GEODIST_TABLE_ASIN;
int i = static_cast<int>(x);
return g_GeoAsin[i] + (g_GeoAsin[i + 1] - g_GeoAsin[i]) * (x - i);
}
return static_cast<float>(asin(sqrt(x))); // distance over 17083km, just compute honestly
}
}
/**
* The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees.
* The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance.
* The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance .
* Throws exception when one or several input values are not within reasonable bounds.
* Latitude must be in [-90, 90], longitude must be [-180, 180]
*
* Latitude must be in [-90, 90], longitude must be [-180, 180].
* Original code of this implementation of this function is here https://github.com/sphinxsearch/sphinx/blob/409f2c2b5b2ff70b04e38f92b6b1a890326bad65/src/sphinxexpr.cpp#L3825.
* Andrey Aksenov, the author of original code, permitted to use this code in ClickHouse under the Apache 2.0 license.
* Presentation about this code from Highload++ Siberia 2019 is here https://github.com/ClickHouse/ClickHouse/files/3324740/1_._._GEODIST_._.pdf
* The main idea of this implementation is optimisations based on Taylor series, trigonometric identity and calculated constants once for cosine, arcsine(sqrt) and look up table.
*/
class FunctionGreatCircleDistance : public IFunction
{
public:
static constexpr auto name = "greatCircleDistance";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionGreatCircleDistance>(); }
......@@ -103,16 +190,30 @@ private:
lat1Deg < -90 || lat1Deg > 90 ||
lat2Deg < -90 || lat2Deg > 90)
{
throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
throw Exception("Arguments values out of bounds for function " + getName(),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
Float64 lon1Rad = degToRad(lon1Deg);
Float64 lat1Rad = degToRad(lat1Deg);
Float64 lon2Rad = degToRad(lon2Deg);
Float64 lat2Rad = degToRad(lat2Deg);
Float64 u = sin((lat2Rad - lat1Rad) / 2);
Float64 v = sin((lon2Rad - lon1Rad) / 2);
return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v));
float dlat = geodistDegDiff(lat1Deg - lat2Deg);
float dlon = geodistDegDiff(lon1Deg - lon2Deg);
if (dlon < 13)
{
// points are close enough; use flat ellipsoid model
// interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint
float m = (lat1Deg + lat2Deg + 180) * GEODIST_TABLE_K / 360; // [-90, 90] degrees -> [0, KTABLE] indexes
int i = static_cast<int>(m);
i &= (GEODIST_TABLE_K - 1);
float kk1 = g_GeoFlatK[i][0] + (g_GeoFlatK[i + 1][0] - g_GeoFlatK[i][0]) * (m - i);
float kk2 = g_GeoFlatK[i][1] + (g_GeoFlatK[i + 1][1] - g_GeoFlatK[i][1]) * (m - i);
return static_cast<float>(sqrt(kk1 * dlat * dlat + kk2 * dlon * dlon));
}
// points too far away; use haversine
static const float D = 2 * 6371000;
float a = fsqr(geodistFastSin(dlat * TO_RADF2)) +
geodistFastCos(lat1Deg * TO_RADF) * geodistFastCos(lat2Deg * TO_RADF) *
fsqr(geodistFastSin(dlon * TO_RADF2));
return static_cast<float>(D * geodistFastAsinSqrt(a));
}
......@@ -160,6 +261,7 @@ private:
void registerFunctionGreatCircleDistance(FunctionFactory & factory)
{
geodistInit();
factory.registerFunction<FunctionGreatCircleDistance>();
}
......
......@@ -22,6 +22,8 @@ You can use `substitions`, `create`, `fill` and `drop` queries to prepare test.
Take into account, that these tests will run in CI which consists of 56-cores and 512 RAM machines. Queries will be executed much faster than on local laptop.
If your test continued more than 10 minutes, please, add tag `long` to have an opportunity to run all tests and skip long ones.
### How to run performance test
You have to run clickhouse-server and after you can start testing:
......
<test>
<type>once</type>
<stop_conditions>
<any_of>
<average_speed_not_changing_for_ms>1000</average_speed_not_changing_for_ms>
<total_time_ms>10000</total_time_ms>
</any_of>
</stop_conditions>
<!-- lon [-180; 180], lat [-90; 90] -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(greatCircleDistance((rand() % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand()) / 4294967296 - 180, (rand() % 180) * 1. - 90))</query>
<!-- 55.755830, 37.617780 is center of Moscow -->
<query>SELECT count() FROM system.numbers WHERE NOT ignore(greatCircleDistance(55. + toFloat64(rand()) / 4294967296, 37. + toFloat64(rand()) / 4294967296, 55. + toFloat64(rand()) / 4294967296, 37. + toFloat64(rand()) / 4294967296))</query>
</test>
SELECT floor(greatCircleDistance(33.3, 55.3, 38.7, 55.1)) AS distance;
SELECT floor(greatCircleDistance(33.3 + v, 55.3 + v, 38.7 + v , 55.1 + v)) AS distance from
(
select number + 0.1 as v from system.numbers limit 1
);
SELECT floor(greatCircleDistance(33.3, 55.3, 33.3, 55.3)) AS distance;
-- consts are from vincenty formula from geopy
-- k = '158.756175, 53.006373'
-- u = '37.531014, 55.703050'
-- y = '37.588144, 55.733842'
-- m = '37.617780, 55.755830'
-- n = '83.089598, 54.842461'
select abs(greatCircleDistance(37.531014, 55.703050, 37.588144, 55.733842) - 4964.25740448) / 4964.25740448 < 0.004;
select abs(greatCircleDistance(37.531014, 55.703050, 37.617780, 55.755830) - 8015.52288508) / 8015.52288508 < 0.004;
select abs(greatCircleDistance(37.588144, 55.733842, 37.617780, 55.755830) - 3075.27332275) / 3075.27332275 < 0.004;
select abs(greatCircleDistance(83.089598, 54.842461, 37.617780, 55.755830) - 2837839.72863) / 2837839.72863 < 0.004;
select abs(greatCircleDistance(37.617780, 55.755830, 158.756175, 53.006373) - 6802821.68814) / 6802821.68814 < 0.004;
select abs(greatCircleDistance(83.089598, 54.842461, 158.756175, 53.006373) - 4727216.39539) / 4727216.39539 < 0.004;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册