Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
TDengine
提交
c04da26c
T
TDengine
项目概览
慢慢CG
/
TDengine
与 Fork 源项目一致
Fork自
taosdata / TDengine
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c04da26c
编写于
1月 12, 2021
作者:
H
Haojun Liao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[TD-2634]
上级
84b867e2
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
447 addition
and
460 deletion
+447
-460
src/query/inc/qPercentile.h
src/query/inc/qPercentile.h
+23
-15
src/query/src/qAggMain.c
src/query/src/qAggMain.c
+1
-1
src/query/src/qPercentile.c
src/query/src/qPercentile.c
+133
-425
src/query/tests/percentileTest.cpp
src/query/tests/percentileTest.cpp
+254
-0
src/util/src/tcompare.c
src/util/src/tcompare.c
+36
-19
未找到文件。
src/query/inc/qPercentile.h
浏览文件 @
c04da26c
...
...
@@ -16,6 +16,10 @@
#ifndef TDENGINE_QPERCENTILE_H
#define TDENGINE_QPERCENTILE_H
#ifdef __cplusplus
extern
"C"
{
#endif
#include "qExtbuffer.h"
#include "qResultbuf.h"
#include "qTsbuf.h"
...
...
@@ -23,13 +27,13 @@
/*
 * Minimum / maximum pair kept for a bucket or for a single slot.
 *
 * Each bound is an anonymous union, so the same storage can be viewed as a
 * double, a 32-bit signed integer, a 64-bit signed integer or a 64-bit
 * unsigned integer. Readers must use the view that matches the one that was
 * written.
 *
 * u64MaxVal is unsigned so that it agrees with u64MinVal; with a signed
 * declaration any maximum above INT64_MAX would read back as a negative
 * number. The size and layout of the struct are unchanged.
 */
typedef struct MinMaxEntry {
  union {
    double   dMinVal;
    int32_t  iMinVal;
    int64_t  i64MinVal;
    uint64_t u64MinVal;
  };
  union {
    double   dMaxVal;
    int32_t  iMaxVal;
    int64_t  i64MaxVal;
    uint64_t u64MaxVal;
  };
} MinMaxEntry;
...
...
@@ -59,7 +63,7 @@ typedef struct tMemBucket {
int32_t
times
;
// count that has been checked for deciding the correct data value buckets.
__compar_fn_t
comparFn
;
tMemBucketSlot
*
pSlots
;
tMemBucketSlot
*
pSlots
;
SDiskbasedResultBuf
*
pBuffer
;
__perc_hash_func_t
hashFunc
;
}
tMemBucket
;
...
...
@@ -73,3 +77,7 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size);
double
getPercentile
(
tMemBucket
*
pMemBucket
,
double
percent
);
/*
 * The extern "C" block is opened inside the include guard, so it has to be
 * closed inside it as well; otherwise a second inclusion from C++ would emit
 * a stray closing brace.
 */
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_QPERCENTILE_H
\ No newline at end of file
src/query/src/qAggMain.c
浏览文件 @
c04da26c
...
...
@@ -2545,7 +2545,7 @@ static void percentile_next_step(SQLFunctionCtx *pCtx) {
if
(
pInfo
->
numOfElems
==
0
)
{
pResInfo
->
complete
=
true
;
}
else
{
pInfo
->
pMemBucket
=
tMemBucketCreate
(
pCtx
->
inputBytes
,
pCtx
->
inputType
,
GET_DOUBLE_VAL
(
&
pInfo
->
minval
),
GET_DOUBLE_VAL
(
&
pInfo
->
maxval
)
);
pInfo
->
pMemBucket
=
tMemBucketCreate
(
pCtx
->
inputBytes
,
pCtx
->
inputType
,
pInfo
->
minval
,
pInfo
->
maxval
);
}
pInfo
->
stage
+=
1
;
...
...
src/query/src/qPercentile.c
浏览文件 @
c04da26c
...
...
@@ -20,6 +20,7 @@
#include "taosdef.h"
#include "tulog.h"
#include "tcompare.h"
#include "ttype.h"
#define DEFAULT_NUM_OF_SLOT 1024
...
...
@@ -48,25 +49,15 @@ static tFilePage *loadDataFromFilePage(tMemBucket *pMemBucket, int32_t slotIdx)
}
static
void
resetBoundingBox
(
MinMaxEntry
*
range
,
int32_t
type
)
{
switch
(
type
)
{
case
TSDB_DATA_TYPE_BIGINT
:
{
if
(
IS_SIGNED_NUMERIC_TYPE
(
type
))
{
range
->
i64MaxVal
=
INT64_MIN
;
range
->
i64MinVal
=
INT64_MAX
;
break
;
};
case
TSDB_DATA_TYPE_INT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_TINYINT
:
{
range
->
iMaxVal
=
INT32_MIN
;
range
->
iMinVal
=
INT32_MAX
;
break
;
};
case
TSDB_DATA_TYPE_DOUBLE
:
case
TSDB_DATA_TYPE_FLOAT
:
{
}
else
if
(
IS_UNSIGNED_NUMERIC_TYPE
(
type
))
{
range
->
u64MaxVal
=
0
;
range
->
u64MinVal
=
UINT64_MAX
;
}
else
{
range
->
dMaxVal
=
-
DBL_MAX
;
range
->
dMinVal
=
DBL_MAX
;
break
;
}
}
}
...
...
@@ -75,23 +66,15 @@ static int32_t setBoundingBox(MinMaxEntry* range, int16_t type, double minval, d
return
-
1
;
}
switch
(
type
)
{
case
TSDB_DATA_TYPE_TINYINT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_INT
:
range
->
iMinVal
=
(
int32_t
)
minval
;
range
->
iMaxVal
=
(
int32_t
)
maxval
;
break
;
case
TSDB_DATA_TYPE_BIGINT
:
if
(
IS_SIGNED_NUMERIC_TYPE
(
type
))
{
range
->
i64MinVal
=
(
int64_t
)
minval
;
range
->
i64MaxVal
=
(
int64_t
)
maxval
;
break
;
case
TSDB_DATA_TYPE_FLOAT
:
case
TSDB_DATA_TYPE_DOUBLE
:
}
else
if
(
IS_UNSIGNED_NUMERIC_TYPE
(
type
)){
range
->
u64MinVal
=
(
uint64_t
)
minval
;
range
->
u64MaxVal
=
(
uint64_t
)
maxval
;
}
else
{
range
->
dMinVal
=
minval
;
range
->
dMaxVal
=
maxval
;
break
;
}
return
0
;
...
...
@@ -120,58 +103,24 @@ double findOnlyResult(tMemBucket *pMemBucket) {
tFilePage
*
pPage
=
getResBufPage
(
pMemBucket
->
pBuffer
,
pgInfo
->
pageId
);
assert
(
pPage
->
num
==
1
);
switch
(
pMemBucket
->
type
)
{
case
TSDB_DATA_TYPE_INT
:
return
*
(
int32_t
*
)
pPage
->
data
;
case
TSDB_DATA_TYPE_SMALLINT
:
return
*
(
int16_t
*
)
pPage
->
data
;
case
TSDB_DATA_TYPE_TINYINT
:
return
*
(
int8_t
*
)
pPage
->
data
;
case
TSDB_DATA_TYPE_BIGINT
:
return
(
double
)(
*
(
int64_t
*
)
pPage
->
data
);
case
TSDB_DATA_TYPE_DOUBLE
:
{
double
dv
=
GET_DOUBLE_VAL
(
pPage
->
data
);
return
dv
;
}
case
TSDB_DATA_TYPE_FLOAT
:
{
float
fv
=
GET_FLOAT_VAL
(
pPage
->
data
);
return
fv
;
}
default:
return
0
;
}
double
v
=
0
;
GET_TYPED_DATA
(
v
,
double
,
pMemBucket
->
type
,
pPage
->
data
);
return
v
;
}
return
0
;
}
int32_t
tBucketBigIntHash
(
tMemBucket
*
pBucket
,
const
void
*
value
)
{
int64_t
v
=
*
(
int64_t
*
)
value
;
int32_t
index
=
-
1
;
int32_t
halfSlot
=
pBucket
->
numOfSlots
>>
1
;
// int32_t bits = 32;//bitsOfNumber(pBucket->numOfSlots) - 1;
if
(
pBucket
->
range
.
i64MaxVal
==
INT64_MIN
)
{
if
(
v
>=
0
)
{
index
=
(
v
>>
(
64
-
9
))
+
halfSlot
;
}
else
{
// v<0
index
=
((
-
v
)
>>
(
64
-
9
));
index
=
-
index
+
(
halfSlot
-
1
);
}
return
index
;
}
else
{
// out of range
if
(
v
<
pBucket
->
range
.
i64MinVal
||
v
>
pBucket
->
range
.
i64MaxVal
)
{
return
-
1
;
}
int32_t
tBucketIntHash
(
tMemBucket
*
pBucket
,
const
void
*
value
)
{
int64_t
v
=
0
;
GET_TYPED_DATA
(
v
,
int64_t
,
pBucket
->
type
,
value
);
// todo hash for bigint and float and double
int64_t
span
=
pBucket
->
range
.
i64MaxVal
-
pBucket
->
range
.
i64MinVal
;
int32_t
index
=
-
1
;
// divide the value range into 1024 buckets
uint64_t
span
=
pBucket
->
range
.
i64MaxVal
-
pBucket
->
range
.
i64MinVal
;
if
(
span
<
pBucket
->
numOfSlots
)
{
int32_t
delta
=
(
int32_t
)(
v
-
pBucket
->
range
.
i64MinVal
)
;
index
=
delta
%
pBucket
->
numOfSlots
;
int32_t
delta
=
v
-
pBucket
->
range
.
i64MinVal
;
index
=
(
delta
%
pBucket
->
numOfSlots
)
;
}
else
{
double
slotSpan
=
(
double
)
span
/
pBucket
->
numOfSlots
;
index
=
(
int32_t
)((
v
-
pBucket
->
range
.
i64MinVal
)
/
slotSpan
);
...
...
@@ -180,57 +129,30 @@ int32_t tBucketBigIntHash(tMemBucket *pBucket, const void *value) {
}
}
assert
(
v
>=
pBucket
->
range
.
i64MinVal
&&
v
<=
pBucket
->
range
.
i64MaxVal
&&
index
>=
0
&&
index
<
pBucket
->
numOfSlots
);
return
index
;
}
}
// todo refactor to more generic
int32_t
tBucketIntHash
(
tMemBucket
*
pBucket
,
const
void
*
value
)
{
int32_t
v
=
0
;
switch
(
pBucket
->
type
)
{
case
TSDB_DATA_TYPE_SMALLINT
:
v
=
*
(
int16_t
*
)
value
;
break
;
case
TSDB_DATA_TYPE_TINYINT
:
v
=
*
(
int8_t
*
)
value
;
break
;
default:
v
=
*
(
int32_t
*
)
value
;
break
;
}
int32_t
tBucketUintHash
(
tMemBucket
*
pBucket
,
const
void
*
value
)
{
int64_t
v
=
0
;
GET_TYPED_DATA
(
v
,
uint64_t
,
pBucket
->
type
,
value
);
int32_t
index
=
-
1
;
if
(
pBucket
->
range
.
iMaxVal
==
INT32_MIN
)
{
/*
* taking negative integer into consideration,
* there is only half of pBucket->segs available for non-negative integer
*/
int32_t
halfSlot
=
pBucket
->
numOfSlots
>>
1
;
int32_t
bits
=
32
;
//bitsOfNumber(pBucket->numOfSlots) - 1;
if
(
v
>=
0
)
{
index
=
(
v
>>
(
bits
-
9
))
+
halfSlot
;
}
else
{
// v < 0
index
=
((
-
v
)
>>
(
32
-
9
));
index
=
-
index
+
(
halfSlot
-
1
);
}
return
index
;
}
else
{
// out of range
if
(
v
<
pBucket
->
range
.
iMinVal
||
v
>
pBucket
->
range
.
iMaxVal
)
{
return
-
1
;
}
// divide a range of [iMinVal, iMaxVal] into 1024 buckets
int32_t
span
=
pBucket
->
range
.
iMaxVal
-
pBucket
->
range
.
iMinVal
;
// divide the value range into 1024 buckets
uint64_t
span
=
pBucket
->
range
.
u64MaxVal
-
pBucket
->
range
.
u64MinVal
;
if
(
span
<
pBucket
->
numOfSlots
)
{
int32_t
delta
=
v
-
pBucket
->
range
.
i
MinVal
;
int32_t
delta
=
v
-
pBucket
->
range
.
u64
MinVal
;
index
=
(
delta
%
pBucket
->
numOfSlots
);
}
else
{
double
slotSpan
=
(
double
)
span
/
pBucket
->
numOfSlots
;
index
=
(
int32_t
)((
v
-
pBucket
->
range
.
i
MinVal
)
/
slotSpan
);
if
(
v
==
pBucket
->
range
.
i
MaxVal
)
{
index
=
(
int32_t
)((
v
-
pBucket
->
range
.
u64
MinVal
)
/
slotSpan
);
if
(
v
==
pBucket
->
range
.
u64
MaxVal
)
{
index
-=
1
;
}
}
assert
(
v
>=
pBucket
->
range
.
u64MinVal
&&
v
<=
pBucket
->
range
.
i64MaxVal
&&
index
>=
0
&&
index
<
pBucket
->
numOfSlots
);
return
index
;
}
}
int32_t
tBucketDoubleHash
(
tMemBucket
*
pBucket
,
const
void
*
value
)
{
...
...
@@ -243,21 +165,6 @@ int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) {
int32_t
index
=
-
1
;
if
(
pBucket
->
range
.
dMinVal
==
DBL_MAX
)
{
/*
* taking negative integer into consideration,
* there is only half of pBucket->segs available for non-negative integer
*/
double
x
=
DBL_MAX
/
(
pBucket
->
numOfSlots
>>
1
);
double
posx
=
(
v
+
DBL_MAX
)
/
x
;
return
((
int32_t
)
posx
)
%
pBucket
->
numOfSlots
;
}
else
{
// out of range
if
(
v
<
pBucket
->
range
.
dMinVal
||
v
>
pBucket
->
range
.
dMaxVal
)
{
return
-
1
;
}
// divide a range of [dMinVal, dMaxVal] into 1024 buckets
double
span
=
pBucket
->
range
.
dMaxVal
-
pBucket
->
range
.
dMinVal
;
if
(
span
<
pBucket
->
numOfSlots
)
{
...
...
@@ -271,34 +178,17 @@ int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) {
}
}
if
(
index
<
0
||
index
>
pBucket
->
numOfSlots
)
{
uError
(
"error in hash process. slot id: %d"
,
index
);
}
assert
(
v
>=
pBucket
->
range
.
dMinVal
&&
v
<=
pBucket
->
range
.
dMaxVal
&&
index
>=
0
&&
index
<
pBucket
->
numOfSlots
);
return
index
;
}
}
static
__perc_hash_func_t
getHashFunc
(
int32_t
type
)
{
switch
(
type
)
{
case
TSDB_DATA_TYPE_INT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_TINYINT
:
{
if
(
IS_SIGNED_NUMERIC_TYPE
(
type
))
{
return
tBucketIntHash
;
};
case
TSDB_DATA_TYPE_DOUBLE
:
case
TSDB_DATA_TYPE_FLOAT
:
{
}
else
if
(
IS_UNSIGNED_NUMERIC_TYPE
(
type
))
{
return
tBucketUintHash
;
}
else
{
return
tBucketDoubleHash
;
};
case
TSDB_DATA_TYPE_BIGINT
:
{
return
tBucketBigIntHash
;
};
default:
{
return
NULL
;
}
}
}
...
...
@@ -372,77 +262,41 @@ void tMemBucketDestroy(tMemBucket *pBucket) {
}
void
tMemBucketUpdateBoundingBox
(
MinMaxEntry
*
r
,
const
char
*
data
,
int32_t
dataType
)
{
switch
(
dataType
)
{
case
TSDB_DATA_TYPE_INT
:
{
int32_t
val
=
*
(
int32_t
*
)
data
;
if
(
r
->
iMinVal
>
val
)
{
r
->
iMinVal
=
val
;
}
if
(
r
->
iMaxVal
<
val
)
{
r
->
iMaxVal
=
val
;
}
break
;
};
case
TSDB_DATA_TYPE_BIGINT
:
{
int64_t
val
=
*
(
int64_t
*
)
data
;
if
(
r
->
i64MinVal
>
val
)
{
r
->
i64MinVal
=
val
;
}
if
(
IS_SIGNED_NUMERIC_TYPE
(
dataType
))
{
int64_t
v
=
0
;
GET_TYPED_DATA
(
v
,
int64_t
,
dataType
,
data
);
if
(
r
->
i64MaxVal
<
val
)
{
r
->
i64MaxVal
=
val
;
}
break
;
};
case
TSDB_DATA_TYPE_SMALLINT
:
{
int32_t
val
=
*
(
int16_t
*
)
data
;
if
(
r
->
iMinVal
>
val
)
{
r
->
iMinVal
=
val
;
}
if
(
r
->
iMaxVal
<
val
)
{
r
->
iMaxVal
=
val
;
}
break
;
};
case
TSDB_DATA_TYPE_TINYINT
:
{
int32_t
val
=
*
(
int8_t
*
)
data
;
if
(
r
->
iMinVal
>
val
)
{
r
->
iMinVal
=
val
;
if
(
r
->
i64MinVal
>
v
)
{
r
->
i64MinVal
=
v
;
}
if
(
r
->
iMaxVal
<
val
)
{
r
->
iMaxVal
=
val
;
if
(
r
->
i64MaxVal
<
v
)
{
r
->
i64MaxVal
=
v
;
}
}
else
if
(
IS_UNSIGNED_NUMERIC_TYPE
(
dataType
))
{
uint64_t
v
=
0
;
GET_TYPED_DATA
(
v
,
uint64_t
,
dataType
,
data
);
break
;
};
case
TSDB_DATA_TYPE_DOUBLE
:
{
// double val = *(double *)data;
double
val
=
GET_DOUBLE_VAL
(
data
);
if
(
r
->
dMinVal
>
val
)
{
r
->
dMinVal
=
val
;
if
(
r
->
i64MinVal
>
v
)
{
r
->
i64MinVal
=
v
;
}
if
(
r
->
dMaxVal
<
val
)
{
r
->
dMaxVal
=
val
;
if
(
r
->
i64MaxVal
<
v
)
{
r
->
i64MaxVal
=
v
;
}
break
;
};
case
TSDB_DATA_TYPE_FLOAT
:
{
double
val
=
GET_FLOAT_VAL
(
data
);
}
else
if
(
IS_FLOAT_TYPE
(
dataType
))
{
double
v
=
0
;
GET_TYPED_DATA
(
v
,
double
,
dataType
,
data
);
if
(
r
->
dMinVal
>
val
)
{
r
->
dMinVal
=
val
;
if
(
r
->
dMinVal
>
v
)
{
r
->
dMinVal
=
v
;
}
if
(
r
->
dMaxVal
<
val
)
{
r
->
dMaxVal
=
val
;
if
(
r
->
dMaxVal
<
v
)
{
r
->
dMaxVal
=
v
;
}
break
;
};
default:
{
assert
(
false
);
}
}
else
{
assert
(
0
);
}
}
...
...
@@ -452,16 +306,13 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, const char *data, int32_t dataT
int32_t
tMemBucketPut
(
tMemBucket
*
pBucket
,
const
void
*
data
,
size_t
size
)
{
assert
(
pBucket
!=
NULL
&&
data
!=
NULL
&&
size
>
0
);
pBucket
->
total
+=
(
int32_t
)
size
;
int32_t
count
=
0
;
int32_t
bytes
=
pBucket
->
bytes
;
for
(
int32_t
i
=
0
;
i
<
size
;
++
i
)
{
char
*
d
=
(
char
*
)
data
+
i
*
bytes
;
count
+=
1
;
int32_t
index
=
(
pBucket
->
hashFunc
)(
pBucket
,
d
);
if
(
index
==
-
1
)
{
// the value is out of range, do not add it into bucket
return
-
1
;
}
tMemBucketSlot
*
pSlot
=
&
pBucket
->
pSlots
[
index
];
tMemBucketUpdateBoundingBox
(
&
pSlot
->
range
,
d
,
pBucket
->
type
);
...
...
@@ -489,64 +340,11 @@ int32_t tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) {
pSlot
->
info
.
size
+=
1
;
}
pBucket
->
total
+=
count
;
return
0
;
}
////////////////////////////////////////////////////////////////////////////////////////////
static
UNUSED_FUNC
void
findMaxMinValue
(
tMemBucket
*
pMemBucket
,
double
*
maxVal
,
double
*
minVal
)
{
*
minVal
=
DBL_MAX
;
*
maxVal
=
-
DBL_MAX
;
for
(
int32_t
i
=
0
;
i
<
pMemBucket
->
numOfSlots
;
++
i
)
{
tMemBucketSlot
*
pSlot
=
&
pMemBucket
->
pSlots
[
i
];
if
(
pSlot
->
info
.
size
==
0
)
{
continue
;
}
switch
(
pMemBucket
->
type
)
{
case
TSDB_DATA_TYPE_INT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_TINYINT
:
{
double
minv
=
pSlot
->
range
.
iMinVal
;
double
maxv
=
pSlot
->
range
.
iMaxVal
;
if
(
*
minVal
>
minv
)
{
*
minVal
=
minv
;
}
if
(
*
maxVal
<
maxv
)
{
*
maxVal
=
maxv
;
}
break
;
}
case
TSDB_DATA_TYPE_DOUBLE
:
case
TSDB_DATA_TYPE_FLOAT
:
{
double
minv
=
pSlot
->
range
.
dMinVal
;
double
maxv
=
pSlot
->
range
.
dMaxVal
;
if
(
*
minVal
>
minv
)
{
*
minVal
=
minv
;
}
if
(
*
maxVal
<
maxv
)
{
*
maxVal
=
maxv
;
}
break
;
}
case
TSDB_DATA_TYPE_BIGINT
:
{
double
minv
=
(
double
)
pSlot
->
range
.
i64MinVal
;
double
maxv
=
(
double
)
pSlot
->
range
.
i64MaxVal
;
if
(
*
minVal
>
minv
)
{
*
minVal
=
minv
;
}
if
(
*
maxVal
<
maxv
)
{
*
maxVal
=
maxv
;
}
break
;
}
}
}
}
/*
*
* now, we need to find the minimum value of the next slot for
...
...
@@ -565,7 +363,6 @@ static MinMaxEntry getMinMaxEntryOfNextSlotWithData(tMemBucket *pMemBucket, int3
}
static
bool
isIdenticalData
(
tMemBucket
*
pMemBucket
,
int32_t
index
);
char
*
getFirstElemOfMemBuffer
(
tMemBucketSlot
*
pSeg
,
int32_t
slotIdx
,
tFilePage
*
pPage
);
static
double
getIdenticalDataVal
(
tMemBucket
*
pMemBucket
,
int32_t
slotIndex
)
{
assert
(
isIdenticalData
(
pMemBucket
,
slotIndex
));
...
...
@@ -573,24 +370,12 @@ static double getIdenticalDataVal(tMemBucket* pMemBucket, int32_t slotIndex) {
tMemBucketSlot
*
pSlot
=
&
pMemBucket
->
pSlots
[
slotIndex
];
double
finalResult
=
0
.
0
;
switch
(
pMemBucket
->
type
)
{
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_TINYINT
:
case
TSDB_DATA_TYPE_INT
:
{
finalResult
=
pSlot
->
range
.
iMinVal
;
break
;
}
case
TSDB_DATA_TYPE_FLOAT
:
case
TSDB_DATA_TYPE_DOUBLE
:
{
if
(
IS_SIGNED_NUMERIC_TYPE
(
pMemBucket
->
type
))
{
finalResult
=
pSlot
->
range
.
i64MinVal
;
}
else
if
(
IS_UNSIGNED_NUMERIC_TYPE
(
pMemBucket
->
type
))
{
finalResult
=
pSlot
->
range
.
u64MinVal
;
}
else
{
finalResult
=
pSlot
->
range
.
dMinVal
;
break
;
};
case
TSDB_DATA_TYPE_BIGINT
:
{
finalResult
=
(
double
)
pSlot
->
range
.
i64MinVal
;
break
;
}
}
return
finalResult
;
...
...
@@ -616,26 +401,16 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction)
double
maxOfThisSlot
=
0
;
double
minOfNextSlot
=
0
;
switch
(
pMemBucket
->
type
)
{
case
TSDB_DATA_TYPE_INT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_TINYINT
:
{
maxOfThisSlot
=
pSlot
->
range
.
iMaxVal
;
minOfNextSlot
=
next
.
iMinVal
;
break
;
};
case
TSDB_DATA_TYPE_FLOAT
:
case
TSDB_DATA_TYPE_DOUBLE
:
{
if
(
IS_SIGNED_NUMERIC_TYPE
(
pMemBucket
->
type
))
{
maxOfThisSlot
=
pSlot
->
range
.
i64MaxVal
;
minOfNextSlot
=
next
.
i64MinVal
;
}
else
if
(
IS_UNSIGNED_NUMERIC_TYPE
(
pMemBucket
->
type
))
{
maxOfThisSlot
=
pSlot
->
range
.
u64MaxVal
;
minOfNextSlot
=
next
.
u64MinVal
;
}
else
{
maxOfThisSlot
=
pSlot
->
range
.
dMaxVal
;
minOfNextSlot
=
next
.
dMinVal
;
break
;
};
case
TSDB_DATA_TYPE_BIGINT
:
{
maxOfThisSlot
=
(
double
)
pSlot
->
range
.
i64MaxVal
;
minOfNextSlot
=
(
double
)
next
.
i64MinVal
;
break
;
}
};
assert
(
minOfNextSlot
>
maxOfThisSlot
);
...
...
@@ -652,38 +427,8 @@ double getPercentileImpl(tMemBucket *pMemBucket, int32_t count, double fraction)
char
*
nextVal
=
thisVal
+
pMemBucket
->
bytes
;
double
td
=
1
.
0
,
nd
=
1
.
0
;
switch
(
pMemBucket
->
type
)
{
case
TSDB_DATA_TYPE_SMALLINT
:
{
td
=
*
(
int16_t
*
)
thisVal
;
nd
=
*
(
int16_t
*
)
nextVal
;
break
;
}
case
TSDB_DATA_TYPE_TINYINT
:
{
td
=
*
(
int8_t
*
)
thisVal
;
nd
=
*
(
int8_t
*
)
nextVal
;
break
;
}
case
TSDB_DATA_TYPE_INT
:
{
td
=
*
(
int32_t
*
)
thisVal
;
nd
=
*
(
int32_t
*
)
nextVal
;
break
;
};
case
TSDB_DATA_TYPE_FLOAT
:
{
td
=
GET_FLOAT_VAL
(
thisVal
);
nd
=
GET_FLOAT_VAL
(
nextVal
);
break
;
}
case
TSDB_DATA_TYPE_DOUBLE
:
{
td
=
GET_DOUBLE_VAL
(
thisVal
);
nd
=
GET_DOUBLE_VAL
(
nextVal
);
break
;
}
case
TSDB_DATA_TYPE_BIGINT
:
{
td
=
(
double
)
*
(
int64_t
*
)
thisVal
;
nd
=
(
double
)
*
(
int64_t
*
)
nextVal
;
break
;
}
}
GET_TYPED_DATA
(
td
,
double
,
pMemBucket
->
type
,
thisVal
);
GET_TYPED_DATA
(
nd
,
double
,
pMemBucket
->
type
,
nextVal
);
double
val
=
(
1
-
fraction
)
*
td
+
fraction
*
nd
;
tfree
(
buffer
);
...
...
@@ -741,20 +486,14 @@ double getPercentile(tMemBucket *pMemBucket, double percent) {
if
(
fabs
(
percent
-
100
.
0
)
<
DBL_EPSILON
||
(
percent
<
DBL_EPSILON
))
{
MinMaxEntry
*
pRange
=
&
pMemBucket
->
range
;
switch
(
pMemBucket
->
type
)
{
case
TSDB_DATA_TYPE_TINYINT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_INT
:
return
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
pRange
->
iMaxVal
:
pRange
->
iMinVal
;
case
TSDB_DATA_TYPE_BIGINT
:
{
if
(
IS_SIGNED_NUMERIC_TYPE
(
pMemBucket
->
type
))
{
double
v
=
(
double
)(
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
pRange
->
i64MaxVal
:
pRange
->
i64MinVal
);
return
v
;
}
case
TSDB_DATA_TYPE_FLOAT
:
case
TSDB_DATA_TYPE_DOUBLE
:
}
else
if
(
IS_UNSIGNED_NUMERIC_TYPE
(
pMemBucket
->
type
))
{
double
v
=
(
double
)(
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
pRange
->
u64MaxVal
:
pRange
->
u64MinVal
);
return
v
;
}
else
{
return
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
pRange
->
dMaxVal
:
pRange
->
dMinVal
;
default:
return
-
1
;
}
}
...
...
@@ -771,40 +510,9 @@ double getPercentile(tMemBucket *pMemBucket, double percent) {
bool
isIdenticalData
(
tMemBucket
*
pMemBucket
,
int32_t
index
)
{
tMemBucketSlot
*
pSeg
=
&
pMemBucket
->
pSlots
[
index
];
if
(
pMemBucket
->
type
==
TSDB_DATA_TYPE_INT
||
pMemBucket
->
type
==
TSDB_DATA_TYPE_BIGINT
||
pMemBucket
->
type
==
TSDB_DATA_TYPE_SMALLINT
||
pMemBucket
->
type
==
TSDB_DATA_TYPE_TINYINT
)
{
return
pSeg
->
range
.
i64MinVal
==
pSeg
->
range
.
i64MaxVal
;
}
if
(
pMemBucket
->
type
==
TSDB_DATA_TYPE_FLOAT
||
pMemBucket
->
type
==
TSDB_DATA_TYPE_DOUBLE
)
{
if
(
IS_FLOAT_TYPE
(
pMemBucket
->
type
))
{
return
fabs
(
pSeg
->
range
.
dMaxVal
-
pSeg
->
range
.
dMinVal
)
<
DBL_EPSILON
;
}
else
{
return
pSeg
->
range
.
i64MinVal
==
pSeg
->
range
.
i64MaxVal
;
}
return
false
;
}
/*
* get the first element of one slot into memory.
* if no data of current slot in memory, load it from disk
*/
char
*
getFirstElemOfMemBuffer
(
tMemBucketSlot
*
pSeg
,
int32_t
slotIdx
,
tFilePage
*
pPage
)
{
// STSBuf *pMemBuffer = pSeg->pBuffer[slotIdx];
char
*
thisVal
=
NULL
;
// if (pSeg->pBuffer[slotIdx]->numOfTotal != 0) {
//// thisVal = pSeg->pBuffer[slotIdx]->pHead->item.data;
// } else {
// /*
// * no data in memory, load one page into memory
// */
// tFlushoutInfo *pFlushInfo = &pMemBuffer->fileMeta.flushoutData.pFlushoutInfo[0];
// assert(pFlushInfo->numOfPages == pMemBuffer->fileMeta.nFileSize);
// int32_t ret;
// ret = fseek(pMemBuffer->file, pFlushInfo->startPageId * pMemBuffer->pageSize, SEEK_SET);
// UNUSED(ret);
// size_t sz = fread(pPage, pMemBuffer->pageSize, 1, pMemBuffer->file);
// UNUSED(sz);
// thisVal = pPage->data;
// }
return
thisVal
;
}
\ No newline at end of file
src/query/tests/percentileTest.cpp
0 → 100644
浏览文件 @
c04da26c
#include <gtest/gtest.h>
#include <iostream>
#include "qResultbuf.h"
#include "taos.h"
#include "taosdef.h"
#include "qPercentile.h"
namespace
{
// Creates a BIGINT bucket, passing start/end as the min/max arguments of
// tMemBucketCreate, then inserts every integer in [start, end] as an int64_t,
// one element per tMemBucketPut call. The caller owns the returned bucket.
tMemBucket *createBigIntDataBucket(int32_t start, int32_t end) {
  tMemBucket *bucket = tMemBucketCreate(sizeof(int64_t), TSDB_DATA_TYPE_BIGINT, start, end);

  int32_t cur = start;
  while (cur <= end) {
    int64_t item = cur;
    tMemBucketPut(bucket, &item, 1);
    ++cur;
  }

  return bucket;
}
// Creates an INT bucket, passing start/end as the min/max arguments of
// tMemBucketCreate, then inserts every integer in [start, end] as an int32_t,
// one element per tMemBucketPut call. The caller owns the returned bucket.
tMemBucket *createIntDataBucket(int32_t start, int32_t end) {
  tMemBucket *bucket = tMemBucketCreate(sizeof(int32_t), TSDB_DATA_TYPE_INT, start, end);

  int32_t cur = start;
  while (cur <= end) {
    int32_t item = cur;
    tMemBucketPut(bucket, &item, 1);
    ++cur;
  }

  return bucket;
}
// Creates a DOUBLE bucket, passing start/end as the min/max arguments of
// tMemBucketCreate, then inserts every integer in [start, end] converted to
// double. A non-zero return from tMemBucketPut is reported on stdout and the
// loop carries on. The caller owns the returned bucket.
tMemBucket *createDoubleDataBucket(int32_t start, int32_t end) {
  tMemBucket *bucket = tMemBucketCreate(sizeof(double), TSDB_DATA_TYPE_DOUBLE, start, end);

  int32_t cur = start;
  while (cur <= end) {
    double item = cur;
    if (tMemBucketPut(bucket, &item, 1) != 0) {
      printf("value out of range:%f", item);
    }
    ++cur;
  }

  return bucket;
}
// Creates a bucket of the given unsigned type (element size taken from
// tDataTypeDesc[type].nSize), passing start/end as the min/max arguments of
// tMemBucketCreate, then inserts every integer in [start, end].
// A non-zero return from tMemBucketPut is reported on stdout and the loop
// carries on. The caller owns the returned bucket.
//
// NOTE(review): each value is handed over as the address of a uint64_t even
// when the bucket type is narrower; presumably only the low-order bytes are
// read, which would give the intended value on little-endian hosts only.
// Confirm against tMemBucketPut.
tMemBucket *createUnsignedDataBucket(int32_t start, int32_t end, int32_t type) {
  tMemBucket *pBucket = tMemBucketCreate(tDataTypeDesc[type].nSize, type, start, end);
  for (int32_t i = start; i <= end; ++i) {
    uint64_t k = i;

    int32_t ret = tMemBucketPut(pBucket, &k, 1);
    if (ret != 0) {
      // k is an integer: printing it with %f is undefined behaviour.
      printf("value out of range:%llu", (unsigned long long)k);
    }
  }

  return pBucket;
}
void
intDataTest
()
{
printf
(
"running %s
\n
"
,
__FUNCTION__
);
tMemBucket
*
pBucket
=
NULL
;
double
result
=
0.
;
pBucket
=
createIntDataBucket
(
0
,
0
);
result
=
getPercentile
(
pBucket
,
0
);
ASSERT_DOUBLE_EQ
(
result
,
0
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createIntDataBucket
(
0
,
1
);
result
=
getPercentile
(
pBucket
,
100
);
ASSERT_DOUBLE_EQ
(
result
,
1
);
result
=
getPercentile
(
pBucket
,
0
);
ASSERT_DOUBLE_EQ
(
result
,
0
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createIntDataBucket
(
-
1
,
1
);
result
=
getPercentile
(
pBucket
,
50
);
ASSERT_DOUBLE_EQ
(
result
,
0
);
result
=
getPercentile
(
pBucket
,
0
);
ASSERT_DOUBLE_EQ
(
result
,
-
1
);
result
=
getPercentile
(
pBucket
,
75
);
ASSERT_DOUBLE_EQ
(
result
,
0.5
);
result
=
getPercentile
(
pBucket
,
100
);
ASSERT_DOUBLE_EQ
(
result
,
1
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createIntDataBucket
(
0
,
99999
);
result
=
getPercentile
(
pBucket
,
50
);
ASSERT_DOUBLE_EQ
(
result
,
49999.5
);
tMemBucketDestroy
(
pBucket
);
}
void
bigintDataTest
()
{
printf
(
"running %s
\n
"
,
__FUNCTION__
);
tMemBucket
*
pBucket
=
NULL
;
double
result
=
0.0
;
pBucket
=
createBigIntDataBucket
(
-
1000
,
1000
);
result
=
getPercentile
(
pBucket
,
50
);
ASSERT_DOUBLE_EQ
(
result
,
0.
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createBigIntDataBucket
(
-
10000
,
10000
);
result
=
getPercentile
(
pBucket
,
100
);
ASSERT_DOUBLE_EQ
(
result
,
10000.0
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createBigIntDataBucket
(
-
10000
,
10000
);
result
=
getPercentile
(
pBucket
,
75
);
ASSERT_DOUBLE_EQ
(
result
,
5000.0
);
tMemBucketDestroy
(
pBucket
);
}
void
doubleDataTest
()
{
printf
(
"running %s
\n
"
,
__FUNCTION__
);
tMemBucket
*
pBucket
=
NULL
;
double
result
=
0
;
pBucket
=
createDoubleDataBucket
(
-
10
,
10
);
result
=
getPercentile
(
pBucket
,
0
);
ASSERT_DOUBLE_EQ
(
result
,
-
10.0
);
printf
(
"result is: %lf
\n
"
,
result
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createDoubleDataBucket
(
-
100000
,
100000
);
result
=
getPercentile
(
pBucket
,
25
);
ASSERT_DOUBLE_EQ
(
result
,
-
50000
);
printf
(
"result is: %lf
\n
"
,
result
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createDoubleDataBucket
(
-
100000
,
100000
);
result
=
getPercentile
(
pBucket
,
50
);
ASSERT_DOUBLE_EQ
(
result
,
0
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createDoubleDataBucket
(
-
100000
,
100000
);
result
=
getPercentile
(
pBucket
,
75
);
ASSERT_DOUBLE_EQ
(
result
,
50000
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createDoubleDataBucket
(
-
100000
,
100000
);
result
=
getPercentile
(
pBucket
,
100
);
ASSERT_DOUBLE_EQ
(
result
,
100000.0
);
printf
(
"result is: %lf
\n
"
,
result
);
tMemBucketDestroy
(
pBucket
);
}
/*
 * Large data test: the median is computed over the doubles 0 .. 100000000
 * (about 0.1 billion values, roughly 800MB of data), and the wall-clock time
 * of the run is printed in whole seconds.
 */
void largeDataTest() {
  printf("running : %s\n", __FUNCTION__);

  tMemBucket *pBucket = NULL;
  double      result = 0;

  struct timeval tv;
  gettimeofday(&tv, NULL);

  // Convert time_t to int64_t once: PRId64 requires an int64_t argument and
  // time_t is not guaranteed to be that type.
  int64_t start = tv.tv_sec;
  printf("start time: %" PRId64 "\n", start);
  pBucket = createDoubleDataBucket(0, 100000000);

  result = getPercentile(pBucket, 50);
  ASSERT_DOUBLE_EQ(result, 50000000);

  gettimeofday(&tv, NULL);

  int64_t elapsed = (int64_t)tv.tv_sec - start;
  printf("total elapsed time: %" PRId64 " sec.", elapsed);
  printf("the result of %d is: %lf\n", 50, result);
  tMemBucketDestroy(pBucket);
}
void
qsortTest
()
{
printf
(
"running : %s
\n
"
,
__FUNCTION__
);
SSchema
field
[
1
]
=
{
{
TSDB_DATA_TYPE_INT
,
"k"
,
sizeof
(
int32_t
)},
};
const
int32_t
num
=
2000
;
int32_t
*
d
=
(
int32_t
*
)
malloc
(
sizeof
(
int32_t
)
*
num
);
for
(
int32_t
i
=
0
;
i
<
num
;
++
i
)
{
d
[
i
]
=
i
%
4
;
}
const
int32_t
numOfOrderCols
=
1
;
int32_t
orderColIdx
=
0
;
SColumnModel
*
pModel
=
createColumnModel
(
field
,
1
,
1000
);
tOrderDescriptor
*
pDesc
=
tOrderDesCreate
(
&
orderColIdx
,
numOfOrderCols
,
pModel
,
1
);
tColDataQSort
(
pDesc
,
num
,
0
,
num
-
1
,
(
char
*
)
d
,
1
);
for
(
int32_t
i
=
0
;
i
<
num
;
++
i
)
{
printf
(
"%d
\t
"
,
d
[
i
]);
}
printf
(
"
\n
"
);
destroyColumnModel
(
pModel
);
}
void
unsignedDataTest
()
{
printf
(
"running %s
\n
"
,
__FUNCTION__
);
tMemBucket
*
pBucket
=
NULL
;
double
result
=
0.0
;
pBucket
=
createUnsignedDataBucket
(
0
,
1000
,
TSDB_DATA_TYPE_UINT
);
result
=
getPercentile
(
pBucket
,
50
);
ASSERT_DOUBLE_EQ
(
result
,
500.0
);
tMemBucketDestroy
(
pBucket
);
pBucket
=
createUnsignedDataBucket
(
0
,
10000
,
TSDB_DATA_TYPE_UBIGINT
);
result
=
getPercentile
(
pBucket
,
100
);
ASSERT_DOUBLE_EQ
(
result
,
10000.0
);
result
=
getPercentile
(
pBucket
,
0
);
ASSERT_DOUBLE_EQ
(
result
,
0.0
);
result
=
getPercentile
(
pBucket
,
50
);
ASSERT_DOUBLE_EQ
(
result
,
5000
);
result
=
getPercentile
(
pBucket
,
75
);
ASSERT_DOUBLE_EQ
(
result
,
7500
);
tMemBucketDestroy
(
pBucket
);
}
}
// namespace
// Runs the percentile scenarios in a fixed order; qsortTest() is disabled.
TEST(testCase, percentileTest) {
  typedef void (*ScenarioFn)();

  const ScenarioFn scenarios[] = {
      intDataTest, bigintDataTest, doubleDataTest, unsignedDataTest, largeDataTest,
  };

  for (size_t idx = 0; idx < sizeof(scenarios) / sizeof(scenarios[0]); ++idx) {
    scenarios[idx]();
  }
}
src/util/src/tcompare.c
浏览文件 @
c04da26c
...
...
@@ -30,24 +30,32 @@ int32_t compareInt8Val(const void *pLeft, const void *pRight) {
return
0
;
}
int32_t
compare
IntDouble
Val
(
const
void
*
pLeft
,
const
void
*
pRight
)
{
int
64_t
lhs
=
GET_INT64_VAL
(
pLef
t
);
double
rhs
=
GET_DOUBLE_VAL
(
pRight
)
;
if
(
fabs
(
lhs
-
rhs
)
<
FLT_EPSILON
)
{
/*
 * Three-way comparison of two values read with GET_UINT32_VAL.
 *
 * Returns 1 if the left value is greater, -1 if it is smaller, 0 if equal.
 * The operands are held in uint32_t: storing them in a signed 32-bit variable
 * would make every value >= 2^31 compare as negative.
 */
int32_t compareUint32Val(const void *pLeft, const void *pRight) {
  uint32_t left = GET_UINT32_VAL(pLeft), right = GET_UINT32_VAL(pRight);
  if (left > right) return 1;
  if (left < right) return -1;
  return 0;
}
else
{
return
(
lhs
>
rhs
)
?
1
:
-
1
;
}
}
int32_t
compareDoubleIntVal
(
const
void
*
pLeft
,
const
void
*
pRight
)
{
double
lhs
=
GET_DOUBLE_VAL
(
pLeft
);
int64_t
rhs
=
GET_INT64_VAL
(
pRight
);
if
(
fabs
(
lhs
-
rhs
)
<
FLT_EPSILON
)
{
/*
 * Three-way comparison of two values read with GET_UINT64_VAL.
 *
 * Returns 1 if the left value is greater, -1 if it is smaller, 0 if equal.
 * The operands are held in uint64_t: storing them in a signed 64-bit variable
 * would make every value >= 2^63 compare as negative.
 */
int32_t compareUint64Val(const void *pLeft, const void *pRight) {
  uint64_t left = GET_UINT64_VAL(pLeft), right = GET_UINT64_VAL(pRight);
  if (left > right) return 1;
  if (left < right) return -1;
  return 0;
}
/*
 * Three-way comparison of two values read with GET_UINT16_VAL.
 *
 * Returns 1 if the left value is greater, -1 if it is smaller, 0 if equal.
 * The operands are held in uint16_t: storing them in a signed 16-bit variable
 * would make every value >= 2^15 compare as negative.
 */
int32_t compareUint16Val(const void *pLeft, const void *pRight) {
  uint16_t left = GET_UINT16_VAL(pLeft), right = GET_UINT16_VAL(pRight);
  if (left > right) return 1;
  if (left < right) return -1;
  return 0;
}
/*
 * Three-way comparison of two values read with GET_UINT8_VAL.
 *
 * Returns 1 if the left value is greater, -1 if it is smaller, 0 if equal.
 */
int32_t compareUint8Val(const void *pLeft, const void *pRight) {
  const uint8_t lhs = GET_UINT8_VAL(pLeft);
  const uint8_t rhs = GET_UINT8_VAL(pRight);

  if (lhs == rhs) {
    return 0;
  }

  return (lhs > rhs) ? 1 : -1;
}
else
{
return
(
lhs
>
rhs
)
?
1
:
-
1
;
}
}
int32_t
compareFloatVal
(
const
void
*
pLeft
,
const
void
*
pRight
)
{
...
...
@@ -369,15 +377,24 @@ __compar_fn_t getKeyComparFunc(int32_t keyType) {
case
TSDB_DATA_TYPE_DOUBLE
:
comparFn
=
compareDoubleVal
;
break
;
case
TSDB_DATA_TYPE_UTINYINT
:
comparFn
=
compareUint8Val
;
break
;
case
TSDB_DATA_TYPE_USMALLINT
:
comparFn
=
compareUint16Val
;
break
;
case
TSDB_DATA_TYPE_UINT
:
comparFn
=
compareUint32Val
;
break
;
case
TSDB_DATA_TYPE_UBIGINT
:
comparFn
=
compareUint64Val
;
break
;
case
TSDB_DATA_TYPE_BINARY
:
comparFn
=
compareLenPrefixedStr
;
break
;
case
TSDB_DATA_TYPE_NCHAR
:
comparFn
=
compareLenPrefixedWStr
;
break
;
default:
comparFn
=
compareInt32Val
;
break
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录