Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
9768c30b
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9768c30b
编写于
9月 09, 2020
作者:
H
Haojun Liao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[td-1372]
上级
5b6ca20f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
205 addition
and
24 deletion
+205
-24
src/client/src/tscFunctionImpl.c
src/client/src/tscFunctionImpl.c
+127
-9
src/query/inc/qPercentile.h
src/query/inc/qPercentile.h
+2
-2
src/query/src/qPercentile.c
src/query/src/qPercentile.c
+76
-13
未找到文件。
src/client/src/tscFunctionImpl.c
浏览文件 @
9768c30b
...
...
@@ -117,6 +117,10 @@ typedef struct SFirstLastInfo {
typedef
struct
SFirstLastInfo
SLastrowInfo
;
typedef
struct
SPercentileInfo
{
tMemBucket
*
pMemBucket
;
int32_t
stage
;
double
minval
;
double
maxval
;
int64_t
numOfElems
;
}
SPercentileInfo
;
typedef
struct
STopBotInfo
{
...
...
@@ -302,7 +306,7 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI
}
else
if
(
functionId
==
TSDB_FUNC_PERCT
)
{
*
type
=
(
int16_t
)
TSDB_DATA_TYPE_DOUBLE
;
*
bytes
=
(
int16_t
)
sizeof
(
double
);
*
interBytes
=
(
int16_t
)
sizeof
(
double
);
*
interBytes
=
(
int16_t
)
sizeof
(
SPercentileInfo
);
}
else
if
(
functionId
==
TSDB_FUNC_LEASTSQR
)
{
*
type
=
TSDB_DATA_TYPE_BINARY
;
*
bytes
=
TSDB_AVG_FUNCTION_INTER_BUFFER_SIZE
;
// string
...
...
@@ -2428,12 +2432,14 @@ static bool percentile_function_setup(SQLFunctionCtx *pCtx) {
if
(
!
function_setup
(
pCtx
))
{
return
false
;
}
// in the first round, get the min-max value of all involved data
SResultInfo
*
pResInfo
=
GET_RES_INFO
(
pCtx
);
SPercentileInfo
*
pInfo
=
pResInfo
->
interResultBuf
;
pInfo
->
minval
=
DBL_MAX
;
pInfo
->
maxval
=
-
DBL_MAX
;
pInfo
->
numOfElems
=
0
;
((
SPercentileInfo
*
)(
pResInfo
->
interResultBuf
))
->
pMemBucket
=
tMemBucketCreate
(
pCtx
->
inputBytes
,
pCtx
->
inputType
);
return
true
;
}
...
...
@@ -2442,7 +2448,65 @@ static void percentile_function(SQLFunctionCtx *pCtx) {
SResultInfo
*
pResInfo
=
GET_RES_INFO
(
pCtx
);
SPercentileInfo
*
pInfo
=
pResInfo
->
interResultBuf
;
// the first stage, only acquire the min/max value
if
(
pInfo
->
stage
==
0
)
{
if
(
pCtx
->
preAggVals
.
isSet
)
{
if
(
pInfo
->
minval
>
pCtx
->
preAggVals
.
statis
.
min
)
{
pInfo
->
minval
=
pCtx
->
preAggVals
.
statis
.
min
;
}
if
(
pInfo
->
maxval
<
pCtx
->
preAggVals
.
statis
.
max
)
{
pInfo
->
maxval
=
pCtx
->
preAggVals
.
statis
.
max
;
}
pInfo
->
numOfElems
+=
(
pCtx
->
size
-
pCtx
->
preAggVals
.
statis
.
numOfNull
);
}
else
{
for
(
int32_t
i
=
0
;
i
<
pCtx
->
size
;
++
i
)
{
char
*
data
=
GET_INPUT_CHAR_INDEX
(
pCtx
,
i
);
if
(
pCtx
->
hasNull
&&
isNull
(
data
,
pCtx
->
inputType
))
{
continue
;
}
// TODO extract functions
double
v
=
0
;
switch
(
pCtx
->
inputType
)
{
case
TSDB_DATA_TYPE_TINYINT
:
v
=
GET_INT8_VAL
(
data
);
break
;
case
TSDB_DATA_TYPE_SMALLINT
:
v
=
GET_INT16_VAL
(
data
);
break
;
case
TSDB_DATA_TYPE_BIGINT
:
v
=
(
double
)(
GET_INT64_VAL
(
data
));
break
;
case
TSDB_DATA_TYPE_FLOAT
:
v
=
GET_FLOAT_VAL
(
data
);
break
;
case
TSDB_DATA_TYPE_DOUBLE
:
v
=
GET_DOUBLE_VAL
(
data
);
break
;
default:
v
=
GET_INT32_VAL
(
data
);
break
;
}
if
(
v
<
pInfo
->
minval
)
{
pInfo
->
minval
=
v
;
}
if
(
v
>
pInfo
->
maxval
)
{
pInfo
->
maxval
=
v
;
}
pInfo
->
numOfElems
+=
1
;
}
}
return
;
}
// the second stage, calculate the true percentile value
for
(
int32_t
i
=
0
;
i
<
pCtx
->
size
;
++
i
)
{
char
*
data
=
GET_INPUT_CHAR_INDEX
(
pCtx
,
i
);
if
(
pCtx
->
hasNull
&&
isNull
(
data
,
pCtx
->
inputType
))
{
...
...
@@ -2462,10 +2526,47 @@ static void percentile_function_f(SQLFunctionCtx *pCtx, int32_t index) {
if
(
pCtx
->
hasNull
&&
isNull
(
pData
,
pCtx
->
inputType
))
{
return
;
}
SResultInfo
*
pResInfo
=
GET_RES_INFO
(
pCtx
);
SPercentileInfo
*
pInfo
=
(
SPercentileInfo
*
)
pResInfo
->
interResultBuf
;
if
(
pInfo
->
stage
==
0
)
{
// TODO extract functions
double
v
=
0
;
switch
(
pCtx
->
inputType
)
{
case
TSDB_DATA_TYPE_TINYINT
:
v
=
GET_INT8_VAL
(
pData
);
break
;
case
TSDB_DATA_TYPE_SMALLINT
:
v
=
GET_INT16_VAL
(
pData
);
break
;
case
TSDB_DATA_TYPE_BIGINT
:
v
=
(
double
)(
GET_INT64_VAL
(
pData
));
break
;
case
TSDB_DATA_TYPE_FLOAT
:
v
=
GET_FLOAT_VAL
(
pData
);
break
;
case
TSDB_DATA_TYPE_DOUBLE
:
v
=
GET_DOUBLE_VAL
(
pData
);
break
;
default:
v
=
GET_INT32_VAL
(
pData
);
break
;
}
if
(
v
<
pInfo
->
minval
)
{
pInfo
->
minval
=
v
;
}
if
(
v
>
pInfo
->
maxval
)
{
pInfo
->
maxval
=
v
;
}
pInfo
->
numOfElems
+=
1
;
return
;
}
tMemBucketPut
(
pInfo
->
pMemBucket
,
pData
,
1
);
SET_VAL
(
pCtx
,
1
,
1
);
...
...
@@ -2488,6 +2589,23 @@ static void percentile_finalizer(SQLFunctionCtx *pCtx) {
doFinalizer
(
pCtx
);
}
static
void
percentile_next_step
(
SQLFunctionCtx
*
pCtx
)
{
SResultInfo
*
pResInfo
=
GET_RES_INFO
(
pCtx
);
SPercentileInfo
*
pInfo
=
pResInfo
->
interResultBuf
;
if
(
pInfo
->
stage
==
0
)
{
// all data are null, set it completed
if
(
pInfo
->
numOfElems
==
0
)
{
pResInfo
->
complete
=
true
;
}
pInfo
->
stage
+=
1
;
pInfo
->
pMemBucket
=
tMemBucketCreate
(
pCtx
->
inputBytes
,
pCtx
->
inputType
,
pInfo
->
minval
,
pInfo
->
maxval
);
}
else
{
pResInfo
->
complete
=
true
;
}
}
//////////////////////////////////////////////////////////////////////////////////
static
SAPercentileInfo
*
getAPerctInfo
(
SQLFunctionCtx
*
pCtx
)
{
SResultInfo
*
pResInfo
=
GET_RES_INFO
(
pCtx
);
...
...
@@ -4513,7 +4631,7 @@ SQLAggFuncElem aAggs[] = {{
percentile_function_setup
,
percentile_function
,
percentile_function_f
,
no
_next_step
,
percentile
_next_step
,
percentile_finalizer
,
noop1
,
noop1
,
...
...
src/query/inc/qPercentile.h
浏览文件 @
9768c30b
...
...
@@ -64,11 +64,11 @@ typedef struct tMemBucket {
__perc_hash_func_t
hashFunc
;
}
tMemBucket
;
tMemBucket
*
tMemBucketCreate
(
int16_t
nElemSize
,
int16_t
dataType
);
tMemBucket
*
tMemBucketCreate
(
int16_t
nElemSize
,
int16_t
dataType
,
double
minval
,
double
maxval
);
void
tMemBucketDestroy
(
tMemBucket
*
pBucket
);
void
tMemBucketPut
(
tMemBucket
*
pBucket
,
const
void
*
data
,
size_t
size
);
int32_t
tMemBucketPut
(
tMemBucket
*
pBucket
,
const
void
*
data
,
size_t
size
);
double
getPercentile
(
tMemBucket
*
pMemBucket
,
double
percent
);
...
...
src/query/src/qPercentile.c
浏览文件 @
9768c30b
...
...
@@ -70,6 +70,33 @@ static void resetBoundingBox(MinMaxEntry* range, int32_t type) {
}
}
static
int32_t
setBoundingBox
(
MinMaxEntry
*
range
,
int16_t
type
,
double
minval
,
double
maxval
)
{
if
(
minval
>
maxval
)
{
return
-
1
;
}
switch
(
type
)
{
case
TSDB_DATA_TYPE_TINYINT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_INT
:
range
->
iMinVal
=
minval
;
range
->
iMaxVal
=
maxval
;
break
;
case
TSDB_DATA_TYPE_BIGINT
:
range
->
i64MinVal
=
minval
;
range
->
i64MaxVal
=
maxval
;
break
;
case
TSDB_DATA_TYPE_FLOAT
:
case
TSDB_DATA_TYPE_DOUBLE
:
range
->
dMinVal
=
minval
;
range
->
dMaxVal
=
maxval
;
break
;
}
return
0
;
}
static
void
resetPosInfo
(
SSlotInfo
*
pInfo
)
{
pInfo
->
size
=
0
;
pInfo
->
pageId
=
-
1
;
...
...
@@ -135,6 +162,11 @@ int32_t tBucketBigIntHash(tMemBucket *pBucket, const void *value) {
return
index
;
}
else
{
// out of range
if
(
v
<
pBucket
->
range
.
i64MinVal
||
v
>
pBucket
->
range
.
i64MaxVal
)
{
return
-
1
;
}
// todo hash for bigint and float and double
int64_t
span
=
pBucket
->
range
.
i64MaxVal
-
pBucket
->
range
.
i64MinVal
;
if
(
span
<
pBucket
->
numOfSlots
)
{
...
...
@@ -179,6 +211,11 @@ int32_t tBucketIntHash(tMemBucket *pBucket, const void *value) {
return
index
;
}
else
{
// out of range
if
(
v
<
pBucket
->
range
.
iMinVal
||
v
>
pBucket
->
range
.
iMaxVal
)
{
return
-
1
;
}
// divide a range of [iMinVal, iMaxVal] into 1024 buckets
int32_t
span
=
pBucket
->
range
.
iMaxVal
-
pBucket
->
range
.
iMinVal
;
if
(
span
<
pBucket
->
numOfSlots
)
{
...
...
@@ -209,6 +246,12 @@ int32_t tBucketDoubleHash(tMemBucket *pBucket, const void *value) {
double
posx
=
(
v
+
DBL_MAX
)
/
x
;
return
((
int32_t
)
posx
)
%
pBucket
->
numOfSlots
;
}
else
{
// out of range
if
(
v
<
pBucket
->
range
.
dMinVal
||
v
>
pBucket
->
range
.
dMaxVal
)
{
return
-
1
;
}
// divide a range of [dMinVal, dMaxVal] into 1024 buckets
double
span
=
pBucket
->
range
.
dMaxVal
-
pBucket
->
range
.
dMinVal
;
if
(
span
<
pBucket
->
numOfSlots
)
{
...
...
@@ -262,7 +305,7 @@ static void resetSlotInfo(tMemBucket* pBucket) {
}
}
tMemBucket
*
tMemBucketCreate
(
int16_t
nElemSize
,
int16_t
dataType
)
{
tMemBucket
*
tMemBucketCreate
(
int16_t
nElemSize
,
int16_t
dataType
,
double
minval
,
double
maxval
)
{
tMemBucket
*
pBucket
=
(
tMemBucket
*
)
calloc
(
1
,
sizeof
(
tMemBucket
));
if
(
pBucket
==
NULL
)
{
return
NULL
;
...
...
@@ -278,9 +321,14 @@ tMemBucket *tMemBucketCreate(int16_t nElemSize, int16_t dataType) {
pBucket
->
maxCapacity
=
200000
;
if
(
setBoundingBox
(
&
pBucket
->
range
,
pBucket
->
type
,
minval
,
maxval
)
!=
0
)
{
uError
(
"MemBucket:%p, invalid value range: %f-%f"
,
pBucket
,
minval
,
maxval
);
free
(
pBucket
);
return
NULL
;
}
pBucket
->
elemPerPage
=
(
pBucket
->
bufPageSize
-
sizeof
(
tFilePage
))
/
pBucket
->
bytes
;
pBucket
->
comparFn
=
getKeyComparFunc
(
pBucket
->
type
);
resetBoundingBox
(
&
pBucket
->
range
,
pBucket
->
type
);
pBucket
->
hashFunc
=
getHashFunc
(
pBucket
->
type
);
if
(
pBucket
->
hashFunc
==
NULL
)
{
...
...
@@ -395,23 +443,25 @@ void tMemBucketUpdateBoundingBox(MinMaxEntry *r, char *data, int32_t dataType) {
/*
* in memory bucket, we only accept data array list
*/
void
tMemBucketPut
(
tMemBucket
*
pBucket
,
const
void
*
data
,
size_t
size
)
{
int32_t
tMemBucketPut
(
tMemBucket
*
pBucket
,
const
void
*
data
,
size_t
size
)
{
assert
(
pBucket
!=
NULL
&&
data
!=
NULL
&&
size
>
0
);
pBucket
->
total
+=
(
int32_t
)
size
;
int32_t
bytes
=
pBucket
->
bytes
;
for
(
int32_t
i
=
0
;
i
<
size
;
++
i
)
{
char
*
d
=
(
char
*
)
data
+
i
*
bytes
;
int32_t
slotIdx
=
(
pBucket
->
hashFunc
)(
pBucket
,
d
);
assert
(
slotIdx
>=
0
);
int32_t
index
=
(
pBucket
->
hashFunc
)(
pBucket
,
d
);
if
(
index
==
-
1
)
{
// the value is out of range, do not add it into bucket
return
-
1
;
}
tMemBucketSlot
*
pSlot
=
&
pBucket
->
pSlots
[
slotId
x
];
tMemBucketSlot
*
pSlot
=
&
pBucket
->
pSlots
[
inde
x
];
tMemBucketUpdateBoundingBox
(
&
pSlot
->
range
,
d
,
pBucket
->
type
);
// ensure available memory pages to allocate
int32_t
groupId
=
getGroupId
(
pBucket
->
numOfSlots
,
slotId
x
,
pBucket
->
times
);
int32_t
groupId
=
getGroupId
(
pBucket
->
numOfSlots
,
inde
x
,
pBucket
->
times
);
int32_t
pageId
=
-
1
;
if
(
pSlot
->
info
.
data
==
NULL
||
pSlot
->
info
.
data
->
num
>=
pBucket
->
elemPerPage
)
{
...
...
@@ -432,10 +482,12 @@ void tMemBucketPut(tMemBucket *pBucket, const void *data, size_t size) {
pSlot
->
info
.
data
->
num
+=
1
;
pSlot
->
info
.
size
+=
1
;
}
return
0
;
}
////////////////////////////////////////////////////////////////////////////////////////////
static
void
findMaxMinValue
(
tMemBucket
*
pMemBucket
,
double
*
maxVal
,
double
*
minVal
)
{
static
UNUSED_FUNC
void
findMaxMinValue
(
tMemBucket
*
pMemBucket
,
double
*
maxVal
,
double
*
minVal
)
{
*
minVal
=
DBL_MAX
;
*
maxVal
=
-
DBL_MAX
;
...
...
@@ -681,16 +733,27 @@ double getPercentile(tMemBucket *pMemBucket, double percent) {
// find the min/max value, no need to scan all data in bucket
if
(
fabs
(
percent
-
100
.
0
)
<
DBL_EPSILON
||
(
percent
<
DBL_EPSILON
))
{
double
minx
=
0
,
maxx
=
0
;
findMaxMinValue
(
pMemBucket
,
&
maxx
,
&
minx
);
MinMaxEntry
*
pRange
=
&
pMemBucket
->
range
;
return
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
maxx
:
minx
;
switch
(
pMemBucket
->
type
)
{
case
TSDB_DATA_TYPE_TINYINT
:
case
TSDB_DATA_TYPE_SMALLINT
:
case
TSDB_DATA_TYPE_INT
:
return
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
pRange
->
iMaxVal
:
pRange
->
iMinVal
;
case
TSDB_DATA_TYPE_BIGINT
:
return
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
pRange
->
i64MaxVal
:
pRange
->
i64MinVal
;
case
TSDB_DATA_TYPE_FLOAT
:
case
TSDB_DATA_TYPE_DOUBLE
:
return
fabs
(
percent
-
100
)
<
DBL_EPSILON
?
pRange
->
dMaxVal
:
pRange
->
dMinVal
;
default:
return
-
1
;
}
}
double
percentVal
=
(
percent
*
(
pMemBucket
->
total
-
1
))
/
((
double
)
100
.
0
);
int32_t
orderIdx
=
(
int32_t
)
percentVal
;
// do put data by using buckets
int32_t
orderIdx
=
(
int32_t
)
percentVal
;
return
getPercentileImpl
(
pMemBucket
,
orderIdx
,
percentVal
-
orderIdx
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录