Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
9dcf9248
T
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9dcf9248
编写于
5月 16, 2022
作者:
G
Ganlin Zhao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(query): add HYPERLOGLOG function
上级
ede4a57c
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
146 addition
and
1 deletion
+146
-1
source/libs/function/src/builtinsimpl.c
source/libs/function/src/builtinsimpl.c
+146
-1
未找到文件。
source/libs/function/src/builtinsimpl.c
浏览文件 @
9dcf9248
...
...
@@ -26,7 +26,13 @@
#define MAVG_MAX_POINTS_NUM 1000
#define SAMPLE_MAX_POINTS_NUM 1000
#define TAIL_MAX_POINTS_NUM 100
#define TAIL_MAX_OFFSET 100
#define TAIL_MAX_OFFSET 10
// HyperLogLog sketch parameters. A 64-bit hash is split into a bucket index
// (the low HLL_BUCKET_BITS bits) and a data part (the remaining bits).
#define HLL_BUCKET_BITS 14 // number of low hash bits used as the bucket index
// Number of hash bits left once the bucket index has been shifted out.
#define HLL_DATA_BITS (64-HLL_BUCKET_BITS)
// Total number of buckets in one sketch.
#define HLL_BUCKETS (1<<HLL_BUCKET_BITS)
// Mask that extracts the bucket index from a hash value.
#define HLL_BUCKET_MASK (HLL_BUCKETS-1)
#define HLL_ALPHA_INF 0.721347520444481703680 // 0.5/ln(2); scale factor applied in the cardinality estimate
typedef
struct
SSumRes
{
union
{
...
...
@@ -129,6 +135,11 @@ typedef enum {
LOG_BIN
}
EHistoBinType
;
/*
 * Intermediate state of the HYPERLOGLOG aggregate function.
 *
 * NOTE(review): `result` is the first member; the generic finalize path
 * presumably reads the output value from the start of this buffer, so keep
 * the member order unless that is confirmed otherwise.
 */
typedef struct SHLLFuncInfo {
  uint64_t result;                // cardinality estimate, filled in by hllFinalize()
  uint8_t  buckets[HLL_BUCKETS];  // per-bucket maximum count, raised by hllFunction()
} SHLLInfo;
typedef
struct
SStateInfo
{
union
{
int64_t
count
;
...
...
@@ -2729,6 +2740,140 @@ int32_t histogramFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
return
pResInfo
->
numOfRes
;
}
/*
 * Reports the amount of intermediate memory the HYPERLOGLOG function needs.
 *
 * pFunc - unused.
 * pEnv  - receives the required size (one SHLLInfo) in calcMemSize.
 *
 * Always returns true.
 */
bool getHLLFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) {
  pEnv->calcMemSize = sizeof(SHLLInfo);
  return true;
}
/*
 * Hashes one value and derives its HyperLogLog bucket and count.
 *
 * data  - bytes to hash.
 * bytes - number of bytes to hash.
 * buk   - out: bucket index, taken from the low HLL_BUCKET_BITS bits of the hash.
 *
 * Returns 1 + the number of trailing zero bits in the remaining hash bits.
 * A guard bit is planted at position HLL_DATA_BITS, so the scan always stops
 * and the result never exceeds HLL_DATA_BITS + 1.
 */
static uint8_t hllCountNum(void* data, int32_t bytes, int32_t* buk) {
  const uint64_t digest = MurmurHash3_64(data, bytes);

  // Low bits choose the bucket.
  *buk = (int32_t)(digest & HLL_BUCKET_MASK);

  // Drop the bucket bits and set the guard bit that terminates the scan.
  const uint64_t payload = (digest >> HLL_BUCKET_BITS) | ((uint64_t)1 << HLL_DATA_BITS);

  uint8_t rank = 1;
  for (uint64_t probe = 1; (payload & probe) == 0; probe <<= 1) {
    ++rank;
  }
  return rank;
}
/*
 * Builds a histogram of bucket values.
 *
 * buckets     - array of HLL_BUCKETS bucket values.
 * bucketHisto - in/out: bucketHisto[v] is incremented once for every bucket
 *               holding the value v. The caller must supply an array large
 *               enough for the biggest value stored in `buckets` and must
 *               initialise it (entries are added to, not overwritten).
 *
 * The buckets are read one byte at a time. Reading the uint8_t array through
 * a uint64_t pointer would violate the strict-aliasing rule and could be a
 * misaligned access, while producing exactly the same counts.
 */
static void hllBucketHisto(uint8_t* buckets, int32_t* bucketHisto) {
  for (int32_t i = 0; i < HLL_BUCKETS; ++i) {
    bucketHisto[buckets[i]]++;
  }
}
static
double
hllTau
(
double
x
)
{
if
(
x
==
0
.
||
x
==
1
.)
return
0
.;
double
zPrime
;
double
y
=
1
.
0
;
double
z
=
1
-
x
;
do
{
x
=
sqrt
(
x
);
zPrime
=
z
;
y
*=
0
.
5
;
z
-=
pow
(
1
-
x
,
2
)
*
y
;
}
while
(
zPrime
!=
z
);
return
z
/
3
;
}
static
double
hllSigma
(
double
x
)
{
if
(
x
==
1
.
0
)
return
INFINITY
;
double
zPrime
;
double
y
=
1
;
double
z
=
x
;
do
{
x
*=
x
;
zPrime
=
z
;
z
+=
x
*
y
;
y
+=
y
;
}
while
(
zPrime
!=
z
);
return
z
;
}
/*
 * Estimates the cardinality represented by a set of HyperLogLog buckets.
 * The algorithm follows the paper "New cardinality estimation algorithms for
 * HyperLogLog sketches".
 *
 * buckets - array of HLL_BUCKETS bucket values.
 *
 * Returns the estimate rounded to the nearest integer.
 */
static uint64_t hllCountCnt(uint8_t* buckets) {
  double m = HLL_BUCKETS;

  // histo[v] = number of buckets currently holding the value v.
  int32_t histo[64] = {0};
  hllBucketHisto(buckets, histo);

  // Contribution of the buckets that reached the maximum value.
  double denom = m * hllTau((m - histo[HLL_DATA_BITS + 1]) / (double)m);

  // Fold in the intermediate values from the largest down to 1, halving the
  // running sum after each one.
  int level = HLL_DATA_BITS;
  while (level >= 1) {
    denom += histo[level];
    denom *= 0.5;
    --level;
  }

  // Contribution of the buckets that are still zero.
  denom += m * hllSigma(histo[0] / (double)m);

  double estimate = (double)llroundl(HLL_ALPHA_INF * m * m / denom);
  return (uint64_t)estimate;
}
/*
 * Per-block step of the HYPERLOGLOG aggregate: hashes every non-NULL value of
 * the first input column in the current row range and raises the matching
 * bucket in the intermediate SHLLInfo to the largest count seen so far.
 *
 * pCtx - function context; supplies the input column, the row range
 *        (startRowIndex, numOfRows) and the result buffer holding SHLLInfo.
 *
 * Returns TSDB_CODE_SUCCESS.
 */
int32_t hllFunction(SqlFunctionCtx* pCtx) {
  SHLLInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));

  SInputColumnInfoData* pInput = &pCtx->input;
  SColumnInfoData*      pCol = pInput->pData[0];

  int32_t type = pCol->info.type;
  int32_t bytes = pCol->info.bytes;

  int32_t start = pInput->startRowIndex;
  int32_t numOfRows = pInput->numOfRows;

  // Length handed to the hash. It is computed once, outside the loop: the
  // header size must be removed exactly once for variable-length types.
  // Subtracting it from `bytes` on every row made the hashed length shrink
  // row after row, so equal values in different rows hashed differently.
  // NOTE(review): this is the column's declared width minus the header, not
  // the length stored in each value's own header -- confirm that the bytes
  // following a shorter payload are deterministic, or switch to the
  // per-value length.
  const bool    isVarType = IS_VAR_DATA_TYPE(type);
  const int32_t hashLen = isVarType ? bytes - (int32_t)VARSTR_HEADER_SIZE : bytes;

  int32_t numOfElems = 0;
  for (int32_t i = start; i < numOfRows + start; ++i) {
    if (pCol->hasNull && colDataIsNull_s(pCol, i)) {
      continue;
    }

    numOfElems++;

    char* data = colDataGetData(pCol, i);
    if (isVarType) {
      // Skip the length header and hash only the payload.
      data = varDataVal(data);
    }

    int32_t index = 0;
    uint8_t count = hllCountNum(data, hashLen, &index);
    uint8_t oldcount = pInfo->buckets[index];
    if (count > oldcount) {
      pInfo->buckets[index] = count;
    }
  }

  SET_VAL(GET_RES_INFO(pCtx), numOfElems, 1);
  return TSDB_CODE_SUCCESS;
}
/*
 * Final step of the HYPERLOGLOG aggregate: turns the accumulated buckets into
 * a cardinality estimate, stores it in SHLLInfo.result, then delegates to
 * functionFinalize() and returns its result.
 *
 * pCtx   - function context whose result buffer holds the SHLLInfo.
 * pBlock - output block, passed through to functionFinalize().
 */
int32_t hllFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
  SHLLInfo* pState = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx));

  const uint64_t estimate = hllCountCnt(pState->buckets);
  pState->result = estimate;

  return functionFinalize(pCtx, pBlock);
}
bool
getStateFuncEnv
(
SFunctionNode
*
UNUSED_PARAM
(
pFunc
),
SFuncExecEnv
*
pEnv
)
{
pEnv
->
calcMemSize
=
sizeof
(
SStateInfo
);
return
true
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录