Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
taosdata
TDengine
提交
c7560202
TDengine
项目概览
taosdata
/
TDengine
1 年多 前同步成功
通知
1185
Star
22016
Fork
4786
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
TDengine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c7560202
编写于
1月 09, 2023
作者:
H
Haojun Liao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor: do some internal refactor.
上级
f90fa07e
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
33 addition
and
65 deletion
+33
-65
source/util/src/tcompression.c
source/util/src/tcompression.c
+33
-65
未找到文件。
source/util/src/tcompression.c
浏览文件 @
c7560202
...
@@ -265,7 +265,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
...
@@ -265,7 +265,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
int64_t
prev_value
=
0
;
int64_t
prev_value
=
0
;
while
(
1
)
{
while
(
1
)
{
if
(
count
==
nelements
)
break
;
if
(
_pos
==
nelements
)
break
;
uint64_t
w
=
0
;
uint64_t
w
=
0
;
memcpy
(
&
w
,
ip
,
LONG_BYTES
);
memcpy
(
&
w
,
ip
,
LONG_BYTES
);
...
@@ -284,8 +284,8 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
...
@@ -284,8 +284,8 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
int64_t
*
p
=
(
int64_t
*
)
output
;
int64_t
*
p
=
(
int64_t
*
)
output
;
if
(
selector
==
0
||
selector
==
1
)
{
if
(
selector
==
0
||
selector
==
1
)
{
int32_t
gRemainder
=
nelements
-
count
;
int32_t
gRemainder
=
nelements
-
_pos
;
int32_t
num
=
gRemainder
>
elems
?
elems
:
gRemainder
;
int32_t
num
=
gRemainder
<
elems
?
gRemainder
:
elems
;
int32_t
batch
=
num
>>
2
;
int32_t
batch
=
num
>>
2
;
int32_t
remainder
=
num
&
0x03
;
int32_t
remainder
=
num
&
0x03
;
...
@@ -302,100 +302,68 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
...
@@ -302,100 +302,68 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha
count
+=
num
;
count
+=
num
;
}
else
{
}
else
{
int32_t
gRemainder
=
(
nelements
-
count
);
int32_t
gRemainder
=
(
nelements
-
_pos
);
int32_t
num
=
gRemainder
>
elems
?
elems
:
gRemainder
;
int32_t
num
=
(
gRemainder
>
elems
)
?
elems
:
gRemainder
;
int32_t
batch
=
num
>>
2
;
int32_t
batch
=
num
>>
2
;
int32_t
remain
=
num
&
0x03
;
int32_t
remain
=
num
&
0x03
;
#if 1
#if 1
#if 1
__m256i
base
=
_mm256_set1_epi64x
(
w
);
__m256i
base
=
_mm256_set1_epi64x
(
w
);
__m256i
mask
_
=
_mm256_set1_epi64x
(
mask
);
__m256i
mask
Val
=
_mm256_set1_epi64x
(
mask
);
__m256i
shiftBits
=
_mm256_set_epi64x
(
bit
*
3
+
4
,
bit
*
2
+
4
,
bit
+
4
,
4
);
__m256i
shiftBits
=
_mm256_set_epi64x
(
bit
*
3
+
4
,
bit
*
2
+
4
,
bit
+
4
,
4
);
__m256i
inc
=
_mm256_set1_epi64x
(
bit
<<
2
);
__m256i
inc
=
_mm256_set1_epi64x
(
bit
<<
2
);
for
(
int32_t
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int32_t
i
=
0
;
i
<
batch
;
++
i
)
{
__m256i
after
=
_mm256_srlv_epi64
(
base
,
shiftBits
);
__m256i
after
=
_mm256_srlv_epi64
(
base
,
shiftBits
);
__m256i
zz
=
_mm256_and_si256
(
after
,
mask_
);
__m256i
zigzagVal
=
_mm256_and_si256
(
after
,
maskVal
);
printf
(
"1
\n
"
);
//
#define ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1))) // zigzag decode
//
ZIGZAG_DECODE(T, v) (((v) >> 1) ^ -((T)((v)&1)))
__m256i
signmask
=
_mm256_and_si256
(
_mm256_set
_epi64x
(
1
,
1
,
1
,
1
),
zz
);
__m256i
signmask
=
_mm256_and_si256
(
_mm256_set
1_epi64x
(
1
),
zigzagVal
);
signmask
=
_mm256_sub_epi64
(
_mm256_setzero_si256
(),
signmask
);
signmask
=
_mm256_sub_epi64
(
_mm256_setzero_si256
(),
signmask
);
// get the four zigzag values here
__m256i
delta
=
_mm256_xor_si256
(
_mm256_srli_epi64
(
zigzagVal
,
1
),
signmask
);
// now here we get the four zigzag value
// calculate the cumulative sum (prefix sum) for each number
__m256i
final
=
_mm256_xor_si256
(
_mm256_srli_epi64
(
zz
,
1
),
signmask
);
// calculate the cumulative sum (prefix sum)
// decode[0] = prev_value + final[0]
// decode[0] = prev_value + final[0]
// decode[1] = decode[0] + final[1] -----> prev_value + final[0] + final[1]
// decode[1] = decode[0] + final[1] -----> prev_value + final[0] + final[1]
// decode[2] = decode[1] + final[1] -----> prev_value + final[0] + final[1] + final[2]
// decode[2] = decode[1] + final[1] -----> prev_value + final[0] + final[1] + final[2]
// decode[3] = decode[2] + final[1] -----> prev_value + final[0] + final[1] + final[2] + final[3]
// decode[3] = decode[2] + final[1] -----> prev_value + final[0] + final[1] + final[2] + final[3]
printf
(
"2
\n
"
);
// 1, 2, 3, 4
//+ 0, 1, 2, 3
// 1, 3, 5, 7
// shift and add for the first round
__m128i
prev
=
_mm_set1_epi64x
(
prev_value
);
__m128i
prev
=
_mm_set1_epi64x
(
prev_value
);
final
=
_mm256_add_epi64
(
final
,
_mm256_slli_si256
(
final
,
8
));
delta
=
_mm256_add_epi64
(
delta
,
_mm256_slli_si256
(
delta
,
8
));
// x = 1, 2, 3, 4
_mm256_storeu_si256
((
__m256i
*
)
&
p
[
_pos
],
delta
);
// + 0, 1, 2, 3
// = 1, 3, 5, 7
// 1, 3, 5, 7
_mm256_storeu_si256
((
__m256i
*
)
&
p
[
_pos
],
final
);
//+ 0, 0, 1, 3
// 1, 3, 6, 10
__m128i
first
=
_mm_loadu_si128
((
__m128i
*
)
&
p
[
_pos
]);
// shift and add operation for the second round
__m128i
sec
=
_mm_add_epi64
(
_mm_loadu_si128
((
__m128i
*
)
&
p
[
_pos
+
2
]),
first
);
__m128i
firstPart
=
_mm_loadu_si128
((
__m128i
*
)
&
p
[
_pos
]);
sec
=
_mm_add_epi64
(
sec
,
prev
);
__m128i
secPart
=
_mm_add_epi64
(
_mm_loadu_si128
((
__m128i
*
)
&
p
[
_pos
+
2
]),
firstPart
);
first
=
_mm_add_epi64
(
first
,
prev
);
firstPart
=
_mm_add_epi64
(
firstPart
,
prev
);
secPart
=
_mm_add_epi64
(
secPart
,
prev
);
_mm_storeu_si128
((
__m128i
*
)
&
p
[
_pos
],
first
);
_mm_storeu_si128
((
__m128i
*
)
&
p
[
_pos
+
2
],
sec
);
// save it in the memory
_mm_storeu_si128
((
__m128i
*
)
&
p
[
_pos
],
firstPart
);
_mm_storeu_si128
((
__m128i
*
)
&
p
[
_pos
+
2
],
secPart
);
shiftBits
=
_mm256_add_epi64
(
shiftBits
,
inc
);
shiftBits
=
_mm256_add_epi64
(
shiftBits
,
inc
);
prev_value
=
p
[
_pos
+
3
];
prev_value
=
p
[
_pos
+
3
];
_pos
+=
4
;
_pos
+=
4
;
printf
(
"3
\n
"
);
}
}
#else
// manual unrolling, to erase the hotspot
uint64_t
zz
[
4
];
for
(
int32_t
i
=
0
;
i
<
batch
;
++
i
)
{
// handle the remain value
zigzag_value
=
((
w
>>
v
)
&
mask
);
zz
[
0
]
=
ZIGZAG_DECODE
(
int64_t
,
zigzag_value
);
v
+=
bit
;
zigzag_value
=
((
w
>>
v
)
&
mask
);
zz
[
1
]
=
ZIGZAG_DECODE
(
int64_t
,
zigzag_value
);
v
+=
bit
;
zigzag_value
=
((
w
>>
v
)
&
mask
);
zz
[
2
]
=
ZIGZAG_DECODE
(
int64_t
,
zigzag_value
);
v
+=
bit
;
zigzag_value
=
((
w
>>
v
)
&
mask
);
zz
[
3
]
=
ZIGZAG_DECODE
(
int64_t
,
zigzag_value
);
p
[
_pos
]
=
prev_value
+
zz
[
0
];
p
[
_pos
+
1
]
=
p
[
_pos
]
+
zz
[
1
];
p
[
_pos
+
2
]
=
p
[
_pos
+
1
]
+
zz
[
2
];
p
[
_pos
+
3
]
=
p
[
_pos
+
2
]
+
zz
[
3
];
prev_value
=
p
[
_pos
+
3
];
v
+=
bit
;
}
// handle the remain
for
(
int32_t
i
=
0
;
i
<
remain
;
i
++
)
{
for
(
int32_t
i
=
0
;
i
<
remain
;
i
++
)
{
zigzag_value
=
((
w
>>
v
)
&
mask
);
zigzag_value
=
((
w
>>
(
v
+
(
batch
*
bit
))
)
&
mask
);
prev_value
+=
ZIGZAG_DECODE
(
int64_t
,
zigzag_value
);
prev_value
+=
ZIGZAG_DECODE
(
int64_t
,
zigzag_value
);
p
[
_pos
++
]
=
prev_value
;
p
[
_pos
++
]
=
prev_value
;
v
+=
bit
;
v
+=
bit
;
}
}
count
+=
num
;
#endif
#else
#else
for
(
int32_t
i
=
0
;
i
<
elems
&&
count
<
nelements
;
i
++
,
count
++
)
{
for
(
int32_t
i
=
0
;
i
<
elems
&&
count
<
nelements
;
i
++
,
count
++
)
{
zigzag_value
=
((
w
>>
v
)
&
mask
);
zigzag_value
=
((
w
>>
v
)
&
mask
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录