Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
cac8f921
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
cac8f921
编写于
11月 28, 2016
作者:
A
Alexey Milovidov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
MergeSelector: attempt to do something that has sense [#METR-23632].
上级
8bc84e29
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
109 addition
and
25 deletion
+109
-25
dbms/include/DB/Storages/MergeTree/SimpleMergeSelector.h
dbms/include/DB/Storages/MergeTree/SimpleMergeSelector.h
+32
-4
dbms/src/Storages/MergeTree/SimpleMergeSelector.cpp
dbms/src/Storages/MergeTree/SimpleMergeSelector.cpp
+77
-21
未找到文件。
dbms/include/DB/Storages/MergeTree/SimpleMergeSelector.h
浏览文件 @
cac8f921
...
...
@@ -16,14 +16,42 @@ public:
* If parts have non-uniform sizes, then the minimum number of parts to merge is effectively increased.
* This behaviour balances merge-tree workload.
* It is called 'base', because merge-tree depth could be estimated as logarithm with that base.
*
* If base is higher - then tree gets wider and shallower, lowering write amplification.
* If base is lower - then merges occur more frequently, lowering the number of parts on average.
*
* We need some balance between write amplification and number of parts.
*/
double
base
=
5
;
time_t
lower_base_after_seconds_start
=
300
;
time_t
lower_base_after_seconds_end
=
3600
;
/** Base is lowered until 1 (effectively means "merge any two parts") depending on several variables:
*
* 1. Total number of parts in partition. If too many - then base is lowered.
* It means: when there are too many parts - do merges more urgently.
*
* 2. Minimum age of parts participating in merge. If higher age - then base is lowered.
* It means: do less wide merges only rarely.
*
* 3. Sum size of parts participating in merge. If higher - then more age is required to lower base. So, base is lowered slower.
* It means: for small parts, it's worth to merge faster, even not so wide or balanced.
*
* We have a multivariate dependency. Let it be logarithmic of size and multi-linear by other variables,
* between some boundary points, and constant outside.
*/
size_t
min_size_to_lower_base
=
1024
*
1024
;
size_t
max_size_to_lower_base
=
100ULL
*
1024
*
1024
*
1024
;
time_t
min_age_to_lower_base_at_min_size
=
10
;
time_t
min_age_to_lower_base_at_max_size
=
10
;
time_t
max_age_to_lower_base_at_min_size
=
3600
;
time_t
max_age_to_lower_base_at_max_size
=
30
*
86400
;
size_t
min_parts_to_lower_base
=
10
;
size_t
max_parts_to_lower_base
=
50
;
size_t
lower_base_after_num_parts_start
=
10
;
size_t
lower_base_after_num_parts_end
=
50
;
/// Add this to size before all calculations. It means: merging even very small parts has its fixed cost.
size_t
size_fixed_cost_to_add
=
5
*
1024
*
1024
;
/// Zero means unlimited.
size_t
max_parts_to_merge_at_once
=
100
;
...
...
dbms/src/Storages/MergeTree/SimpleMergeSelector.cpp
浏览文件 @
cac8f921
...
...
@@ -2,6 +2,7 @@
#include <DB/Common/interpolate.h>
#include <cmath>
#include <iostream>
namespace
DB
...
...
@@ -16,10 +17,13 @@ struct Estimator
{
using
Iterator
=
SimpleMergeSelector
::
PartsInPartition
::
const_iterator
;
void
consider
(
Iterator
begin
,
Iterator
end
,
size_t
sum_size
,
size_t
size_prev_at_left
)
void
consider
(
Iterator
begin
,
Iterator
end
,
size_t
sum_size
,
size_t
sum_size_fixed_cost
,
size_t
size_prev_at_left
)
{
double
current_score
=
score
(
end
-
begin
,
sum_size
);
double
current_score
=
score
(
end
-
begin
,
sum_size
,
sum_size_fixed_cost
);
/** Heuristic:
* Give some preference to ranges whose sum_size is similar (in terms of ratio) to the size of the part immediately to the left.
*/
if
(
size_prev_at_left
>
sum_size
*
0.9
)
{
double
difference
=
std
::
abs
(
log2
(
static_cast
<
double
>
(
sum_size
)
/
size_prev_at_left
));
...
...
@@ -46,7 +50,7 @@ struct Estimator
return
SimpleMergeSelector
::
PartsInPartition
(
best_begin
,
best_end
);
}
static
double
score
(
double
count
,
double
sum_size
)
static
double
score
(
double
count
,
double
sum_size
,
double
sum_size_fixed_cost
)
{
/** Consider we have two alternative ranges of data parts to merge.
* Assume time to merge a range is proportional to sum size of its parts.
...
...
@@ -59,7 +63,7 @@ struct Estimator
*
* The integral is lower iff the following formula is lower:
*/
return
sum_size
/
(
count
-
1
);
return
(
sum_size
+
sum_size_fixed_cost
*
count
)
/
(
count
-
1
);
}
double
min_score
=
0
;
...
...
@@ -68,10 +72,72 @@ struct Estimator
};
/** Map `value` onto the unit interval along a ramp between `min` and `max`.
  *
  *  1          _____
  *            /
  *  0 _______/
  *          ^  ^
  *        min  max
  *
  * Returns 0 when value <= min, 1 when value >= max,
  * and the linear fraction (value - min) / (max - min) in between.
  * The lower bound is checked first, so when min == max the division is never reached.
  */
double mapPiecewiseLinearToUnit(double value, double min, double max)
{
    /// Flat part on the left of the ramp.
    if (value <= min)
        return 0;

    /// Flat part on the right of the ramp.
    if (value >= max)
        return 1;

    /// Strictly inside (min, max): position along the slope.
    const double offset = value - min;
    const double width = max - min;
    return offset / width;
}
/** Decide whether a range of parts with the given aggregate properties may be merged.
  *
  * sum_size       - total size of the parts in the range;
  * max_size       - size of the largest part in the range;
  * min_age        - smallest age among the parts in the range;
  * range_size     - number of parts in the range (the caller passes end - begin);
  * partition_size - number of parts in the whole partition (the caller passes parts_count);
  * settings       - thresholds that control how far the base may be lowered.
  *
  * A "lowered base" is derived from settings.base and compared with the ratio
  *  (sum_size + range_size * size_fixed_cost_to_add) / (max_size + size_fixed_cost_to_add).
  * The range is allowed when that ratio is not less than the lowered base.
  */
bool allow(
    double sum_size,
    double max_size,
    double min_age,
    double range_size,
    double partition_size,
    const SimpleMergeSelector::Settings & settings)
{
    /// Where the sum size sits between the two size thresholds, on a logarithmic scale, as 0..1.
    const double log_sum_size = log(1 + sum_size);
    const double log_size_lower_bound = log(1 + settings.min_size_to_lower_base);
    const double log_size_upper_bound = log(1 + settings.max_size_to_lower_base);
    const double size_normalized = mapPiecewiseLinearToUnit(log_sum_size, log_size_lower_bound, log_size_upper_bound);

    /// Age boundaries depend on the size: each one is interpolated between its
    /// "at min size" and "at max size" setting using the normalized size.
    /// NOTE(review): assumes interpolateLinear(a, b, ratio) moves from a to b as ratio goes 0..1 - confirm in interpolate.h.
    const double min_age_to_lower_base = interpolateLinear(
        settings.min_age_to_lower_base_at_min_size,
        settings.min_age_to_lower_base_at_max_size,
        size_normalized);

    const double max_age_to_lower_base = interpolateLinear(
        settings.max_age_to_lower_base_at_min_size,
        settings.max_age_to_lower_base_at_max_size,
        size_normalized);

    /// Age of the youngest part in the range, mapped to 0..1 between those boundaries.
    const double age_normalized = mapPiecewiseLinearToUnit(min_age, min_age_to_lower_base, max_age_to_lower_base);

    /// Number of parts in the partition, mapped to 0..1 between the configured bounds.
    const double num_parts_normalized = mapPiecewiseLinearToUnit(
        partition_size,
        settings.min_parts_to_lower_base,
        settings.max_parts_to_lower_base);

    /// Both factors add up, but the total is capped at 1.
    const double combined_ratio = std::min(1.0, age_normalized + num_parts_normalized);

    /// Interpolate between the configured base and 1.0 by the combined ratio.
    const double lowered_base = interpolateLinear(settings.base, 1.0, combined_ratio);

    /// Every part in the range, as well as the largest part, is charged the fixed cost.
    const double sum_size_with_fixed_cost = sum_size + range_size * settings.size_fixed_cost_to_add;
    const double max_size_with_fixed_cost = max_size + settings.size_fixed_cost_to_add;

    return sum_size_with_fixed_cost / max_size_with_fixed_cost >= lowered_base;
}
void
selectWithinPartition
(
const
SimpleMergeSelector
::
PartsInPartition
&
parts
,
const
size_t
max_total_size_to_merge
,
const
time_t
current_min_part_age
,
Estimator
&
estimator
,
const
SimpleMergeSelector
::
Settings
&
settings
)
{
...
...
@@ -79,14 +145,6 @@ void selectWithinPartition(
if
(
parts_count
<=
1
)
return
;
double
actual_base
=
std
::
max
(
1.0
,
std
::
min
(
settings
.
base
,
std
::
min
(
interpolateLinear
(
settings
.
base
,
1.0
,
(
static_cast
<
double
>
(
parts_count
)
-
settings
.
lower_base_after_num_parts_start
)
/
(
settings
.
lower_base_after_num_parts_end
-
settings
.
lower_base_after_num_parts_start
)),
interpolateLinear
(
settings
.
base
,
1.0
,
(
static_cast
<
double
>
(
current_min_part_age
)
-
settings
.
lower_base_after_seconds_start
)
/
(
settings
.
lower_base_after_seconds_end
-
settings
.
lower_base_after_seconds_start
)))));
for
(
size_t
begin
=
0
;
begin
<
parts_count
;
++
begin
)
{
/// If there are too many parts, select only from first, to avoid complexity.
...
...
@@ -95,6 +153,7 @@ void selectWithinPartition(
size_t
sum_size
=
parts
[
begin
].
size
;
size_t
max_size
=
parts
[
begin
].
size
;
size_t
min_age
=
parts
[
begin
].
age
;
for
(
size_t
end
=
begin
+
2
;
end
<=
parts_count
;
++
end
)
{
...
...
@@ -102,18 +161,21 @@ void selectWithinPartition(
break
;
size_t
cur_size
=
parts
[
end
-
1
].
size
;
size_t
cur_age
=
parts
[
end
-
1
].
age
;
sum_size
+=
cur_size
;
max_size
=
std
::
max
(
max_size
,
cur_size
);
min_age
=
std
::
min
(
min_age
,
cur_age
);
if
(
max_total_size_to_merge
&&
sum_size
>
max_total_size_to_merge
)
break
;
if
(
static_cast
<
double
>
(
sum_size
)
/
max_size
>=
actual_base
)
if
(
allow
(
sum_size
,
max_size
,
min_age
,
end
-
begin
,
parts_count
,
settings
)
)
estimator
.
consider
(
parts
.
begin
()
+
begin
,
parts
.
begin
()
+
end
,
sum_size
,
settings
.
size_fixed_cost_to_add
,
begin
==
0
?
0
:
parts
[
begin
-
1
].
size
);
}
}
...
...
@@ -126,16 +188,10 @@ SimpleMergeSelector::PartsInPartition SimpleMergeSelector::select(
const
Partitions
&
partitions
,
const
size_t
max_total_size_to_merge
)
{
time_t
min_age
=
-
1
;
for
(
const
auto
&
partition
:
partitions
)
for
(
const
auto
&
part
:
partition
)
if
(
min_age
==
-
1
||
part
.
age
<
min_age
)
min_age
=
part
.
age
;
Estimator
estimator
;
for
(
const
auto
&
partition
:
partitions
)
selectWithinPartition
(
partition
,
max_total_size_to_merge
,
min_age
,
estimator
,
settings
);
selectWithinPartition
(
partition
,
max_total_size_to_merge
,
estimator
,
settings
);
return
estimator
.
getBest
();
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录