Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
db3a67a4
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
db3a67a4
编写于
9月 22, 2017
作者:
V
Vitaliy Lyudvichenko
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add clearer RangeFiltered implementation. [#CLICKHOUSE-3178]
上级
e2a12d10
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
195 addition
and
50 deletion
+195
-50
.gitignore
.gitignore
+1
-0
dbms/src/Storages/MergeTree/MergeTreeData.cpp
dbms/src/Storages/MergeTree/MergeTreeData.cpp
+19
-22
dbms/src/Storages/MergeTree/MergeTreeDataPart.h
dbms/src/Storages/MergeTree/MergeTreeDataPart.h
+21
-10
dbms/src/Storages/StorageReplicatedMergeTree.cpp
dbms/src/Storages/StorageReplicatedMergeTree.cpp
+3
-3
dbms/src/Storages/tests/gtest_range_filtered.cpp.cpp
dbms/src/Storages/tests/gtest_range_filtered.cpp.cpp
+45
-0
dbms/tests/integration/test_extreme_deduplication/test.py
dbms/tests/integration/test_extreme_deduplication/test.py
+1
-1
libs/libcommon/include/common/RangeFiltered.h
libs/libcommon/include/common/RangeFiltered.h
+105
-14
未找到文件。
.gitignore
浏览文件 @
db3a67a4
...
...
@@ -35,6 +35,7 @@ cmake_install.cmake
CTestTestfile.cmake
*.a
*.o
cmake-build-*
# Python cache
*.pyc
...
...
dbms/src/Storages/MergeTree/MergeTreeData.cpp
浏览文件 @
db3a67a4
...
...
@@ -393,10 +393,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
LOG_DEBUG
(
log
,
"Loading data parts"
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
data_parts_mutex
);
//std::lock_guard<std::mutex> lock_all(all_data_parts_mutex);
data_parts
.
clear
();
//all_data_parts.clear();
Strings
part_file_names
;
Poco
::
DirectoryIterator
end
;
...
...
@@ -496,6 +493,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
}
part
->
modification_time
=
Poco
::
File
(
full_path
+
file_name
).
getLastModified
().
epochTime
();
/// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later
part
->
state
=
DataPartState
::
Committed
;
data_parts
.
insert
(
part
);
...
...
@@ -510,18 +508,17 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
for
(
auto
&
part
:
broken_parts_to_detach
)
part
->
renameAddPrefix
(
true
,
""
);
//all_data_parts = data_parts;
/// Delete from the set of current parts those parts that are covered by another part (those parts that
/// were merged), but that for some reason are still not deleted from the filesystem.
/// Deletion of files will be performed later in the clearOldParts() method.
if
(
data_parts
.
size
()
>=
2
)
{
DataParts
::
iterator
prev_jt
=
data_parts
.
begin
();
DataParts
::
iterator
curr_jt
=
prev_jt
;
++
curr_jt
;
while
(
curr_jt
!=
data_parts
.
end
())
auto
committed_parts
=
getDataPartsRange
({
DataPartState
::
Committed
});
auto
prev_jt
=
committed_parts
.
begin
();
auto
curr_jt
=
std
::
next
(
prev_jt
);
while
(
curr_jt
!=
committed_parts
.
end
())
{
/// Don't consider data parts belonging to different partitions.
if
((
*
curr_jt
)
->
info
.
partition_id
!=
(
*
prev_jt
)
->
info
.
partition_id
)
...
...
@@ -534,14 +531,15 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
if
((
*
curr_jt
)
->
contains
(
**
prev_jt
))
{
(
*
prev_jt
)
->
remove_time
=
(
*
prev_jt
)
->
modification_time
;
data_parts
.
erase
(
prev_jt
);
(
*
prev_jt
)
->
state
=
DataPartState
::
Outdated
;
/// prev_jt becomes invalid here
prev_jt
=
curr_jt
;
++
curr_jt
;
}
else
if
((
*
prev_jt
)
->
contains
(
**
curr_jt
))
{
(
*
curr_jt
)
->
remove_time
=
(
*
curr_jt
)
->
modification_time
;
data_parts
.
erase
(
curr_jt
++
);
(
*
curr_jt
)
->
state
=
DataPartState
::
Outdated
;
/// curr_jt becomes invalid here
++
curr_jt
;
}
else
{
...
...
@@ -651,7 +649,7 @@ void MergeTreeData::rollbackDeletingParts(const MergeTreeData::DataPartsVector &
for
(
auto
&
part
:
parts
)
{
/// We should modify it under data_parts_mutex
part
->
check
State
({
DataPartState
::
Deleting
});
part
->
assert
State
({
DataPartState
::
Deleting
});
part
->
state
=
DataPartState
::
Outdated
;
}
}
...
...
@@ -709,12 +707,10 @@ void MergeTreeData::dropAllData()
LOG_TRACE
(
log
,
"dropAllData: waiting for locks."
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
data_parts_mutex
);
//std::lock_guard<std::mutex> lock_all(all_data_parts_mutex);
LOG_TRACE
(
log
,
"dropAllData: removing data from memory."
);
data_parts
.
clear
();
//all_data_parts.clear();
column_sizes
.
clear
();
context
.
dropCaches
();
...
...
@@ -897,7 +893,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
const
IDataType
*
observed_type
;
if
(
is_nullable
)
{
const
DataTypeNullable
&
nullable_type
=
static_cast
<
const
DataTypeNullable
&>
(
*
column
.
type
);
auto
&
nullable_type
=
static_cast
<
const
DataTypeNullable
&>
(
*
column
.
type
);
observed_type
=
nullable_type
.
getNestedType
().
get
();
}
else
...
...
@@ -1321,7 +1317,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
if
(
out_transaction
&&
out_transaction
->
data
)
throw
Exception
(
"Using the same MergeTreeData::Transaction for overlapping transactions is invalid"
,
ErrorCodes
::
LOGICAL_ERROR
);
part
->
check
State
({
DataPartState
::
Temporary
});
part
->
assert
State
({
DataPartState
::
Temporary
});
DataPartsVector
replaced
;
{
...
...
@@ -1354,7 +1350,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
if
(
it_duplicate
!=
data_parts
.
end
())
{
String
message
=
"Part "
+
(
*
it_duplicate
)
->
getNameWithState
()
+
" already exists"
;
if
((
*
it_duplicate
)
->
tryC
heckState
({
DataPartState
::
Outdated
,
DataPartState
::
Deleting
}))
if
((
*
it_duplicate
)
->
c
heckState
({
DataPartState
::
Outdated
,
DataPartState
::
Deleting
}))
message
+=
", but it will be deleted soon"
;
throw
Exception
(
message
,
ErrorCodes
::
DUPLICATE_DATA_PART
);
...
...
@@ -1369,7 +1365,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace(
auto
check_replacing_part_state
=
[
&
]
(
const
DataPartPtr
&
cur_part
)
{
cur_part
->
check
State
({
DataPartState
::
PreCommitted
,
DataPartState
::
Committed
});
cur_part
->
assert
State
({
DataPartState
::
PreCommitted
,
DataPartState
::
Committed
});
if
(
cur_part
->
state
==
DataPartState
::
PreCommitted
)
throw
Exception
(
"Could not add part "
+
new_name
+
" while replacing part "
+
cur_part
->
name
+
" is in pre-committed state"
,
ErrorCodes
::
LOGICAL_ERROR
);
};
...
...
@@ -1486,7 +1482,7 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo
if
(
!
data_parts
.
count
(
part
))
throw
Exception
(
"Part "
+
part
->
getNameWithState
()
+
" not found in data_parts"
,
ErrorCodes
::
LOGICAL_ERROR
);
part
->
check
State
({
DataPartState
::
PreCommitted
,
DataPartState
::
Committed
,
DataPartState
::
Outdated
});
part
->
assert
State
({
DataPartState
::
PreCommitted
,
DataPartState
::
Committed
,
DataPartState
::
Outdated
});
}
auto
remove_time
=
clear_without_timeout
?
0
:
time
(
nullptr
);
...
...
@@ -1748,7 +1744,8 @@ void MergeTreeData::calculateColumnSizesImpl()
{
column_sizes
.
clear
();
for
(
const
auto
&
part
:
data_parts
)
/// Take into account only committed parts
for
(
const
auto
&
part
:
getDataPartsRange
({
DataPartState
::
Committed
}))
addPartContributionToColumnSizes
(
part
);
}
...
...
@@ -2075,9 +2072,9 @@ void MergeTreeData::Transaction::replaceParts(MergeTreeData::DataPartState move_
std
::
lock_guard
<
std
::
mutex
>
lock
(
data
->
data_parts_mutex
);
for
(
auto
&
part
:
committed_parts
)
part
->
check
State
({
DataPartState
::
Committed
});
part
->
assert
State
({
DataPartState
::
Committed
});
for
(
auto
&
part
:
precommitted_parts
)
part
->
check
State
({
DataPartState
::
PreCommitted
});
part
->
assert
State
({
DataPartState
::
PreCommitted
});
/// If it is rollback then do nothing, else make it Outdated and remove their size contribution
if
(
move_committed_to
!=
DataPartState
::
Committed
)
...
...
dbms/src/Storages/MergeTree/MergeTreeDataPart.h
浏览文件 @
db3a67a4
...
...
@@ -139,6 +139,9 @@ struct MergeTreeDataPart
/// If true, the destructor will delete the directory with the part.
bool
is_temp
=
false
;
/// For resharding.
size_t
shard_no
=
0
;
/**
* Part state is a stage of its lifetime. States are ordered and state of a part could be increased only.
* Part state should be modified under data_parts mutex.
...
...
@@ -159,6 +162,7 @@ struct MergeTreeDataPart
Deleting
/// not active data part with identity refcounter, it is deleting right now by a cleaner
};
/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
mutable
State
state
{
State
::
Temporary
};
/// Returns name of state
...
...
@@ -171,7 +175,7 @@ struct MergeTreeDataPart
}
/// Returns true if state of part is one of affordable_states
bool
tryCheckState
(
std
::
initializer_list
<
State
>
affordable_states
)
const
bool
checkState
(
const
std
::
initializer_list
<
State
>
&
affordable_states
)
const
{
for
(
auto
affordable_state
:
affordable_states
)
{
...
...
@@ -182,9 +186,9 @@ struct MergeTreeDataPart
}
/// Throws an exception if state of the part is not in affordable_states
void
checkState
(
std
::
initializer_list
<
State
>
affordable_states
)
const
void
assertState
(
const
std
::
initializer_list
<
State
>
&
affordable_states
)
const
{
if
(
!
tryC
heckState
(
affordable_states
))
if
(
!
c
heckState
(
affordable_states
))
{
String
states_str
;
for
(
auto
state
:
affordable_states
)
...
...
@@ -194,17 +198,24 @@ struct MergeTreeDataPart
}
}
/// In comparison with lambdas, it is move assignable and could has several overloaded operator()
struct
StatesFilter
{
std
::
initializer_list
<
State
>
affordable_states
;
StatesFilter
(
const
std
::
initializer_list
<
State
>
&
affordable_states
)
:
affordable_states
(
affordable_states
)
{}
bool
operator
()
(
const
std
::
shared_ptr
<
const
MergeTreeDataPart
>
&
part
)
const
{
return
part
->
checkState
(
affordable_states
);
}
};
/// Returns a lambda that returns true only for part with states from specified list
static
inline
decltype
(
auto
)
getStatesFilter
(
std
::
initializer_list
<
State
>
affordable_states
)
static
inline
StatesFilter
getStatesFilter
(
const
std
::
initializer_list
<
State
>
&
affordable_states
)
{
return
[
affordable_states
]
(
const
std
::
shared_ptr
<
const
MergeTreeDataPart
>
&
part
)
{
return
part
->
tryCheckState
(
affordable_states
);
};
return
StatesFilter
(
affordable_states
);
}
/// For resharding.
size_t
shard_no
=
0
;
/// Primary key (correspond to primary.idx file).
/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.
/// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h.
...
...
dbms/src/Storages/StorageReplicatedMergeTree.cpp
浏览文件 @
db3a67a4
...
...
@@ -3534,10 +3534,10 @@ void StorageReplicatedMergeTree::reshardPartitions(
/// Make a list of local partitions that need to be resharded.
std
::
set
<
std
::
string
>
unique_partition_list
;
const
MergeTreeData
::
DataParts
&
data_parts
=
data
.
getDataParts
();
for
(
MergeTreeData
::
DataParts
::
iterator
it
=
data_parts
.
cbegin
();
it
!=
data_parts
.
cend
();
++
it
)
auto
data_parts
=
data
.
getDataParts
();
for
(
auto
&
part
:
data_parts
)
{
const
String
&
current_partition_id
=
(
*
it
)
->
info
.
partition_id
;
const
String
&
current_partition_id
=
part
->
info
.
partition_id
;
if
(
include_all
||
partition_id
==
current_partition_id
)
unique_partition_list
.
insert
(
current_partition_id
);
}
...
...
dbms/src/Storages/tests/gtest_range_filtered.cpp.cpp
0 → 100644
浏览文件 @
db3a67a4
#include <gtest/gtest.h>
#include <common/RangeFiltered.h>
#include <vector>
#include <set>
TEST
(
RangeFiltered
,
simple
)
{
std
::
vector
<
int
>
v
;
for
(
int
i
=
0
;
i
<
10
;
++
i
)
v
.
push_back
(
i
);
auto
v30
=
createRangeFiltered
([]
(
int
i
)
{
return
i
%
3
==
0
;},
v
);
auto
v31
=
createRangeFiltered
([]
(
int
i
)
{
return
i
%
3
!=
0
;},
v
);
for
(
const
int
&
i
:
v30
)
ASSERT_EQ
(
i
%
3
,
0
);
for
(
const
int
&
i
:
v31
)
ASSERT_NE
(
i
%
3
,
0
);
{
auto
it
=
v30
.
begin
();
ASSERT_EQ
(
*
it
,
0
);
auto
it2
=
std
::
next
(
it
);
ASSERT_EQ
(
*
it2
,
3
);
auto
it3
=
it
;
it
=
std
::
next
(
it2
);
ASSERT_EQ
(
*
it
,
6
);
}
{
auto
it
=
std
::
next
(
v30
.
begin
());
ASSERT_EQ
(
*
it
,
3
);
*
it
=
2
;
/// it becomes invalid
ASSERT_EQ
(
*
(
++
it
),
6
);
/// but iteration is sucessfull
*
v30
.
begin
()
=
1
;
ASSERT_EQ
(
*
v30
.
begin
(),
6
);
}
}
dbms/tests/integration/test_extreme_deduplication/test.py
浏览文件 @
db3a67a4
...
...
@@ -102,4 +102,4 @@ done
except
Exception
as
e
:
pass
# Uncomment when problem will be fixed
#
check_timeout_exception(e)
check_timeout_exception
(
e
)
libs/libcommon/include/common/RangeFiltered.h
浏览文件 @
db3a67a4
#pragma once
#include <type_traits>
#include <boost/range/adaptor/filtered.hpp>
/// Similar to boost::filtered_range but a little bit easier and allows to convert ordinary iterators to filtered
template
<
typename
F
,
typename
C
>
struct
RangeFiltered
:
public
boost
::
iterator_range
<
boost
::
filter_iterator
<
F
,
decltype
(
std
::
end
(
C
()))
>>
struct
RangeFiltered
{
using
RawIterator
=
decltype
(
std
::
end
(
C
()));
using
FilterIterator
=
boost
::
filter_iterator
<
F
,
RawIterator
>
;
using
Base
=
boost
::
iterator_range
<
FilterIterator
>
;
using
RawIterator
=
typename
C
::
iterator
;
class
Iterator
;
RangeFiltered
(
const
F
&
filter
,
const
C
&
container
)
:
Base
(
FilterIterator
(
filter
,
std
::
begin
(
container
),
std
::
end
(
container
)),
FilterIterator
(
filter
,
std
::
end
(
container
),
std
::
end
(
container
))),
filter
(
filter
)
{}
/// Will iterate over elements for which filter(*it) == true
RangeFiltered
(
F
&&
filter
,
C
&
container
)
:
filter
(
std
::
move
(
filter
)),
container
(
container
)
{}
Iterator
begin
()
const
{
return
{
*
this
,
std
::
begin
(
container
)};
}
Iterator
end
()
const
{
return
{
*
this
,
std
::
end
(
container
)};
}
/// Convert ordinary iterator to filtered one
/// Real position will be in range [ordinary_iterator; end()], so it is suitable to use with lower[upper]_bound()
inline
Filter
Iterator
convert
(
RawIterator
ordinary_iterator
)
const
inline
Iterator
convert
(
RawIterator
ordinary_iterator
)
const
{
return
{
filter
,
ordinary_iterator
,
std
::
end
(
*
this
).
end
()
};
return
{
*
this
,
ordinary_iterator
};
}
/// It is similar to boost::filtered_iterator, but has additional features:
/// it doesn't store end() iterator
/// it doesn't store predicate, so it allows to implement operator=()
/// it guarantees that operator++() works properly in case of filter(*it) == false
class
Iterator
{
public:
using
Range
=
RangeFiltered
<
F
,
C
>
;
typedef
Iterator
self_type
;
typedef
typename
std
::
iterator_traits
<
RawIterator
>::
value_type
value_type
;
typedef
typename
std
::
iterator_traits
<
RawIterator
>::
reference
reference
;
typedef
const
value_type
&
const_reference
;
typedef
typename
std
::
iterator_traits
<
RawIterator
>::
pointer
pointer
;
typedef
const
value_type
*
const_pointer
;
typedef
typename
std
::
iterator_traits
<
RawIterator
>::
difference_type
difference_type
;
typedef
std
::
bidirectional_iterator_tag
iterator_category
;
Iterator
(
const
Range
&
range_
,
RawIterator
iter_
)
:
range
(
&
range_
),
iter
(
iter_
)
{
for
(;
iter
!=
std
::
end
(
range
->
container
)
&&
!
range
->
filter
(
*
iter
);
++
iter
);
}
Iterator
(
const
Iterator
&
rhs
)
=
default
;
Iterator
(
Iterator
&&
rhs
)
noexcept
=
default
;
Iterator
operator
++
()
{
++
iter
;
for
(;
iter
!=
std
::
end
(
range
->
container
)
&&
!
range
->
filter
(
*
iter
);
++
iter
);
return
*
this
;
}
Iterator
operator
--
()
{
--
iter
;
for
(;
!
range
->
filter
(
*
iter
);
--
iter
);
/// Don't check std::begin() bound
return
*
this
;
}
pointer
operator
->
()
{
return
iter
.
operator
->
();
}
const_pointer
operator
->
()
const
{
return
iter
.
operator
->
();
}
reference
operator
*
()
{
return
*
iter
;
}
const_reference
operator
*
()
const
{
return
*
iter
;
}
bool
operator
==
(
const
self_type
&
rhs
)
const
{
return
iter
==
rhs
.
iter
;
}
bool
operator
!=
(
const
self_type
&
rhs
)
const
{
return
iter
!=
rhs
.
iter
;
}
self_type
&
operator
=
(
const
self_type
&
rhs
)
=
default
;
self_type
&
operator
=
(
self_type
&&
rhs
)
noexcept
=
default
;
~
Iterator
()
=
default
;
private:
const
Range
*
range
=
nullptr
;
RawIterator
iter
;
};
protected:
F
filter
;
C
&
container
;
};
template
<
typename
F
,
typename
C
>
inline
decltype
(
auto
)
createRangeFiltered
(
F
&&
filter
,
C
&&
container
)
inline
RangeFiltered
<
std
::
decay_t
<
F
>
,
std
::
decay_t
<
C
>>
createRangeFiltered
(
F
&&
filter
,
C
&&
container
)
{
return
RangeFiltered
<
std
::
decay_t
<
F
>
,
std
::
decay_t
<
C
>>
{
std
::
forward
<
F
>
(
filter
),
std
::
forward
<
C
>
(
container
)};
return
{
std
::
forward
<
F
>
(
filter
),
std
::
forward
<
C
>
(
container
)};
};
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录