Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
f96a7e40
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f96a7e40
编写于
4月 05, 2019
作者:
M
Martijn Bakker
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove the list lookup and prevent the global lock
上级
97dd0e2a
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
100 addition
and
101 deletion
+100
-101
dbms/src/Common/SortedLookupPODArray.h
dbms/src/Common/SortedLookupPODArray.h
+0
-52
dbms/src/Interpreters/Join.cpp
dbms/src/Interpreters/Join.cpp
+3
-9
dbms/src/Interpreters/Join.h
dbms/src/Interpreters/Join.h
+0
-3
dbms/src/Interpreters/RowRefs.cpp
dbms/src/Interpreters/RowRefs.cpp
+23
-21
dbms/src/Interpreters/RowRefs.h
dbms/src/Interpreters/RowRefs.h
+74
-16
未找到文件。
dbms/src/Common/SortedLookupPODArray.h
已删除
100644 → 0
浏览文件 @
97dd0e2a
#pragma once
#include <vector>
//#include <Common/PODArray.h>
namespace
DB
{
/**
* This class is intended to push sortable data into.
* When looking up values the container ensures that it is sorted for log(N) lookup
*
* Note, this is only efficient when the insertions happen in one stage, followed by all retrievals
* This way the data only gets sorted once.
*/
template
<
typename
T
>
class
SortedLookupPODArray
{
public:
using
Base
=
std
::
vector
<
T
>
;
//using Base = PaddedPODArray<T>;
template
<
typename
U
,
typename
...
TAllocatorParams
>
void
insert
(
U
&&
x
,
TAllocatorParams
&&
...
allocator_params
)
{
array
.
push_back
(
std
::
forward
<
U
>
(
x
),
std
::
forward
<
TAllocatorParams
>
(
allocator_params
)...);
sorted
=
false
;
}
typename
Base
::
const_iterator
upper_bound
(
const
T
&
k
)
{
if
(
!
sorted
)
sort
();
return
std
::
upper_bound
(
array
.
cbegin
(),
array
.
cend
(),
k
);
}
typename
Base
::
const_iterator
cbegin
()
const
{
return
array
.
cbegin
();
}
typename
Base
::
const_iterator
cend
()
const
{
return
array
.
cend
();
}
private:
Base
array
;
bool
sorted
=
false
;
void
sort
()
{
std
::
sort
(
array
.
begin
(),
array
.
end
());
sorted
=
true
;
}
};
}
dbms/src/Interpreters/Join.cpp
浏览文件 @
f96a7e40
...
...
@@ -422,8 +422,8 @@ namespace
typename
Map
::
mapped_type
*
time_series_map
=
&
emplace_result
.
getMapped
();
if
(
emplace_result
.
isInserted
())
time_series_map
=
new
(
time_series_map
)
typename
Map
::
mapped_type
();
time_series_map
->
insert
(
join
.
getAsofType
(),
join
.
getAsofData
(),
asof_column
,
stored_block
,
i
);
time_series_map
=
new
(
time_series_map
)
typename
Map
::
mapped_type
(
join
.
getAsofType
()
);
time_series_map
->
insert
(
join
.
getAsofType
(),
asof_column
,
stored_block
,
i
);
}
};
...
...
@@ -511,10 +511,7 @@ void Join::prepareBlockListStructure(Block & stored_block)
for
(
const
auto
&
name
:
key_names_right
)
{
if
(
strictness
==
ASTTableJoin
::
Strictness
::
Asof
&&
name
==
key_names_right
.
back
())
{
LOG_DEBUG
(
log
,
"preventing removal of ASOF join column with name="
<<
name
);
break
;
// this is the last column so break is OK
}
if
(
!
erased
.
count
(
name
))
stored_block
.
erase
(
stored_block
.
getPositionByName
(
name
));
...
...
@@ -556,8 +553,6 @@ bool Join::insertFromBlock(const Block & block)
prepareBlockListStructure
(
*
stored_block
);
LOG_DEBUG
(
log
,
"insertFromBlock stored_block="
<<
stored_block
->
dumpStructure
());
size_t
size
=
stored_block
->
columns
();
/// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them.
...
...
@@ -720,7 +715,7 @@ std::unique_ptr<IColumn::Offsets> NO_INLINE joinRightIndexedColumns(
if
constexpr
(
STRICTNESS
==
ASTTableJoin
::
Strictness
::
Asof
)
{
if
(
const
RowRef
*
found
=
mapped
.
findAsof
(
join
.
getAsofType
(),
join
.
getAsofData
(),
asof_column
,
i
))
if
(
const
RowRef
*
found
=
mapped
.
findAsof
(
join
.
getAsofType
(),
asof_column
,
i
))
{
filter
[
i
]
=
1
;
mapped
.
setUsed
();
...
...
@@ -1096,7 +1091,6 @@ void Join::joinGet(Block & block, const String & column_name) const
void
Join
::
joinBlock
(
Block
&
block
,
const
Names
&
key_names_left
,
const
NamesAndTypesList
&
columns_added_by_join
)
const
{
std
::
shared_lock
lock
(
rwlock
);
LOG_DEBUG
(
log
,
"joinBlock: "
<<
block
.
dumpStructure
());
checkTypesOfKeys
(
block
,
key_names_left
,
sample_block_with_keys
);
...
...
dbms/src/Interpreters/Join.h
浏览文件 @
f96a7e40
...
...
@@ -132,8 +132,6 @@ public:
ASTTableJoin
::
Kind
getKind
()
const
{
return
kind
;
}
AsofRowRefs
::
Type
getAsofType
()
const
{
return
*
asof_type
;
}
AsofRowRefs
::
LookupLists
&
getAsofData
()
{
return
asof_lookup_lists
;
}
const
AsofRowRefs
::
LookupLists
&
getAsofData
()
const
{
return
asof_lookup_lists
;
}
/** Depending on template parameter, adds or doesn't add a flag, that element was used (row was joined).
* Depending on template parameter, decide whether to overwrite existing values when encountering the same key again
...
...
@@ -369,7 +367,6 @@ private:
private:
Type
type
=
Type
::
EMPTY
;
std
::
optional
<
AsofRowRefs
::
Type
>
asof_type
;
AsofRowRefs
::
LookupLists
asof_lookup_lists
;
static
Type
chooseMethod
(
const
ColumnRawPtrs
&
key_columns
,
Sizes
&
key_sizes
);
...
...
dbms/src/Interpreters/RowRefs.cpp
浏览文件 @
f96a7e40
...
...
@@ -30,51 +30,53 @@ void callWithType(AsofRowRefs::Type which, F && f)
}
// namespace
void
AsofRowRefs
::
insert
(
Type
type
,
LookupLists
&
lookup_data
,
const
IColumn
*
asof_column
,
const
Block
*
block
,
size_t
row_num
)
AsofRowRefs
::
AsofRowRefs
(
Type
type
)
{
auto
call
=
[
&
](
const
auto
&
t
)
{
using
T
=
std
::
decay_t
<
decltype
(
t
)
>
;
using
LookupType
=
typename
Entry
<
T
>::
LookupType
;
lookups
=
std
::
make_unique
<
LookupType
>
();
};
callWithType
(
type
,
call
);
}
void
AsofRowRefs
::
insert
(
Type
type
,
const
IColumn
*
asof_column
,
const
Block
*
block
,
size_t
row_num
)
{
auto
call
=
[
&
](
const
auto
&
t
)
{
using
T
=
std
::
decay_t
<
decltype
(
t
)
>
;
using
Lookup
Type
=
typename
Entry
<
T
>::
LookupType
;
using
Lookup
Ptr
=
typename
Entry
<
T
>::
LookupPtr
;
auto
*
column
=
typeid_cast
<
const
ColumnVector
<
T
>
*>
(
asof_column
);
T
key
=
column
->
getElement
(
row_num
);
auto
entry
=
Entry
<
T
>
(
key
,
RowRef
(
block
,
row_num
));
std
::
lock_guard
<
std
::
mutex
>
lock
(
lookup_data
.
mutex
);
if
(
!
lookups
)
{
lookup_data
.
lookups
.
push_back
(
Lookups
());
lookup_data
.
lookups
.
back
()
=
LookupType
();
lookups
=
&
lookup_data
.
lookups
.
back
();
}
std
::
get
<
LookupType
>
(
*
lookups
).
insert
(
entry
);
std
::
get
<
LookupPtr
>
(
lookups
)
->
insert
(
entry
);
};
callWithType
(
type
,
call
);
}
const
RowRef
*
AsofRowRefs
::
findAsof
(
Type
type
,
const
LookupLists
&
lookup_data
,
const
IColumn
*
asof_column
,
size_t
row_num
)
const
const
RowRef
*
AsofRowRefs
::
findAsof
(
Type
type
,
const
IColumn
*
asof_column
,
size_t
row_num
)
const
{
const
RowRef
*
out
=
nullptr
;
auto
call
=
[
&
](
const
auto
&
t
)
{
using
T
=
std
::
decay_t
<
decltype
(
t
)
>
;
using
Lookup
Type
=
typename
Entry
<
T
>::
LookupType
;
using
Lookup
Ptr
=
typename
Entry
<
T
>::
LookupPtr
;
auto
*
column
=
typeid_cast
<
const
ColumnVector
<
T
>
*>
(
asof_column
);
T
key
=
column
->
getElement
(
row_num
);
auto
&
typed_lookup
=
std
::
get
<
LookupPtr
>
(
lookups
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
lookup_data
.
mutex
);
if
(
!
lookups
)
return
;
// The first thread that calls upper_bound ensures that the data is sorted
auto
it
=
typed_lookup
->
upper_bound
(
Entry
<
T
>
(
key
));
auto
&
typed_lookup
=
std
::
get
<
LookupType
>
(
*
lookups
);
auto
it
=
typed_lookup
.
upper_bound
(
Entry
<
T
>
(
key
));
if
(
it
!=
typed_lookup
.
cbegin
())
// cbegin() is safe to call now because the array is immutable after sorting
// hence the pointer to a entry can be returned
if
(
it
!=
typed_lookup
->
cbegin
())
out
=
&
((
--
it
)
->
row_ref
);
};
...
...
dbms/src/Interpreters/RowRefs.h
浏览文件 @
f96a7e40
#pragma once
#include <Columns/IColumn.h>
#include <Common/SortedLookupPODArray.h>
#include <optional>
#include <variant>
#include <list>
#include <mutex>
#include <algorithm>
namespace
DB
{
...
...
@@ -32,14 +32,68 @@ struct RowRefList : RowRef
RowRefList
(
const
Block
*
block_
,
size_t
row_num_
)
:
RowRef
(
block_
,
row_num_
)
{}
};
/**
* This class is intended to push sortable data into.
* When looking up values the container ensures that it is sorted for log(N) lookup
* After calling any of the lookup methods, it is no longer allowed to insert more data as this would invalidate the
* references that can be returned by the lookup methods
*/
template
<
typename
T
>
class
SortedLookupVector
{
public:
using
Base
=
std
::
vector
<
T
>
;
// First stage, insertions into the vector
template
<
typename
U
,
typename
...
TAllocatorParams
>
void
insert
(
U
&&
x
,
TAllocatorParams
&&
...
allocator_params
)
{
assert
(
!
sorted
.
load
(
std
::
memory_order_acquire
));
array
.
push_back
(
std
::
forward
<
U
>
(
x
),
std
::
forward
<
TAllocatorParams
>
(
allocator_params
)...);
}
// Transition into second stage, ensures that the vector is sorted
typename
Base
::
const_iterator
upper_bound
(
const
T
&
k
)
{
sort
();
return
std
::
upper_bound
(
array
.
cbegin
(),
array
.
cend
(),
k
);
}
// After ensuring that the vector is sorted by calling a lookup these are safe to call
typename
Base
::
const_iterator
cbegin
()
const
{
return
array
.
cbegin
();
}
typename
Base
::
const_iterator
cend
()
const
{
return
array
.
cend
();
}
private:
Base
array
;
std
::
atomic
<
bool
>
sorted
=
false
;
mutable
std
::
mutex
lock
;
// Double checked locking with SC atomics works in C++
// https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
// The first thread that calls one of the lookup methods sorts the data
// After calling the first lookup method it is no longer allowed to insert any data
// the array becomes immutable
void
sort
()
{
if
(
!
sorted
.
load
(
std
::
memory_order_acquire
))
{
std
::
lock_guard
<
std
::
mutex
>
l
(
lock
);
if
(
!
sorted
.
load
(
std
::
memory_order_relaxed
))
{
std
::
sort
(
array
.
begin
(),
array
.
end
());
sorted
.
store
(
true
,
std
::
memory_order_release
);
}
}
}
};
class
AsofRowRefs
{
public:
template
<
typename
T
>
struct
Entry
{
using
LookupType
=
SortedLookup
PODArray
<
Entry
<
T
>>
;
using
LookupType
=
SortedLookup
Vector
<
Entry
<
T
>>
;
using
LookupPtr
=
std
::
unique_ptr
<
LookupType
>
;
T
asof_value
;
RowRef
row_ref
;
...
...
@@ -53,16 +107,10 @@ public:
};
using
Lookups
=
std
::
variant
<
Entry
<
UInt32
>::
LookupType
,
Entry
<
UInt64
>::
LookupType
,
Entry
<
Float32
>::
LookupType
,
Entry
<
Float64
>::
LookupType
>
;
struct
LookupLists
{
mutable
std
::
mutex
mutex
;
std
::
list
<
Lookups
>
lookups
;
};
Entry
<
UInt32
>::
LookupPtr
,
Entry
<
UInt64
>::
LookupPtr
,
Entry
<
Float32
>::
LookupPtr
,
Entry
<
Float64
>::
LookupPtr
>
;
enum
class
Type
{
...
...
@@ -72,13 +120,23 @@ public:
keyf64
,
};
AsofRowRefs
()
{}
AsofRowRefs
(
Type
t
);
static
std
::
optional
<
Type
>
getTypeSize
(
const
IColumn
*
asof_column
,
size_t
&
type_size
);
void
insert
(
Type
type
,
LookupLists
&
,
const
IColumn
*
asof_column
,
const
Block
*
block
,
size_t
row_num
);
const
RowRef
*
findAsof
(
Type
type
,
const
LookupLists
&
,
const
IColumn
*
asof_column
,
size_t
row_num
)
const
;
// This will be synchronized by the rwlock mutex in Join.h
void
insert
(
Type
type
,
const
IColumn
*
asof_column
,
const
Block
*
block
,
size_t
row_num
);
// This will internally synchronize
const
RowRef
*
findAsof
(
Type
type
,
const
IColumn
*
asof_column
,
size_t
row_num
)
const
;
private:
Lookups
*
lookups
=
nullptr
;
// Lookups can be stored in a HashTable because it is memmovable
// A std::variant contains a currently active type id (memmovable), together with a union of the types
// The types are all std::unique_ptr, which contains a single pointer, which is memmovable.
// Source: https://github.com/yandex/ClickHouse/issues/4906
Lookups
lookups
;
};
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录