Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
6fce028b
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6fce028b
编写于
1月 21, 2019
作者:
N
Nikolai Kochetov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refactor ColumnsHashing.
上级
d2074985
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed files
with
446 additions
and
406 deletions
+446
-406
dbms/src/Common/ColumnsHashing.h
dbms/src/Common/ColumnsHashing.h
+123
-359
dbms/src/Common/ColumnsHashingImpl.h
dbms/src/Common/ColumnsHashingImpl.h
+276
-0
dbms/src/Interpreters/Aggregator.cpp
dbms/src/Interpreters/Aggregator.cpp
+41
-41
dbms/src/Interpreters/Aggregator.h
dbms/src/Interpreters/Aggregator.h
+6
-6
未找到文件。
dbms/src/Common/ColumnsHashing.h
浏览文件 @
6fce028b
此差异已折叠。
点击以展开。
dbms/src/Common/ColumnsHashingImpl.h
0 → 100644
浏览文件 @
6fce028b
#pragma once
#include <Columns/IColumn.h>
#include <Interpreters/AggregationCommon.h>
namespace
DB
{
namespace
ColumnsHashing
{
namespace
columns_hashing_impl
{
/// Remembers the most recently accessed hash-table element so that runs of
/// identical consecutive keys can be answered without touching the table.
template <typename Value, bool consecutive_keys_optimization_>
struct LastElementCache
{
    static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;

    Value value;
    bool empty = true;   /// true until the cache is populated for the first time
    bool found = false;  /// set by the hashing methods; whether the cached key was present

    /// Whole-value comparison (key and mapped part, for pair-like Value).
    bool check(const Value & candidate)
    {
        if (empty)
            return false;
        return value == candidate;
    }

    /// Key-only comparison; assumes Value is pair-like with the key in `.first`.
    template <typename Key>
    bool check(const Key & candidate)
    {
        if (empty)
            return false;
        return value.first == candidate;
    }
};
/// Specialization for the case when the consecutive-keys optimization is
/// disabled: no cached element is stored at all (empty shell), only the
/// compile-time flag remains so generic code can branch on it.
template <typename Data>
struct LastElementCache<Data, false>
{
    static constexpr bool consecutive_keys_optimization = false;
};
/// Result of emplacing a key into a hash table: reports whether a new entry
/// was created and exposes the mapped value. setMapped() writes through both
/// the table cell and the consecutive-keys cache copy, keeping them in sync.
template <typename Mapped>
class EmplaceResultImpl
{
    Mapped & mapped_ref;        /// mapped value stored inside the hash table
    Mapped & cached_mapped_ref; /// copy held by LastElementCache (may alias mapped_ref)
    bool was_inserted;

public:
    EmplaceResultImpl(Mapped & mapped_ref_, Mapped & cached_mapped_ref_, bool was_inserted_)
        : mapped_ref(mapped_ref_)
        , cached_mapped_ref(cached_mapped_ref_)
        , was_inserted(was_inserted_)
    {
    }

    /// True if emplace created a new entry (as opposed to finding an existing one).
    bool isInserted() const { return was_inserted; }

    const auto & getMapped() const { return mapped_ref; }

    /// Store `mapped` both into the table cell and the cache copy.
    void setMapped(const Mapped & mapped)
    {
        mapped_ref = cached_mapped_ref = mapped;
    }
};
/// Specialization for hash sets (no mapped value): the only information an
/// emplace can yield is whether the key was newly inserted.
template <>
class EmplaceResultImpl<void>
{
    bool was_inserted;

public:
    explicit EmplaceResultImpl(bool was_inserted_) : was_inserted(was_inserted_) {}

    bool isInserted() const { return was_inserted; }
};
/// Result of a hash-table lookup: whether the key was found, plus a copy of
/// the mapped value (value-initialized Mapped() when the key is absent).
template <typename Mapped>
class FindResultImpl
{
    Mapped stored;   /// copy of the mapped value for a hit, Mapped() for a miss
    bool was_found;

public:
    FindResultImpl(Mapped stored_, bool was_found_)
        : stored(stored_)
        , was_found(was_found_)
    {
    }

    bool isFound() const { return was_found; }

    const Mapped & getMapped() const { return stored; }
};
/// Specialization for hash sets (no mapped value): only the found flag remains.
template <>
class FindResultImpl<void>
{
    bool was_found;

public:
    explicit FindResultImpl(bool was_found_) : was_found(was_found_) {}

    bool isFound() const { return was_found; }
};
/// Common base for columns-hashing methods. Value is the hash-table cell type
/// (a key, or a (key, mapped) pair), Mapped is the mapped type (void for sets).
/// With consecutive_keys_optimization enabled, the last accessed element is
/// cached in a LastElementCache so runs of equal keys skip the table lookup.
template <typename Value, typename Mapped, bool consecutive_keys_optimization>
struct HashMethodBase
{
    using EmplaceResult = EmplaceResultImpl<Mapped>;
    using FindResult = FindResultImpl<Mapped>;
    static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
    using Cache = LastElementCache<Value, consecutive_keys_optimization>;

protected:
    Cache cache;

    HashMethodBase()
    {
        if constexpr (has_mapped && consecutive_keys_optimization)
        {
            /// Init PairNoInit elements.
            cache.value.second = Mapped();
            using Key = decltype(cache.value.first);
            cache.value.first = Key();
        }
    }

    /// Insert `key` into `data` (or find the existing entry). The returned
    /// EmplaceResult reports whether a new entry was created and gives access
    /// to the mapped value; `it` receives the table iterator.
    /// NOTE(review): in the !has_mapped branch `it->second` below still appears
    /// in `Mapped * cached = &it->second;` — presumably this method is only
    /// instantiated for map-like Data; confirm against callers.
    template <typename Data, typename Key>
    ALWAYS_INLINE EmplaceResult emplaceKeyImpl(Key key, Data & data, typename Data::iterator & it)
    {
        if constexpr (Cache::consecutive_keys_optimization)
        {
            /// Fast path: the same key as last time, and it is known to be present.
            if (cache.found && cache.check(key))
            {
                if constexpr (has_mapped)
                    return EmplaceResult(cache.value.second, cache.value.second, false);
                else
                    return EmplaceResult(false);
            }
        }

        bool inserted = false;
        data.emplace(key, it, inserted);

        /// By default the "cached" mapped reference points into the table itself.
        Mapped * cached = &it->second;

        if constexpr (consecutive_keys_optimization)
        {
            /// Refresh the cache and hand out a reference to the cached copy,
            /// so EmplaceResult::setMapped keeps table and cache in sync.
            cache.value = *it;
            cache.found = true;
            cache.empty = false;
            cached = &cache.value.second;
        }

        if constexpr (has_mapped)
            return EmplaceResult(it->second, *cached, inserted);
        else
            return EmplaceResult(inserted);
    }

    /// Look up `key` in `data` without inserting. When the optimization is on,
    /// both positive and negative results are remembered in the cache.
    template <typename Data, typename Key>
    ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
    {
        if constexpr (Cache::consecutive_keys_optimization)
        {
            if (cache.check(key))
            {
                if constexpr (has_mapped)
                    return FindResult(cache.found ? cache.value.second : Mapped(), cache.found);
                else
                    return FindResult(cache.found);
            }
        }

        auto it = data.find(key);
        bool found = it != data.end();

        if constexpr (consecutive_keys_optimization)
        {
            cache.found = found;
            cache.empty = false;

            if (found)
                cache.value = *it;
            else
            {
                /// Remember the missing key as well, so repeated misses are cheap.
                if constexpr (has_mapped)
                    cache.value.first = key;
                else
                    cache.value = key;
            }
        }

        if constexpr (has_mapped)
            return FindResult(found ? it->second : Mapped(), found);
        else
            return FindResult(found);
    }
};
/// Array of cached mapped values, keyed by position (used by the hashing
/// methods that cache per-dictionary results).
template <typename T>
struct MappedCache : public PaddedPODArray<T> {};

/// For sets (no mapped type) the cache degenerates to an empty struct.
template <>
struct MappedCache<void> {};
/// This class is designed to provide the functionality that is required for
/// supporting nullable keys in HashMethodKeysFixed. If there are
/// no nullable keys, this class is merely implemented as an empty shell.
template
<
typename
Key
,
bool
has_nullable_keys
>
class
BaseStateKeysFixed
;
/// Case where nullable keys are supported.
template
<
typename
Key
>
class
BaseStateKeysFixed
<
Key
,
true
>
{
protected:
void
init
(
const
ColumnRawPtrs
&
key_columns
)
{
null_maps
.
reserve
(
key_columns
.
size
());
actual_columns
.
reserve
(
key_columns
.
size
());
for
(
const
auto
&
col
:
key_columns
)
{
if
(
col
->
isColumnNullable
())
{
const
auto
&
nullable_col
=
static_cast
<
const
ColumnNullable
&>
(
*
col
);
actual_columns
.
push_back
(
&
nullable_col
.
getNestedColumn
());
null_maps
.
push_back
(
&
nullable_col
.
getNullMapColumn
());
}
else
{
actual_columns
.
push_back
(
col
);
null_maps
.
push_back
(
nullptr
);
}
}
}
/// Return the columns which actually contain the values of the keys.
/// For a given key column, if it is nullable, we return its nested
/// column. Otherwise we return the key column itself.
inline
const
ColumnRawPtrs
&
getActualColumns
()
const
{
return
actual_columns
;
}
/// Create a bitmap that indicates whether, for a particular row,
/// a key column bears a null value or not.
KeysNullMap
<
Key
>
createBitmap
(
size_t
row
)
const
{
KeysNullMap
<
Key
>
bitmap
{};
for
(
size_t
k
=
0
;
k
<
null_maps
.
size
();
++
k
)
{
if
(
null_maps
[
k
]
!=
nullptr
)
{
const
auto
&
null_map
=
static_cast
<
const
ColumnUInt8
&>
(
*
null_maps
[
k
]).
getData
();
if
(
null_map
[
row
]
==
1
)
{
size_t
bucket
=
k
/
8
;
size_t
offset
=
k
%
8
;
bitmap
[
bucket
]
|=
UInt8
(
1
)
<<
offset
;
}
}
}
return
bitmap
;
}
private:
ColumnRawPtrs
actual_columns
;
ColumnRawPtrs
null_maps
;
};
/// Case where nullable keys are not supported.
template <typename Key>
class BaseStateKeysFixed<Key, false>
{
protected:
    /// Without nullable keys the columns are used as-is.
    void init(const ColumnRawPtrs & columns)
    {
        actual_columns = columns;
    }

    const ColumnRawPtrs & getActualColumns() const
    {
        return actual_columns;
    }

    /// Must never be reached when there are no nullable keys.
    KeysNullMap<Key> createBitmap(size_t) const
    {
        throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
                        " is forbidden",
                        ErrorCodes::LOGICAL_ERROR};
    }

private:
    ColumnRawPtrs actual_columns;
};
}
}
}
dbms/src/Interpreters/Aggregator.cpp
浏览文件 @
6fce028b
...
...
@@ -609,20 +609,34 @@ void NO_INLINE Aggregator::executeImplCase(
/// NOTE When editing this code, also pay attention to SpecializedAggregator.h.
/// For all rows.
AggregateDataPtr
value
=
nullptr
;
for
(
size_t
i
=
0
;
i
<
rows
;
++
i
)
{
bool
inserted
=
false
;
/// Inserted a new key, or was this key already?
AggregateDataPtr
*
aggregate_data
=
nullptr
;
AggregateDataPtr
aggregate_data
=
nullptr
;
if
constexpr
(
!
no_more_keys
)
/// Insert.
aggregate_data
=
state
.
emplaceKey
(
method
.
data
,
i
,
inserted
,
*
aggregates_pool
);
{
auto
emplace_result
=
state
.
emplaceKey
(
method
.
data
,
i
,
*
aggregates_pool
);
/// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
if
(
emplace_result
.
isInserted
())
{
/// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
emplace_result
.
setMapped
(
nullptr
);
aggregate_data
=
aggregates_pool
->
alignedAlloc
(
total_size_of_aggregate_states
,
align_aggregate_states
);
createAggregateStates
(
aggregate_data
);
emplace_result
.
setMapped
(
aggregate_data
);
}
else
aggregate_data
=
emplace_result
.
getMapped
();
}
else
{
/// Add only if the key already exists.
bool
found
=
false
;
aggregate_data
=
state
.
findKey
(
method
.
data
,
i
,
found
,
*
aggregates_pool
);
auto
find_result
=
state
.
findKey
(
method
.
data
,
i
,
*
aggregates_pool
);
if
(
find_result
.
isFound
())
aggregate_data
=
find_result
.
getMapped
();
}
/// aggregate_data == nullptr means that the new key did not fit in the hash table because of no_more_keys.
...
...
@@ -631,20 +645,7 @@ void NO_INLINE Aggregator::executeImplCase(
if
(
!
aggregate_data
&&
!
overflow_row
)
continue
;
/// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
if
(
inserted
)
{
/// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
*
aggregate_data
=
nullptr
;
AggregateDataPtr
place
=
aggregates_pool
->
alignedAlloc
(
total_size_of_aggregate_states
,
align_aggregate_states
);
createAggregateStates
(
place
);
*
aggregate_data
=
place
;
state
.
cacheData
(
i
,
place
);
}
value
=
aggregate_data
?
*
aggregate_data
:
overflow_row
;
AggregateDataPtr
value
=
aggregate_data
?
aggregate_data
:
overflow_row
;
/// Add values to the aggregate functions.
for
(
AggregateFunctionInstruction
*
inst
=
aggregate_instructions
;
inst
->
that
;
++
inst
)
...
...
@@ -1951,17 +1952,28 @@ void NO_INLINE Aggregator::mergeStreamsImplCase(
size_t
rows
=
block
.
rows
();
for
(
size_t
i
=
0
;
i
<
rows
;
++
i
)
{
typename
Table
::
iterator
it
;
AggregateDataPtr
*
aggregate_data
=
nullptr
;
bool
inserted
=
false
;
/// Inserted a new key, or was this key already?
AggregateDataPtr
aggregate_data
=
nullptr
;
if
(
!
no_more_keys
)
aggregate_data
=
state
.
emplaceKey
(
data
,
i
,
inserted
,
*
aggregates_pool
);
{
auto
emplace_result
=
state
.
emplaceKey
(
data
,
i
,
*
aggregates_pool
);
if
(
emplace_result
.
isInserted
())
{
emplace_result
.
setMapped
(
nullptr
);
aggregate_data
=
aggregates_pool
->
alignedAlloc
(
total_size_of_aggregate_states
,
align_aggregate_states
);
createAggregateStates
(
aggregate_data
);
emplace_result
.
setMapped
(
aggregate_data
);
}
else
aggregate_data
=
emplace_result
.
getMapped
();
}
else
{
bool
found
;
aggregate_data
=
state
.
findKey
(
data
,
i
,
found
,
*
aggregates_pool
);
auto
find_result
=
state
.
findKey
(
data
,
i
,
*
aggregates_pool
);
if
(
find_result
.
isFound
())
aggregate_data
=
find_result
.
getMapped
();
}
/// aggregate_data == nullptr means that the new key did not fit in the hash table because of no_more_keys.
...
...
@@ -1970,19 +1982,7 @@ void NO_INLINE Aggregator::mergeStreamsImplCase(
if
(
!
aggregate_data
&&
!
overflow_row
)
continue
;
/// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
if
(
inserted
)
{
*
aggregate_data
=
nullptr
;
AggregateDataPtr
place
=
aggregates_pool
->
alignedAlloc
(
total_size_of_aggregate_states
,
align_aggregate_states
);
createAggregateStates
(
place
);
*
aggregate_data
=
place
;
state
.
cacheData
(
i
,
place
);
}
AggregateDataPtr
value
=
aggregate_data
?
*
aggregate_data
:
overflow_row
;
AggregateDataPtr
value
=
aggregate_data
?
aggregate_data
:
overflow_row
;
/// Merge state of aggregate functions.
for
(
size_t
j
=
0
;
j
<
params
.
aggregates_size
;
++
j
)
...
...
dbms/src/Interpreters/Aggregator.h
浏览文件 @
6fce028b
...
...
@@ -158,7 +158,7 @@ struct AggregationMethodOneNumber
AggregationMethodOneNumber
(
const
Other
&
other
)
:
data
(
other
.
data
)
{}
/// To use one `Method` in different threads, use different `State`.
using
State
=
ColumnsHashing
::
HashMethodOneNumber
<
Data
,
FieldType
>
;
using
State
=
ColumnsHashing
::
HashMethodOneNumber
<
typename
Data
::
value_type
,
Mapped
,
FieldType
>
;
/// Use optimization for low cardinality.
static
const
bool
low_cardinality_optimization
=
false
;
...
...
@@ -188,7 +188,7 @@ struct AggregationMethodString
template
<
typename
Other
>
AggregationMethodString
(
const
Other
&
other
)
:
data
(
other
.
data
)
{}
using
State
=
ColumnsHashing
::
HashMethodString
<
Data
>
;
using
State
=
ColumnsHashing
::
HashMethodString
<
typename
Data
::
value_type
,
Mapped
>
;
static
const
bool
low_cardinality_optimization
=
false
;
...
...
@@ -216,7 +216,7 @@ struct AggregationMethodFixedString
template
<
typename
Other
>
AggregationMethodFixedString
(
const
Other
&
other
)
:
data
(
other
.
data
)
{}
using
State
=
ColumnsHashing
::
HashMethodFixedString
<
Data
>
;
using
State
=
ColumnsHashing
::
HashMethodFixedString
<
typename
Data
::
value_type
,
Mapped
>
;
static
const
bool
low_cardinality_optimization
=
false
;
...
...
@@ -246,7 +246,7 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
template
<
typename
Other
>
explicit
AggregationMethodSingleLowCardinalityColumn
(
const
Other
&
other
)
:
Base
(
other
)
{}
using
State
=
ColumnsHashing
::
HashMethodSingleLowCardinalityColumn
<
BaseState
,
true
>
;
using
State
=
ColumnsHashing
::
HashMethodSingleLowCardinalityColumn
<
BaseState
,
Mapped
,
true
>
;
static
const
bool
low_cardinality_optimization
=
true
;
...
...
@@ -277,7 +277,7 @@ struct AggregationMethodKeysFixed
template
<
typename
Other
>
AggregationMethodKeysFixed
(
const
Other
&
other
)
:
data
(
other
.
data
)
{}
using
State
=
ColumnsHashing
::
HashMethodKeysFixed
<
Data
,
has_nullable_keys
,
has_low_cardinality
>
;
using
State
=
ColumnsHashing
::
HashMethodKeysFixed
<
typename
Data
::
value_type
,
Key
,
Mapped
,
has_nullable_keys
,
has_low_cardinality
>
;
static
const
bool
low_cardinality_optimization
=
false
;
...
...
@@ -355,7 +355,7 @@ struct AggregationMethodSerialized
template
<
typename
Other
>
AggregationMethodSerialized
(
const
Other
&
other
)
:
data
(
other
.
data
)
{}
using
State
=
ColumnsHashing
::
HashMethodSerialized
<
Data
>
;
using
State
=
ColumnsHashing
::
HashMethodSerialized
<
typename
Data
::
value_type
,
Mapped
>
;
static
const
bool
low_cardinality_optimization
=
false
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录