Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
5e034186
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5e034186
编写于
3月 08, 2021
作者:
M
Maksim Kita
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added performance tests
上级
ed295a93
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
129 addition
and
57 deletion
+129
-57
src/Dictionaries/DirectDictionary.cpp
src/Dictionaries/DirectDictionary.cpp
+51
-57
tests/performance/direct_dictionary.xml
tests/performance/direct_dictionary.xml
+78
-0
未找到文件。
src/Dictionaries/DirectDictionary.cpp
浏览文件 @
5e034186
...
...
@@ -6,7 +6,9 @@
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Common/HashTable/HashSet.h>
#include <Common/HashTable/HashMap.h>
#include <Interpreters/AggregationCommon.h>
namespace
DB
{
...
...
@@ -259,24 +261,21 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
Arena
complex_key_arena
;
const
DictionaryAttribute
&
attribute
=
dict_struct
.
getAttribute
(
attribute_name
,
result_type
);
auto
result
=
attribute
.
type
->
createColumn
();
DefaultValueProvider
default_value_provider
(
attribute
.
null_value
,
default_values_column
);
DictionaryKeysExtractor
<
dictionary_key_type
>
extractor
(
key_columns
,
complex_key_arena
);
const
auto
&
requested_keys
=
extractor
.
getKeys
();
HashMap
<
KeyType
,
size_t
>
key_to_fetched_index
;
key_to_fetched_index
.
reserve
(
requested_keys
.
size
());
auto
fetched_from_storage
=
attribute
.
type
->
createColumn
();
size_t
fetched_key_index
=
0
;
size_t
requested_attribute_index
=
attribute_index_by_name
.
find
(
attribute_name
)
->
second
;
Columns
block_key_columns
;
size_t
dictionary_keys_size
=
dict_struct
.
getKeysNames
().
size
();
size_t
requested_key_index
=
0
;
Field
block_column_value
;
/** In result stream keys are returned in same order as they were requested.
* For example if we request keys [1, 2, 3, 4] but source has only [2, 3] we need to return to client
* [default_value, 2, 3, default_value].
* For each key fetched from source current algorithm adds default values until
* requested key with requested_key_index match key fetched from source.
* At the end we also need to process tail.
*/
block_key_columns
.
reserve
(
dictionary_keys_size
);
BlockInputStreamPtr
stream
=
getSourceBlockInputStream
(
key_columns
,
requested_keys
);
...
...
@@ -284,9 +283,6 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
while
(
const
auto
block
=
stream
->
read
())
{
Columns
block_key_columns
;
block_key_columns
.
reserve
(
dictionary_keys_size
);
auto
block_columns
=
block
.
getColumns
();
/// Split into keys columns and attribute columns
...
...
@@ -301,35 +297,39 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
size_t
block_keys_size
=
block_keys
.
size
();
const
auto
&
block_column
=
block
.
safeGetByPosition
(
dictionary_keys_size
+
requested_attribute_index
).
column
;
fetched_from_storage
->
insertRangeFrom
(
*
block_column
,
0
,
block_keys_size
);
for
(
size_t
block_key_index
=
0
;
block_key_index
<
block_keys_size
;
++
block_key_index
)
{
auto
block_key
=
block_keys
[
block_key_index
];
const
auto
&
block_key
=
block_keys
[
block_key_index
];
while
(
requested_key_index
<
requested_keys
.
size
()
&&
block_key
!=
requested_keys
[
requested_key_index
])
{
block_column_value
=
default_value_provider
.
getDefaultValue
(
requested_key_index
);
result
->
insert
(
block_column_value
);
++
requested_key_index
;
}
block_column
->
get
(
block_key_index
,
block_column_value
);
result
->
insert
(
block_column_value
);
++
requested_key_index
;
key_to_fetched_index
[
block_key
]
=
fetched_key_index
;
++
fetched_key_index
;
}
block_key_columns
.
clear
();
}
stream
->
readSuffix
();
Field
value_to_insert
;
size_t
requested_keys_size
=
requested_keys
.
size
();
auto
result
=
fetched_from_storage
->
cloneEmpty
();
result
->
reserve
(
requested_keys_size
);
Field
default_value
;
/// Process the tail: if the source returned fewer keys than were requested, insert default values for the remaining keys.
for
(;
requested_key_index
<
requested_keys_size
;
++
requested_key_index
)
for
(
size_t
requested_key_index
=
0
;
requested_key_index
<
requested_keys_size
;
++
requested_key_index
)
{
default_value
=
default_value_provider
.
getDefaultValue
(
requested_key_index
);
result
->
insert
(
default_value
);
const
auto
requested_key
=
requested_keys
[
requested_key_index
];
const
auto
*
it
=
key_to_fetched_index
.
find
(
requested_key
);
if
(
it
)
fetched_from_storage
->
get
(
it
->
getMapped
(),
value_to_insert
);
else
value_to_insert
=
default_value_provider
.
getDefaultValue
(
requested_key_index
);
result
->
insert
(
value_to_insert
);
}
query_count
.
fetch_add
(
requested_keys_size
,
std
::
memory_order_relaxed
);
...
...
@@ -349,17 +349,21 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(const Columns &
const
auto
&
requested_keys
=
requested_keys_extractor
.
getKeys
();
size_t
requested_keys_size
=
requested_keys
.
size
();
HashMap
<
KeyType
,
size_t
>
requested_key_to_index
;
requested_key_to_index
.
reserve
(
requested_keys_size
);
for
(
size_t
i
=
0
;
i
<
requested_keys
.
size
();
++
i
)
{
auto
requested_key
=
requested_keys
[
i
];
requested_key_to_index
[
requested_key
]
=
i
;
}
auto
result
=
ColumnUInt8
::
create
(
requested_keys_size
,
false
);
auto
&
result_data
=
result
->
getData
();
Columns
block_key_columns
;
size_t
dictionary_keys_size
=
dict_struct
.
getKeysNames
().
size
();
size_t
requested_key_index
=
0
;
Field
block_column_value
;
/** The algorithm is the same as in the getColumn method. There are only 2 differences:
* 1. We do not process the tail, because the result column is created with false as the default value.
* 2. If a requested key does not match the key from the source, we set false at requested_key_index.
*/
block_key_columns
.
reserve
(
dictionary_keys_size
);
BlockInputStreamPtr
stream
=
getSourceBlockInputStream
(
key_columns
,
requested_keys
);
...
...
@@ -369,9 +373,6 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(const Columns &
{
auto
block_columns
=
block
.
getColumns
();
Columns
block_key_columns
;
block_key_columns
.
reserve
(
dictionary_keys_size
);
/// Split into keys columns and attribute columns
for
(
size_t
i
=
0
;
i
<
dictionary_keys_size
;
++
i
)
{
...
...
@@ -381,28 +382,21 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(const Columns &
DictionaryKeysExtractor
<
dictionary_key_type
>
block_keys_extractor
(
block_key_columns
,
complex_key_arena
);
const
auto
&
block_keys
=
block_keys_extractor
.
getKeys
();
size_t
block_keys_size
=
block_keys
.
size
();
for
(
size_t
block_key_index
=
0
;
block_key_index
<
block_keys_size
;
++
block_key_index
)
for
(
const
auto
&
block_key
:
block_keys
)
{
auto
block_key
=
block_keys
[
block_key_index
];
const
auto
*
it
=
requested_key_to_index
.
find
(
block_key
);
assert
(
it
);
while
(
requested_key_index
<
requested_keys
.
size
()
&&
block_key
!=
requested_keys
[
requested_key_index
])
{
result_data
[
requested_key_index
]
=
false
;
++
requested_key_index
;
}
result_data
[
requested_key_index
]
=
true
;
++
requested_key_index
;
size_t
result_data_found_index
=
it
->
getMapped
();
result_data
[
result_data_found_index
]
=
true
;
}
block_key_columns
.
clear
();
}
stream
->
readSuffix
();
/// We do not need additional code for the tail because the result was initialized with false values
query_count
.
fetch_add
(
requested_keys_size
,
std
::
memory_order_relaxed
);
return
result
;
...
...
tests/performance/direct_dictionary.xml
0 → 100644
浏览文件 @
5e034186
<test>
    <!--
        Performance test for dictGet over DIRECT and COMPLEX_KEY_DIRECT dictionaries.
        Each source table is filled with 100000 rows while every query requests 150000 keys,
        so the last 50000 requested keys are absent from the source.
    -->

    <!-- Simple (UInt64) key: source table, data and dictionary. -->
    <create_query>
        CREATE TABLE simple_direct_dictionary_test_table
        (
            id UInt64,
            value_int UInt64,
            value_string String,
            value_decimal Decimal64(8),
            value_string_nullable Nullable(String)
        ) ENGINE = TinyLog;
    </create_query>

    <create_query>
        INSERT INTO simple_direct_dictionary_test_table
        SELECT number, number, toString(number), toDecimal64(number, 8), toString(number)
        FROM system.numbers
        LIMIT 100000;
    </create_query>

    <create_query>
        CREATE DICTIONARY simple_direct_dictionary
        (
            id UInt64,
            value_int UInt64,
            value_string String,
            value_decimal Decimal64(8),
            value_string_nullable Nullable(String)
        )
        PRIMARY KEY id
        SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_direct_dictionary_test_table'))
        LAYOUT(DIRECT())
    </create_query>

    <!-- Complex (UInt64, String) key: source table, data and dictionary. -->
    <create_query>
        CREATE TABLE complex_direct_dictionary_test_table
        (
            id UInt64,
            id_key String,
            value_int UInt64,
            value_string String,
            value_decimal Decimal64(8),
            value_string_nullable Nullable(String)
        ) ENGINE = TinyLog;
    </create_query>

    <!-- The six selected columns match the complex-key table, so the data must be inserted there
         (not into the five-column simple-key table). -->
    <create_query>
        INSERT INTO complex_direct_dictionary_test_table
        SELECT number, toString(number), number, toString(number), toDecimal64(number, 8), toString(number)
        FROM system.numbers
        LIMIT 100000;
    </create_query>

    <create_query>
        CREATE DICTIONARY complex_direct_dictionary
        (
            id UInt64,
            id_key String,
            value_int UInt64,
            value_string String,
            value_decimal Decimal64(8),
            value_string_nullable Nullable(String)
        )
        PRIMARY KEY id, id_key
        SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_direct_dictionary_test_table'))
        LAYOUT(COMPLEX_KEY_DIRECT())
    </create_query>

    <!-- Simple-key lookups, one query per attribute type. -->
    <query>
        SELECT dictGet('default.simple_direct_dictionary', 'value_int', number) FROM system.numbers LIMIT 150000;
    </query>
    <query>
        SELECT dictGet('default.simple_direct_dictionary', 'value_string', number) FROM system.numbers LIMIT 150000;
    </query>
    <query>
        SELECT dictGet('default.simple_direct_dictionary', 'value_decimal', number) FROM system.numbers LIMIT 150000;
    </query>
    <query>
        SELECT dictGet('default.simple_direct_dictionary', 'value_string_nullable', number) FROM system.numbers LIMIT 150000;
    </query>

    <!-- Complex-key lookups, one query per attribute type. -->
    <query>
        SELECT dictGet('default.complex_direct_dictionary', 'value_int', (number, toString(number))) FROM system.numbers LIMIT 150000;
    </query>
    <query>
        SELECT dictGet('default.complex_direct_dictionary', 'value_string', (number, toString(number))) FROM system.numbers LIMIT 150000;
    </query>
    <query>
        SELECT dictGet('default.complex_direct_dictionary', 'value_decimal', (number, toString(number))) FROM system.numbers LIMIT 150000;
    </query>
    <query>
        SELECT dictGet('default.complex_direct_dictionary', 'value_string_nullable', (number, toString(number))) FROM system.numbers LIMIT 150000;
    </query>

    <!-- Cleanup: drop dictionaries before the tables they read from. -->
    <drop_query>DROP DICTIONARY IF EXISTS simple_direct_dictionary</drop_query>
    <drop_query>DROP DICTIONARY IF EXISTS complex_direct_dictionary</drop_query>
    <drop_query>DROP TABLE IF EXISTS simple_direct_dictionary_test_table</drop_query>
    <drop_query>DROP TABLE IF EXISTS complex_direct_dictionary_test_table</drop_query>
</test>
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录