Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
2dot5
ClickHouse
提交
9170e5d4
C
ClickHouse
项目概览
2dot5
/
ClickHouse
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
C
ClickHouse
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
9170e5d4
编写于
1月 23, 2020
作者:
N
Nikita Mikhaylov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
style errors
上级
9b63f289
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
69 addition
and
66 deletion
+69
-66
dbms/src/Dictionaries/CacheDictionary.cpp
dbms/src/Dictionaries/CacheDictionary.cpp
+65
-64
dbms/src/Dictionaries/CacheDictionary.h
dbms/src/Dictionaries/CacheDictionary.h
+4
-2
未找到文件。
dbms/src/Dictionaries/CacheDictionary.cpp
浏览文件 @
9170e5d4
...
...
@@ -67,7 +67,8 @@ CacheDictionary::CacheDictionary(
const
bool
allow_read_expired_keys_
,
const
size_t
max_update_queue_size_
,
const
size_t
update_queue_push_timeout_milliseconds_
,
const
size_t
each_update_finish_timeout_seconds_
)
const
size_t
each_update_finish_timeout_seconds_
,
const
size_t
max_threads_for_updates_
)
:
database
(
database_
)
,
name
(
name_
)
,
full_name
{
database_
.
empty
()
?
name_
:
(
database_
+
"."
+
name_
)}
...
...
@@ -78,19 +79,20 @@ CacheDictionary::CacheDictionary(
,
max_update_queue_size
(
max_update_queue_size_
)
,
update_queue_push_timeout_milliseconds
(
update_queue_push_timeout_milliseconds_
)
,
each_update_finish_timeout_seconds
(
each_update_finish_timeout_seconds_
)
,
max_threads_for_updates
(
max_threads_for_updates_
)
,
log
(
&
Logger
::
get
(
"ExternalDictionaries"
))
,
size
{
roundUpToPowerOfTwoOrZero
(
std
::
max
(
size_
,
size_t
(
max_collision_length
)))}
,
size_overlap_mask
{
this
->
size
-
1
}
,
cells
{
this
->
size
}
,
rnd_engine
(
randomSeed
())
,
update_queue
(
max_update_queue_size_
)
,
update_pool
(
4
)
,
update_pool
(
max_threads_for_updates
)
{
if
(
!
this
->
source_ptr
->
supportsSelectiveLoad
())
throw
Exception
{
full_name
+
": source cannot be used with CacheDictionary"
,
ErrorCodes
::
UNSUPPORTED_METHOD
};
createAttributes
();
for
(
int
i
=
0
;
i
<
4
;
++
i
)
for
(
size_t
i
=
0
;
i
<
max_threads_for_updates
;
++
i
)
{
update_pool
.
scheduleOrThrowOnError
([
this
]
{
updateThreadFunction
();
});
}
...
...
@@ -100,7 +102,8 @@ CacheDictionary::~CacheDictionary()
{
finished
=
true
;
update_queue
.
clear
();
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
max_threads_for_updates
;
++
i
)
{
auto
empty_finishing_ptr
=
std
::
make_shared
<
UpdateUnit
>
(
std
::
vector
<
Key
>
());
update_queue
.
push
(
empty_finishing_ptr
);
}
...
...
@@ -717,9 +720,16 @@ void registerDictionaryCache(DictionaryFactory & factory)
throw
Exception
{
name
+
": dictionary of layout 'cache' cannot have timeout equals to zero."
,
ErrorCodes
::
BAD_ARGUMENTS
};
const
size_t
max_threads_for_updates
=
config
.
getUInt64
(
layout_prefix
+
".max_threads_for_updates"
,
4
);
if
(
max_threads_for_updates
==
0
)
throw
Exception
{
name
+
": dictionary of layout 'cache' cannot have zero threads for updates."
,
ErrorCodes
::
BAD_ARGUMENTS
};
return
std
::
make_unique
<
CacheDictionary
>
(
database
,
name
,
dict_struct
,
std
::
move
(
source_ptr
),
dict_lifetime
,
size
,
allow_read_expired_keys
,
max_update_queue_size
,
update_queue_push_timeout_milliseconds
,
each_update_finish_timeout_seconds
);
allow_read_expired_keys
,
max_update_queue_size
,
update_queue_push_timeout_milliseconds
,
each_update_finish_timeout_seconds
,
max_threads_for_updates
);
};
factory
.
registerLayout
(
"cache"
,
create_layout
,
false
);
}
...
...
@@ -729,16 +739,12 @@ void CacheDictionary::updateThreadFunction()
setThreadName
(
"AsyncUpdater"
);
while
(
!
finished
)
{
UpdateUnitPtr
first_popped
;
update_queue
.
pop
(
first_popped
);
if
(
finished
)
break
;
///std::this_thread::sleep_for(std::chrono::milliseconds(10));
/// Here we pop as many unit pointers from update queue as we can.
/// We fix current size to avoid livelock (or too long waiting),
/// when this thread pops from the queue and other threads push to the queue.
...
...
@@ -746,17 +752,18 @@ void CacheDictionary::updateThreadFunction()
/// Word "bunch" must present in this log message, because it is being checked in tests.
if
(
current_queue_size
>
0
)
LOG_
DEBUG
(
log
,
"Performing bunch of keys update in cache dictionary with "
<<
current_queue_size
+
1
<<
" keys"
;
);
LOG_
TRACE
(
log
,
"Performing bunch of keys update in cache dictionary with "
<<
current_queue_size
+
1
<<
" keys"
);
std
::
vector
<
UpdateUnitPtr
>
update_request
;
update_request
.
reserve
(
current_queue_size
+
1
);
update_request
.
push
_back
(
first_popped
);
update_request
.
emplace
_back
(
first_popped
);
auto
current_unit_ptr
=
UpdateUnitPtr
()
;
UpdateUnitPtr
current_unit_ptr
;
while
(
update_queue
.
tryPop
(
current_unit_ptr
))
{
update_request
.
push_back
(
current_unit_ptr
);
update_request
.
emplace_back
(
std
::
move
(
current_unit_ptr
)
);
}
/// Here we prepare total count of all requested ids
...
...
@@ -907,74 +914,68 @@ void CacheDictionary::update(const std::vector<Key> & requested_ids, std::unorde
}
Stopwatch
watch
;
/// Go to external storage. Might be very slow and blocking.
auto
start
=
std
::
chrono
::
system_clock
::
now
();
auto
stream
=
source_ptr
->
loadIds
(
requested_ids
);
auto
load_ids_start
=
std
::
chrono
::
system_clock
::
now
(
);
auto
end
=
std
::
chrono
::
system_clock
::
now
();
/// Trip to external storage. Might be very bad, slow and blocking.
auto
stream
=
source_ptr
->
loadIds
(
requested_ids
);
std
::
chrono
::
duration
<
double
>
diff
=
end
-
start
;
auto
load_ids_end
=
std
::
chrono
::
system_clock
::
now
()
;
LOG_FATAL
(
log
,
"load ids "
<<
std
::
chrono
::
duration_cast
<
std
::
chrono
::
milliseconds
>
(
diff
).
count
()
<<
" ms"
);
LOG_TRACE
(
log
,
"Loading "
<<
requested_ids
.
size
()
<<
" number of ids from external storage took "
<<
std
::
chrono
::
duration_cast
<
std
::
chrono
::
milliseconds
>
(
load_ids_end
-
load_ids_start
).
count
()
<<
" ms"
);
stream
->
readPrefix
();
while
(
true
)
while
(
const
auto
block
=
stream
->
read
()
)
{
start
=
std
::
chrono
::
system_clock
::
now
();
if
(
const
auto
block
=
stream
->
read
())
{
end
=
std
::
chrono
::
system_clock
::
now
();
diff
=
end
-
start
;
LOG_FATAL
(
log
,
"read "
<<
std
::
chrono
::
duration_cast
<
std
::
chrono
::
milliseconds
>
(
diff
).
count
()
<<
" ms"
);
const
auto
id_column
=
typeid_cast
<
const
ColumnUInt64
*>
(
block
.
safeGetByPosition
(
0
).
column
.
get
());
if
(
!
id_column
)
throw
Exception
{
name
+
": id column has type different from UInt64."
,
ErrorCodes
::
TYPE_MISMATCH
};
const
auto
&
ids
=
id_column
->
getData
();
/// cache column pointers
const
auto
column_ptrs
=
ext
::
map
<
std
::
vector
>
(
ext
::
range
(
0
,
attributes
.
size
()),
[
&
block
](
size_t
i
)
{
return
block
.
safeGetByPosition
(
i
+
1
).
column
.
get
();
});
const
auto
id_column
=
typeid_cast
<
const
ColumnUInt64
*>
(
block
.
safeGetByPosition
(
0
).
column
.
get
());
if
(
!
id_column
)
throw
Exception
{
name
+
": id column has type different from UInt64."
,
ErrorCodes
::
TYPE_MISMATCH
};
const
ProfilingScopedWriteRWLock
write_lock
{
rw_lock
,
ProfileEvents
::
DictCacheLockWriteNs
}
;
const
auto
&
ids
=
id_column
->
getData
()
;
for
(
const
auto
i
:
ext
::
range
(
0
,
ids
.
size
()))
{
const
auto
id
=
ids
[
i
];
/// cache column pointers
const
auto
column_ptrs
=
ext
::
map
<
std
::
vector
>
(
ext
::
range
(
0
,
attributes
.
size
()),
[
&
block
](
size_t
i
)
{
return
block
.
safeGetByPosition
(
i
+
1
).
column
.
get
();
});
const
auto
find_result
=
findCellIdx
(
id
,
now
);
const
auto
&
cell_idx
=
find_result
.
cell_idx
;
const
ProfilingScopedWriteRWLock
write_lock
{
rw_lock
,
ProfileEvents
::
DictCacheLockWriteNs
};
auto
&
cell
=
cells
[
cell_idx
];
for
(
const
auto
attribute_idx
:
ext
::
range
(
0
,
attributes
.
size
()))
{
const
auto
&
attribute_column
=
*
column_ptrs
[
attribute_idx
];
auto
&
attribute
=
attributes
[
attribute_idx
];
for
(
const
auto
i
:
ext
::
range
(
0
,
ids
.
size
()))
{
const
auto
id
=
ids
[
i
];
setAttributeValue
(
attribute
,
cell_idx
,
attribute_column
[
i
]
);
}
const
auto
find_result
=
findCellIdx
(
id
,
now
);
const
auto
&
cell_idx
=
find_result
.
cell_idx
;
/// if cell id is zero and zero does not map to this cell, then the cell is unused
if
(
cell
.
id
==
0
&&
cell_idx
!=
zero_cell_idx
)
element_count
.
fetch_add
(
1
,
std
::
memory_order_relaxed
);
auto
&
cell
=
cells
[
cell_idx
];
cell
.
id
=
id
;
if
(
dict_lifetime
.
min_sec
!=
0
&&
dict_lifetime
.
max_sec
!=
0
)
{
std
::
uniform_int_distribution
<
UInt64
>
distribution
{
dict_lifetime
.
min_sec
,
dict_lifetime
.
max_sec
};
cell
.
setExpiresAt
(
now
+
std
::
chrono
::
seconds
{
distribution
(
rnd_engine
)});
}
else
cell
.
setExpiresAt
(
std
::
chrono
::
time_point
<
std
::
chrono
::
system_clock
>::
max
());
for
(
const
auto
attribute_idx
:
ext
::
range
(
0
,
attributes
.
size
()))
{
const
auto
&
attribute_column
=
*
column_ptrs
[
attribute_idx
];
auto
&
attribute
=
attributes
[
attribute_idx
];
/// mark corresponding id as found
remaining_ids
[
id
]
=
1
;
++
found_num
;
setAttributeValue
(
attribute
,
cell_idx
,
attribute_column
[
i
]);
}
}
else
{
break
;
/// if cell id is zero and zero does not map to this cell, then the cell is unused
if
(
cell
.
id
==
0
&&
cell_idx
!=
zero_cell_idx
)
element_count
.
fetch_add
(
1
,
std
::
memory_order_relaxed
);
cell
.
id
=
id
;
if
(
dict_lifetime
.
min_sec
!=
0
&&
dict_lifetime
.
max_sec
!=
0
)
{
std
::
uniform_int_distribution
<
UInt64
>
distribution
{
dict_lifetime
.
min_sec
,
dict_lifetime
.
max_sec
};
cell
.
setExpiresAt
(
now
+
std
::
chrono
::
seconds
{
distribution
(
rnd_engine
)});
}
else
cell
.
setExpiresAt
(
std
::
chrono
::
time_point
<
std
::
chrono
::
system_clock
>::
max
());
/// mark corresponding id as found
remaining_ids
[
id
]
=
1
;
++
found_num
;
}
}
...
...
dbms/src/Dictionaries/CacheDictionary.h
浏览文件 @
9170e5d4
...
...
@@ -54,7 +54,8 @@ public:
const
bool
allow_read_expired_keys_
,
const
size_t
max_update_queue_size_
,
const
size_t
update_queue_push_timeout_milliseconds_
,
const
size_t
each_update_finish_timeout_seconds_
);
const
size_t
each_update_finish_timeout_seconds_
,
const
size_t
max_threads_for_updates
);
~
CacheDictionary
()
override
;
...
...
@@ -84,7 +85,7 @@ public:
return
std
::
make_shared
<
CacheDictionary
>
(
database
,
name
,
dict_struct
,
source_ptr
->
clone
(),
dict_lifetime
,
size
,
allow_read_expired_keys
,
max_update_queue_size
,
update_queue_push_timeout_milliseconds
,
each_update_finish_timeout_seconds
);
update_queue_push_timeout_milliseconds
,
each_update_finish_timeout_seconds
,
max_threads_for_updates
);
}
const
IDictionarySource
*
getSource
()
const
override
{
return
source_ptr
.
get
();
}
...
...
@@ -295,6 +296,7 @@ private:
const
size_t
max_update_queue_size
;
const
size_t
update_queue_push_timeout_milliseconds
;
const
size_t
each_update_finish_timeout_seconds
;
const
size_t
max_threads_for_updates
;
Logger
*
const
log
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录