Commit e4785aeb
Authored Aug 02, 2017 by Vitaliy Lyudvichenko
Committed by alexey-milovidov on Aug 13, 2017
Simplified clean queue logic. Added more tests. [#CLICKHOUSE-3128]
Parent: c2e84d6d

Showing 17 changed files with 337 additions and 252 deletions (+337 -252)
dbms/src/Common/Exception.cpp                                                                           +1   -1
dbms/src/Common/ZooKeeper/Lock.cpp                                                                      +5   -0
dbms/src/Common/ZooKeeper/Lock.h                                                                        +1   -0
dbms/src/Common/ZooKeeper/ZooKeeper.h                                                                   +1   -1
dbms/src/Core/ErrorCodes.cpp                                                                            +1   -0
dbms/src/Interpreters/DDLWorker.cpp                                                                     +113 -108
dbms/src/Interpreters/DDLWorker.h                                                                       +5   -3
dbms/tests/integration/helpers/client.py                                                                +1   -1
dbms/tests/integration/helpers/cluster.py                                                               +20  -4
dbms/tests/integration/helpers/network.py                                                               +21  -0
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster.xml                                +0   -28
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_default_database.xml               +0   -32
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_no_replicas.xml                    +0   -34
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_without_internal_replication.xml   +0   -28
dbms/tests/integration/test_distributed_ddl/configs/config.d/clusters.xml                               +119 -0
dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml                                    +3   -0
dbms/tests/integration/test_distributed_ddl/test.py                                                     +46  -12
dbms/src/Common/Exception.cpp

@@ -258,7 +258,7 @@ bool ExecutionStatus::tryDeserializeText(const std::string & data)

 ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_of_message)
 {
-    String msg = start_of_message.empty() ? "" : (start_of_message + ": " + getCurrentExceptionMessage(false, true));
+    String msg = (start_of_message.empty() ? "" : (start_of_message + ": ")) + getCurrentExceptionMessage(false, true);
     return ExecutionStatus(getCurrentExceptionCode(), msg);
 }
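The one-line change above moves getCurrentExceptionMessage() out of the ternary, so the current exception text is kept even when start_of_message is empty. A minimal Python sketch of the before/after behaviour (function and variable names are illustrative only, not part of the ClickHouse code):

    def old_msg(prefix, exception_text):
        # old logic: the exception text lived inside the "prefix is non-empty" branch
        return "" if prefix == "" else prefix + ": " + exception_text

    def new_msg(prefix, exception_text):
        # new logic: the exception text is always appended
        return ("" if prefix == "" else prefix + ": ") + exception_text

    print(old_msg("", "ZooKeeper session expired"))  # '' - the message was silently dropped
    print(new_msg("", "ZooKeeper session expired"))  # 'ZooKeeper session expired'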
dbms/src/Common/ZooKeeper/Lock.cpp

@@ -138,3 +138,8 @@ void Lock::unlockOrMoveIfFailed(std::vector<zkutil::Lock> & failed_to_unlock_locks)
         }
     }
 }
+
+void Lock::unlockAssumeLockNodeRemovedManually()
+{
+    locked.reset(nullptr);
+}
dbms/src/Common/ZooKeeper/Lock.h

@@ -60,6 +60,7 @@ namespace zkutil
         void unlock();
         void unlockOrMoveIfFailed(std::vector<zkutil::Lock> & failed_to_unlock_locks);
+        void unlockAssumeLockNodeRemovedManually();
         bool tryLock();
dbms/src/Common/ZooKeeper/ZooKeeper.h

@@ -95,7 +95,7 @@ public:
     /// Throw an exception if something went wrong.
     std::string create(const std::string & path, const std::string & data, int32_t mode);

-    /// Doesn not throw in the following cases:
+    /// Does not throw in the following cases:
     /// * The parent for the created node does not exist
     /// * The parent is ephemeral.
     /// * The node already exists.
dbms/src/Core/ErrorCodes.cpp

@@ -381,6 +381,7 @@ namespace ErrorCodes
     extern const int CANNOT_PARSE_UUID = 376;
     extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE = 377;
     extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS = 378;
+    extern const int UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK = 379;

     extern const int KEEPER_EXCEPTION = 999;
     extern const int POCO_EXCEPTION = 1000;
dbms/src/Interpreters/DDLWorker.cpp

@@ -49,8 +49,9 @@ namespace ErrorCodes
     extern const int INCONSISTENT_TABLE_ACCROSS_SHARDS;
     extern const int INCONSISTENT_CLUSTER_DEFINITION;
     extern const int TIMEOUT_EXCEEDED;
-    extern const int UNFINISHED;
     extern const int UNKNOWN_TYPE_OF_QUERY;
+    extern const int UNFINISHED;
+    extern const int UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK;
 }
@@ -189,6 +190,15 @@ struct DDLTask
 };

+static std::unique_ptr<zkutil::Lock> createSimpleZooKeeperLock(
+    std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & lock_prefix, const String & lock_name, const String & lock_message)
+{
+    auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
+    zookeeper_holder->initFromInstance(zookeeper);
+    return std::make_unique<zkutil::Lock>(std::move(zookeeper_holder), lock_prefix, lock_name, lock_message);
+}
+
 static bool isSupportedAlterType(int type)
 {
     static const std::unordered_set<int> supported_alter_types{
@@ -214,6 +224,7 @@ DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_, const
     {
         task_max_lifetime = config->getUInt64(prefix + "task_max_lifetime", task_max_lifetime);
         cleanup_delay_period = config->getUInt64(prefix + "cleanup_delay_period", cleanup_delay_period);
+        max_tasks_in_queue = std::max(1UL, config->getUInt64(prefix + "max_tasks_in_queue", max_tasks_in_queue));
     }

     host_fqdn = getFQDNOrHostName();
@@ -301,18 +312,24 @@ bool DDLWorker::initAndCheckTask(const String & entry_name)
 }

+static void filterAndSortQueueNodes(Strings & all_nodes)
+{
+    all_nodes.erase(std::remove_if(all_nodes.begin(), all_nodes.end(), [] (const String & s) { return !startsWith(s, "query-"); }), all_nodes.end());
+    std::sort(all_nodes.begin(), all_nodes.end());
+}
+
 void DDLWorker::processTasks()
 {
     LOG_DEBUG(log, "Processing tasks");

     Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, event_queue_updated);
-    queue_nodes.erase(std::remove_if(queue_nodes.begin(), queue_nodes.end(), [&] (const String & s) { return !startsWith(s, "query-"); }), queue_nodes.end());
+    filterAndSortQueueNodes(queue_nodes);
     if (queue_nodes.empty())
         return;

     bool server_startup = last_processed_task_name.empty();

-    std::sort(queue_nodes.begin(), queue_nodes.end());
     auto begin_node = server_startup
         ? queue_nodes.begin()
         : std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_processed_task_name);
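filterAndSortQueueNodes() is now the single place where queue children are filtered to query-* entries and sorted, so processTasks() and cleanupQueue() see the same ordering, and the sort is what makes the std::upper_bound on last_processed_task_name valid. A small Python equivalent of the helper, for illustration only:

    def filter_and_sort_queue_nodes(all_nodes):
        # keep only DDL queue entries and process them in creation (name) order
        return sorted(node for node in all_nodes if node.startswith('query-'))

    nodes = ['query-0000000003', 'lock', 'query-0000000001', 'query-0000000002']
    print(filter_and_sort_queue_nodes(nodes))
    # ['query-0000000001', 'query-0000000002', 'query-0000000003']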
@@ -434,7 +451,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
         return;

     LOG_WARNING(log, "Not found the exact match of host " << task.host_id.readableString() << " from task " << task.entry_name
-        << " in " << " cluster " << task.cluster_name << " definition. Will try to find it using host name resolving.");
+        << " in cluster " << task.cluster_name << " definition. Will try to find it using host name resolving.");

     bool found_via_resolving = false;
     for (size_t shard_num = 0; shard_num < shards.size(); ++shard_num)
@@ -505,13 +522,26 @@ void DDLWorker::processTask(DDLTask & task)
 {
     LOG_DEBUG(log, "Processing task " << task.entry_name << " (" << task.entry.query << ")");

-    createStatusDirs(task.entry_path);
+    String dummy;
     String active_node_path = task.entry_path + "/active/" + task.host_id_str;
+    String finished_node_path = task.entry_path + "/finished/" + task.host_id_str;

-    auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral);
-    if (code != ZOK && code != ZNODEEXISTS)
+    auto code = zookeeper->tryCreateWithRetries(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy);
+
+    if (code == ZOK || code == ZNODEEXISTS)
+    {
+        // Ok
+    }
+    else if (code == ZNONODE)
+    {
+        /// There is no parent
+        createStatusDirs(task.entry_path);
+        if (ZOK != zookeeper->tryCreateWithRetries(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy))
+            throw zkutil::KeeperException(code, active_node_path);
+    }
+    else
         throw zkutil::KeeperException(code, active_node_path);

     if (!task.was_executed)
     {
         try
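The hunk above changes how the per-host "active" node is registered: creation is retried, and the status directories are only created lazily when the parent turns out to be missing (ZNONODE). A rough Python sketch of that control flow, with the ZooKeeper calls replaced by injected callables (all names here are illustrative, not part of the ClickHouse code):

    def register_active_node(try_create, create_status_dirs, active_node_path):
        """try_create(path) returns 'OK', 'NODE_EXISTS' or 'NO_NODE'; mirrors tryCreateWithRetries."""
        code = try_create(active_node_path)
        if code in ('OK', 'NODE_EXISTS'):
            return                                    # already registered, nothing to do
        if code == 'NO_NODE':
            create_status_dirs()                      # parent dirs are missing, create them lazily
            if try_create(active_node_path) != 'OK':  # then retry once
                raise RuntimeError('cannot create ' + active_node_path)
            return
        raise RuntimeError('unexpected ZooKeeper error for ' + active_node_path)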
@@ -548,13 +578,10 @@ void DDLWorker::processTask(DDLTask & task)
     /// Delete active flag and create finish flag
     zkutil::Ops ops;
-    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(task.entry_path + "/active/" + task.host_id_str, -1));
-    ops.emplace_back(std::make_unique<zkutil::Op::Create>(task.entry_path + "/finished/" + task.host_id_str, task.execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
-
-    code = zookeeper->tryMulti(ops);
-    if (code != ZOK)
-        throw zkutil::KeeperException("Cannot commit executed task to ZooKeeper " + task.entry_name, code);
+    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(active_node_path, -1));
+    ops.emplace_back(std::make_unique<zkutil::Op::Create>(finished_node_path, task.execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
+    zookeeper->multi(ops);
 }
@@ -617,10 +644,7 @@ void DDLWorker::processTaskAlter(
         bool alter_executed_by_replica = false;
         {
-            auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
-            zookeeper_holder->initFromInstance(zookeeper);
-
-            zkutil::Lock lock(zookeeper_holder, shard_path, "lock", task.host_id_str);
+            auto lock = createSimpleZooKeeperLock(zookeeper, shard_path, "lock", task.host_id_str);

             std::mt19937 rng(std::hash<String>{}(task.host_id_str) + reinterpret_cast<intptr_t>(&rng));

             for (int num_tries = 0; num_tries < 10; ++num_tries)
@@ -631,7 +655,7 @@ void DDLWorker::processTaskAlter(
                     break;
                 }

-                if (lock.tryLock())
+                if (lock->tryLock())
                 {
                     tryExecuteQuery(rewritten_query, task, task.execution_status);
@@ -641,7 +665,7 @@ void DDLWorker::processTaskAlter(
                     }

                     zookeeper->create(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);
-                    lock.unlock();
+                    lock->unlock();
                     alter_executed_by_replica = true;
                     break;
                 }
@@ -660,7 +684,7 @@ void DDLWorker::processTaskAlter(
 }

-void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
+void DDLWorker::cleanupQueue()
 {
     /// Both ZK and Poco use Unix epoch
     size_t current_time_seconds = Poco::Timestamp().epochTime();
@@ -674,111 +698,70 @@ void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
     LOG_DEBUG(log, "Cleaning queue");

-    Strings node_names_fetched = node_names_to_check ? Strings{} : zookeeper->getChildren(queue_dir);
-    const Strings & node_names = (node_names_to_check) ? *node_names_to_check : node_names_fetched;
+    Strings queue_nodes = zookeeper->getChildren(queue_dir);
+    filterAndSortQueueNodes(queue_nodes);
+
+    size_t num_outdated_nodes = (queue_nodes.size() > max_tasks_in_queue) ? queue_nodes.size() - max_tasks_in_queue : 0;
+    auto first_non_outdated_node = queue_nodes.begin() + num_outdated_nodes;

-    for (const String & node_name : node_names)
+    for (auto it = queue_nodes.cbegin(); it < queue_nodes.cend(); ++it)
     {
+        String node_name = *it;
         String node_path = queue_dir + "/" + node_name;
-        String lock_path = node_path + "/lock_write"; /// per-node lock to avoid concurrent cleaning
-
-        bool node_was_deleted = false;
-        auto delete_node = [&] ()
-        {
-            Strings childs = zookeeper->getChildren(node_path);
-            for (const String & child : childs)
-            {
-                if (child != "lock_write")
-                    zookeeper->removeRecursive(node_path + "/" + child);
-            }
-
-            zkutil::Ops ops;
-            ops.emplace_back(std::make_unique<zkutil::Op::Remove>(lock_path, -1));
-            ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path, -1));
-            zookeeper->multi(ops);
-
-            node_was_deleted = true;
-        };
+        String lock_path = node_path + "/lock";

         zkutil::Stat stat;
         String dummy;

         try
         {
-            zkutil::Ops ops;
-            ops.emplace_back(std::make_unique<zkutil::Op::Check>(node_path, -1));
-            ops.emplace_back(std::make_unique<zkutil::Op::Create>(lock_path, host_fqdn_id, zookeeper->getDefaultACL(), zkutil::CreateMode::Ephemeral));
-            auto code = zookeeper->tryMulti(ops);
-            if (code != ZOK)
-            {
-                if (code == ZNONODE)
-                {
-                    /// Task node was deleted
-                    continue;
-                }
-                else if (code == ZNODEEXISTS)
-                {
-                    /// Is it our lock?
-                    String owner;
-                    if (!zookeeper->tryGet(lock_path, owner) || owner != host_fqdn_id)
-                        continue;
-                }
-                else
-                    throw zkutil::KeeperException(code);
-            }
-
-            SCOPE_EXIT({
-                if (!node_was_deleted && !zookeeper->expired())
-                {
-                    try
-                    {
-                        zookeeper->tryRemoveWithRetries(lock_path, -1);
-                    }
-                    catch (...)
-                    {
-                        tryLogCurrentException(log, "Can't remove lock for cleaning");
-                    }
-                }
-            });
-
-            String node_data = zookeeper->get(node_path, &stat);
-            size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
-            if (zookeeper_time_seconds + task_max_lifetime < current_time_seconds)
+            /// To avoid concurrent checks and cleans
+            auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id);
+            if (!lock->tryLock())
+                continue;
+
+            auto delete_node = [&] ()
             {
-                size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
-                LOG_INFO(log, "Lifetime of task " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
+                Strings childs = zookeeper->getChildren(node_path);
+                for (const String & child : childs)
+                {
+                    if (child != "lock")
+                        zookeeper->tryRemoveRecursive(node_path + "/" + child);
+                }
+
+                /// Remove the lock node and its parent atomically
+                zkutil::Ops ops;
+                ops.emplace_back(std::make_unique<zkutil::Op::Remove>(lock_path, -1));
+                ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path, -1));
+                zookeeper->multi(ops);
+
+                lock->unlockAssumeLockNodeRemovedManually();
+            };
+
+            /// Skip if there are active nodes (it is weak guard)
+            if (zookeeper->tryGet(node_path + "/active", dummy, &stat) && stat.numChildren > 0)
+                continue;
+
+            /// Delete if too many (max_tasks_in_queue) task in queue
+            if (it < first_non_outdated_node)
+            {
+                LOG_INFO(log, "Task " << node_name << " is outdated, deleting it");
                 delete_node();
                 continue;
             }

-            Strings active_hosts = zookeeper->getChildren(node_path + "/active");
-            if (!active_hosts.empty())
-                continue;
-
-            Strings finished_hosts = zookeeper->getChildren(node_path + "/finished");
-
-            DDLLogEntry entry;
-            entry.parse(node_data);
-
-            /// Not all nodes were finished
-            if (finished_hosts.size() < entry.hosts.size())
-                continue;
-
-            /// Could be childs that are not from host list
-            bool all_finished = true;
-            NameSet finished_hosts_set(finished_hosts.begin(), finished_hosts.end());
-            for (const HostID & host : entry.hosts)
-            {
-                if (!finished_hosts_set.count(host.toString()))
-                {
-                    all_finished = false;
-                    break;
-                }
-            }
-
-            if (all_finished)
-            {
-                LOG_INFO(log, "Task " << node_name << " had been executed by each host, deleting it");
-                delete_node();
-            }
+            zookeeper->get(node_path, &stat);
+            size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
+
+            /// Delte if node lifetmie (task_max_lifetime) is expired
+            if (zookeeper_time_seconds + task_max_lifetime < current_time_seconds)
+            {
+                size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
+                LOG_INFO(log, "Lifetime of task " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
+                delete_node();
+                continue;
+            }
         }
         catch (...)
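In the rewritten cleanupQueue() above, the per-task bookkeeping (owner checks, SCOPE_EXIT lock removal, comparing finished hosts against the DDL log entry) is replaced by two simple rules: drop the oldest entries once the queue exceeds max_tasks_in_queue, and drop any entry older than task_max_lifetime, while skipping entries that still have children under /active. A minimal Python sketch of that selection policy (the data structures here are illustrative; the real code walks ZooKeeper nodes):

    def select_nodes_to_delete(queue_nodes, max_tasks_in_queue, task_max_lifetime, current_time):
        """queue_nodes: dicts with 'name', 'mtime', 'active_children', sorted by name (creation order)."""
        num_outdated = max(0, len(queue_nodes) - max_tasks_in_queue)
        to_delete = []
        for index, node in enumerate(queue_nodes):
            if node['active_children'] > 0:          # weak guard: somebody is still executing it
                continue
            if index < num_outdated:                 # queue over capacity: oldest entries go first
                to_delete.append(node['name'])
                continue
            if node['mtime'] + task_max_lifetime < current_time:   # entry has expired
                to_delete.append(node['name'])
        return to_delete

    # Example: 12 tasks, capacity 10, lifetime 3600 s (the values used by the test config below)
    nodes = [{'name': 'query-%010d' % i, 'mtime': 1000 + i, 'active_children': 0} for i in range(12)]
    print(select_nodes_to_delete(nodes, max_tasks_in_queue=10, task_max_lifetime=3600, current_time=2000))
    # ['query-0000000000', 'query-0000000001'] - only the two oldest are over capacity, none has expired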
@@ -806,13 +789,35 @@ void DDLWorker::createStatusDirs(const std::string & node_path)

 String DDLWorker::enqueueQuery(DDLLogEntry & entry)
 {
     if (entry.hosts.empty())
-        return {};
+        throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR);

     String query_path_prefix = queue_dir + "/query-";
     zookeeper->createAncestors(query_path_prefix);

-    String node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
-    createStatusDirs(node_path);
+    String node_path;
+    try
+    {
+        node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
+    }
+    catch (const zkutil::KeeperException & e)
+    {
+        /// TODO: This condition could be relaxed with additional post-checks
+        if (e.isTemporaryError())
+            throw Exception("Unknown status of distributed DDL task", ErrorCodes::UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK);
+
+        throw;
+    }
+
+    /// Optional step
+    try
+    {
+        createStatusDirs(node_path);
+    }
+    catch (...)
+    {
+        LOG_INFO(log, "An error occurred while creating auxiliary ZooKeeper directories in " << node_path << " . They will be created later"
+            << ". Error : " << getCurrentExceptionMessage(true));
+    }

     return node_path;
 }
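enqueueQuery() now distinguishes three outcomes: an empty host list is a logical error, a temporary ZooKeeper failure during the sequential create is reported as an unknown task status (the new error code 379), and the status-directory creation becomes best effort because processTask() recreates the directories on ZNONODE. A rough Python sketch of that flow, with the ZooKeeper operations injected as callables (names are illustrative only):

    class UnknownTaskStatus(Exception):
        pass

    def enqueue_query(hosts, create_sequential_node, create_status_dirs, is_temporary_error):
        if not hosts:
            raise ValueError('Empty host list in a distributed DDL task')
        try:
            node_path = create_sequential_node()   # e.g. /clickhouse/task_queue/ddl/query-0000000042
        except Exception as e:
            if is_temporary_error(e):
                raise UnknownTaskStatus('Unknown status of distributed DDL task')
            raise
        try:
            create_status_dirs(node_path)          # optional: workers recreate these if missing
        except Exception:
            pass                                   # logged and retried later in the real code
        return node_path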
dbms/src/Interpreters/DDLWorker.h

@@ -59,7 +59,7 @@ private:
     /// Checks and cleanups queue's nodes
-    void cleanupQueue(const Strings * node_names_to_check = nullptr);
+    void cleanupQueue();

     void createStatusDirs(const std::string & node_name);

@@ -93,10 +93,12 @@ private:
     size_t last_cleanup_time_seconds = 0;

-    /// Delete node if its age is greater than that
-    size_t task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds)
     /// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
     size_t cleanup_delay_period = 60; // minute (in seconds)
+    /// Delete node if its age is greater than that
+    size_t task_max_lifetime = 7 * 24 * 60 * 60; // week (in seconds)
+    /// How many tasks could be in the queue
+    size_t max_tasks_in_queue = 1000;

     friend class DDLQueryStatusInputSream;
     friend class DDLTask;
dbms/tests/integration/helpers/client.py

@@ -8,7 +8,7 @@ class Client:
     def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'):
         self.host = host
         self.port = port
-        self.command = [command, '--host', self.host, '--port', str(self.port)]
+        self.command = [command, '--host', self.host, '--port', str(self.port), '--stacktrace']

     def query(self, sql, stdin=None, timeout=None):
dbms/tests/integration/helpers/cluster.py

@@ -49,7 +49,8 @@ class ClickHouseCluster:
         self.is_up = False

-    def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macroses={}, with_zookeeper=False, clickhouse_path_dir=None):
+    def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macroses={}, with_zookeeper=False,
+                     clickhouse_path_dir=None, hostname=None):
         """Add an instance to the cluster.

         name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
@@ -65,7 +66,10 @@ class ClickHouseCluster:
         if name in self.instances:
             raise Exception("Can\'t add instance `%s': there is already an instance with the same name!" % name)

-        instance = ClickHouseInstance(self, self.base_dir, name, config_dir, main_configs, user_configs, macroses, with_zookeeper, self.base_configs_dir, self.server_bin_path, clickhouse_path_dir)
+        instance = ClickHouseInstance(
+            self, self.base_dir, name, config_dir, main_configs, user_configs, macroses, with_zookeeper,
+            self.base_configs_dir, self.server_bin_path, clickhouse_path_dir, hostname=hostname)
+
         self.instances[name] = instance
         self.base_cmd.extend(['--file', instance.docker_compose_path])
         if with_zookeeper and not self.with_zookeeper:
@@ -135,7 +139,7 @@ version: '2'
 services:
     {name}:
         image: ubuntu:14.04
-        hostname: {name}
+        hostname: {hostname}
         user: '{uid}'
         volumes:
             - {binary_path}:/usr/bin/clickhouse:ro
@@ -153,12 +157,13 @@ services:
 class ClickHouseInstance:
     def __init__(
             self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macroses,
-            with_zookeeper, base_configs_dir, server_bin_path, clickhouse_path_dir):
+            with_zookeeper, base_configs_dir, server_bin_path, clickhouse_path_dir, hostname=None):

         self.name = name
         self.base_cmd = cluster.base_cmd[:]
         self.docker_id = cluster.get_instance_docker_id(self.name)
         self.cluster = cluster
+        self.hostname = hostname if hostname is not None else self.name

         self.custom_config_dir = p.abspath(p.join(base_path, custom_config_dir)) if custom_config_dir else None
         self.custom_main_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_main_configs]
@@ -187,6 +192,16 @@ class ClickHouseInstance:
         return self.client.get_query_request(*args, **kwargs)

+    def exec_in_container(self, cmd, **kwargs):
+        container = self.get_docker_handle()
+        handle = self.docker_client.api.exec_create(container.id, cmd, **kwargs)
+        output = self.docker_client.api.exec_start(handle).decode('utf8')
+        exit_code = self.docker_client.api.exec_inspect(handle)['ExitCode']
+        if exit_code:
+            raise Exception('Cmd {} failed! Return code {}. Output {}'.format(' '.join(cmd), exit_code, output))
+        return output
+
     def get_docker_handle(self):
         return self.docker_client.containers.get(self.docker_id)
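exec_in_container() runs a command inside the instance's container through the Docker exec API and raises on a non-zero exit code. The new test uses it to overwrite a server config file in place; a usage sketch along the lines of what test.py does, assuming a started ClickHouseCluster named cluster and a config string clusters_config already exist:

    # Overwrite the clusters config inside a running instance, as the DDL test does
    # when it rewrites host names to IP addresses.
    instance = cluster.instances['ch1']
    instance.exec_in_container(
        ['bash', '-c', 'echo "$NEW_CONFIG" > /etc/clickhouse-server/config.d/clusters.xml'],
        environment={'NEW_CONFIG': clusters_config},
        privileged=True)

    # Read it back for debugging:
    print(instance.exec_in_container(['cat', '/etc/clickhouse-server/config.d/clusters.xml']))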
@@ -301,6 +316,7 @@ class ClickHouseInstance:
         with open(self.docker_compose_path, 'w') as docker_compose:
             docker_compose.write(DOCKER_COMPOSE_TEMPLATE.format(
                 name=self.name,
+                hostname=self.hostname,
                 uid=os.getuid(),
                 binary_path=self.server_bin_path,
                 configs_dir=configs_dir,
dbms/tests/integration/helpers/network.py

@@ -56,6 +56,15 @@ class PartitionManager:
             rule = self._iptables_rules.pop()
             _NetworkManager.get().delete_iptables_rule(**rule)

+    def pop_rules(self):
+        res = self._iptables_rules[:]
+        self.heal_all()
+        return res
+
+    def push_rules(self, rules):
+        for rule in rules:
+            self._add_rule(rule)
+
     @staticmethod
     def _check_instance(instance):
@@ -77,6 +86,18 @@ class PartitionManager:
         self.heal_all()


+class PartitionManagerDisbaler:
+    def __init__(self, manager):
+        self.manager = manager
+        self.rules = self.manager.pop_rules()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.manager.push_rules(self.rules)
+
+
 class _NetworkManager:
     """Execute commands inside a container with access to network settings.
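pop_rules()/push_rules() let a test temporarily suspend the random packet-drop rules, and PartitionManagerDisbaler (the class name is spelled this way in the code) wraps the same pattern in a context manager. test_replicated_alters uses the explicit form; the with-block form would be equivalent. A short sketch, assuming a started cluster with cluster.pm_random_drops set up as in test.py:

    # Explicit form, as used in test_replicated_alters:
    firewall_drops_rules = cluster.pm_random_drops.pop_rules()   # stop dropping ZooKeeper packets
    ddl_check_query(instance, "CREATE TABLE IF NOT EXISTS merge ON CLUSTER cluster (p Date, i Int32) "
                              "ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', p, p, 1)")
    cluster.pm_random_drops.push_rules(firewall_drops_rules)     # restore the drop rules afterwards

    # Equivalent context-manager form using the new helper class:
    with PartitionManagerDisbaler(cluster.pm_random_drops):
        ddl_check_query(instance, "DROP TABLE IF EXISTS merge ON CLUSTER cluster")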
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster.xml
deleted (100644 → 0)

<yandex>
    <remote_servers>
        <cluster>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster>
    </remote_servers>
</yandex>
\ No newline at end of file
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_default_database.xml
deleted (100644 → 0)

<yandex>
    <remote_servers>
        <cluster2>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                    <default_database>default</default_database>
                </replica>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                    <default_database>test2</default_database>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                    <default_database>default</default_database>
                </replica>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                    <default_database>test2</default_database>
                </replica>
            </shard>
        </cluster2>
    </remote_servers>
</yandex>
\ No newline at end of file
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_no_replicas.xml
deleted (100644 → 0)

<yandex>
    <remote_servers>
        <cluster_no_replicas>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster_no_replicas>
    </remote_servers>
</yandex>
\ No newline at end of file
dbms/tests/integration/test_distributed_ddl/configs/config.d/cluster_without_internal_replication.xml
deleted (100644 → 0)

<yandex>
    <remote_servers>
        <cluster_without_replication>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster_without_replication>
    </remote_servers>
</yandex>
\ No newline at end of file
dbms/tests/integration/test_distributed_ddl/configs/config.d/clusters.xml
new file (0 → 100644)

<yandex>
    <remote_servers>

        <!-- Main cluster -->
        <cluster>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster>

        <!-- Cluster with specified default database -->
        <cluster2>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                    <default_database>default</default_database>
                </replica>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                    <default_database>test2</default_database>
                </replica>
            </shard>
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                    <default_database>default</default_database>
                </replica>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                    <default_database>test2</default_database>
                </replica>
            </shard>
        </cluster2>

        <!-- Cluster without replicas -->
        <cluster_no_replicas>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster_no_replicas>

        <!-- Cluster without internal replication -->
        <cluster_without_replication>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch1</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch2</host>
                    <port>9000</port>
                </replica>
            </shard>
            <shard>
                <internal_replication>false</internal_replication>
                <replica>
                    <host>ch3</host>
                    <port>9000</port>
                </replica>
                <replica>
                    <host>ch4</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster_without_replication>

    </remote_servers>
</yandex>
\ No newline at end of file
dbms/tests/integration/test_distributed_ddl/configs/config.d/ddl.xml

 <yandex>
     <distributed_ddl>
         <path>/clickhouse/task_queue/ddl</path>
+        <max_tasks_in_queue>10</max_tasks_in_queue>
+        <task_max_lifetime>3600</task_max_lifetime>
+        <cleanup_delay_period>1</cleanup_delay_period>
     </distributed_ddl>
 </yandex>
\ No newline at end of file
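With this test configuration the cleaner keeps at most the 10 newest tasks, removes tasks older than 3600 seconds, and may run as often as once per second; the defaults in DDLWorker.h are 1000 tasks, one week, and 60 seconds. A tiny sketch of the throttling rule implied by the cleanup_delay_period comment in DDLWorker.h (the exact comparison operator is an assumption, not taken from the source):

    def should_start_cleanup(now_seconds, last_cleanup_time_seconds, cleanup_delay_period):
        # Cleaning is triggered by a new queue event, but only if the previous
        # cleanup ran more than cleanup_delay_period seconds ago (assumed >=).
        return now_seconds - last_cleanup_time_seconds >= cleanup_delay_period

    print(should_start_cleanup(now_seconds=100, last_cleanup_time_seconds=99, cleanup_delay_period=1))   # True (test config)
    print(should_start_cleanup(now_seconds=100, last_cleanup_time_seconds=90, cleanup_delay_period=60))  # False (default period)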
dbms/tests/integration/test_distributed_ddl/test.py

@@ -4,7 +4,7 @@ import datetime
 import pytest

 from helpers.cluster import ClickHouseCluster
-from helpers.network import PartitionManager
+from helpers.network import PartitionManager, PartitionManagerDisbaler
 from helpers.test_tools import TSV
@@ -17,9 +17,9 @@ def check_all_hosts_sucesfully_executed(tsv_content, num_hosts=None):
     codes = [l[2] for l in M]
     messages = [l[3] for l in M]

-    assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, tsv_content
-    assert len(set(codes)) == 1, tsv_content
-    assert codes[0] == "0", tsv_content
+    assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, "\n" + tsv_content
+    assert len(set(codes)) == 1, "\n" + tsv_content
+    assert codes[0] == "0", "\n" + tsv_content


 def ddl_check_query(instance, query, num_hosts=None):
@@ -50,8 +50,19 @@ TEST_REPLICATED_ALTERS=True
 cluster = ClickHouseCluster(__file__)


-@pytest.fixture(scope="module")
-def started_cluster():
+def replace_domains_to_ip_addresses_in_cluster_config(instances_to_replace):
+    clusters_config = open(p.join(cluster.base_dir, 'configs/config.d/clusters.xml')).read()
+
+    for inst_name, inst in cluster.instances.items():
+        clusters_config = clusters_config.replace(inst_name, str(inst.ip_address))
+
+    for inst_name in instances_to_replace:
+        inst = cluster.instances[inst_name]
+        cluster.instances[inst_name].exec_in_container(['bash', '-c', 'echo "$NEW_CONFIG" > /etc/clickhouse-server/config.d/clusters.xml'], environment={"NEW_CONFIG": clusters_config}, privileged=True)
+        # print cluster.instances[inst_name].exec_in_container(['cat', "/etc/clickhouse-server/config.d/clusters.xml"])
+
+
+def init_cluster(cluster):
     try:
         for i in xrange(4):
             cluster.add_instance(
@@ -62,15 +73,19 @@ def started_cluster():
         cluster.start()

+        # Replace config files for testing ability to set host in DNS and IP formats
+        replace_domains_to_ip_addresses_in_cluster_config(['ch1', 'ch3'])
+
         # Select sacrifice instance to test CONNECTION_LOSS and server fail on it
         sacrifice = cluster.instances['ch4']
         cluster.pm_random_drops = PartitionManager()
-        cluster.pm_random_drops._add_rule({'probability': 0.05, 'destination': sacrifice.ip_address, 'source_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
-        cluster.pm_random_drops._add_rule({'probability': 0.05, 'source': sacrifice.ip_address, 'destination_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
+        cluster.pm_random_drops._add_rule({'probability': 0.01, 'destination': sacrifice.ip_address, 'source_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})
+        cluster.pm_random_drops._add_rule({'probability': 0.01, 'source': sacrifice.ip_address, 'destination_port': 2181, 'action': 'REJECT --reject-with tcp-reset'})

         # Initialize databases and service tables
         instance = cluster.instances['ch1']
+        instance.query("SELECT 1")

         ddl_check_query(instance, """
 CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
     (database String, name String, engine String, metadata_modification_time DateTime)
@@ -79,20 +94,31 @@ CREATE TABLE IF NOT EXISTS all_tables ON CLUSTER 'cluster_no_replicas'
         ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS test ON CLUSTER 'cluster'")

     except Exception as e:
         print e
         raise


+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        init_cluster(cluster)
+
         yield cluster

         instance = cluster.instances['ch1']
         ddl_check_query(instance, "DROP DATABASE test ON CLUSTER 'cluster'")
         ddl_check_query(instance, "DROP DATABASE IF EXISTS test2 ON CLUSTER 'cluster'")

-    finally:
-        # Remove iptables rules for sacrifice instance
-        cluster.pm_random_drops.heal_all()
+        # Check query log to ensure that DDL queries are not executed twice
+        time.sleep(1.5)
+        for instance in cluster.instances.values():
+            ddl_check_there_are_no_dublicates(instance)

+    finally:
+        # Remove iptables rules for sacrifice instance
+        cluster.pm_random_drops.heal_all()
         #cluster.shutdown()
@@ -163,10 +189,14 @@ def test_replicated_alters(started_cluster):
     if not TEST_REPLICATED_ALTERS:
         return

+    # Temporarily disable random ZK packet drops, they might broke creation if ReplicatedMergeTree replicas
+    firewall_drops_rules = cluster.pm_random_drops.pop_rules()
+
     ddl_check_query(instance, """
 CREATE TABLE IF NOT EXISTS merge ON CLUSTER cluster (p Date, i Int32)
 ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', p, p, 1)
 """)
+
     ddl_check_query(instance, """
 CREATE TABLE IF NOT EXISTS all_merge_32 ON CLUSTER cluster (p Date, i Int32)
 ENGINE = Distributed(cluster, default, merge, i)
@@ -200,6 +230,10 @@ ENGINE = Distributed(cluster, default, merge, i)
     assert TSV(instance.query("SELECT i, s FROM all_merge_64 ORDER BY i")) == TSV(''.join(['{}\t{}\n'.format(x, x) for x in xrange(4)]))

     ddl_check_query(instance, "DROP TABLE merge ON CLUSTER cluster")
+
+    # Enable random ZK packet drops
+    cluster.pm_random_drops.push_rules(firewall_drops_rules)
+
     ddl_check_query(instance, "DROP TABLE all_merge_32 ON CLUSTER cluster")
     ddl_check_query(instance, "DROP TABLE all_merge_64 ON CLUSTER cluster")