提交 6bddd4c1 编写于 作者: V Vitaliy Lyudvichenko 提交者: alexey-milovidov

Clean incomplete part nodes in ZooKeeper. [#CLICKHOUSE-3040]

上级 ba499585
...@@ -878,7 +878,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) ...@@ -878,7 +878,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
LOG_ERROR(log, "Removing unexpectedly merged local part from ZooKeeper: " << name); LOG_ERROR(log, "Removing unexpectedly merged local part from ZooKeeper: " << name);
zkutil::Ops ops; zkutil::Ops ops;
removePartFromZooKeeper(name, ops); removePossiblyIncompletePartNodeFromZooKeeper(name, ops, zookeeper);
zookeeper->multi(ops); zookeeper->multi(ops);
} }
...@@ -895,7 +895,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) ...@@ -895,7 +895,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
/// We assume that this occurs before the queue is loaded (queue.initialize). /// We assume that this occurs before the queue is loaded (queue.initialize).
zkutil::Ops ops; zkutil::Ops ops;
removePartFromZooKeeper(name, ops); removePossiblyIncompletePartNodeFromZooKeeper(name, ops, zookeeper);
ops.emplace_back(std::make_unique<zkutil::Op::Create>( ops.emplace_back(std::make_unique<zkutil::Op::Create>(
replica_path + "/queue/queue-", log_entry.toString(), zookeeper->getDefaultACL(), zkutil::CreateMode::PersistentSequential)); replica_path + "/queue/queue-", log_entry.toString(), zookeeper->getDefaultACL(), zkutil::CreateMode::PersistentSequential));
zookeeper->multi(ops); zookeeper->multi(ops);
...@@ -1876,6 +1876,23 @@ void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_nam ...@@ -1876,6 +1876,23 @@ void StorageReplicatedMergeTree::removePartFromZooKeeper(const String & part_nam
} }
/// Workarond for known ZooKeeper problem, see CLICKHOUSE-3040 and ZOOKEEPER-2362
void StorageReplicatedMergeTree::removePossiblyIncompletePartNodeFromZooKeeper(const String & part_name, zkutil::Ops & ops, const zkutil::ZooKeeperPtr & zookeeper)
{
String part_path = replica_path + "/parts/" + part_name;
Names children_ = zookeeper->getChildren(part_path);
NameSet children(children_.begin(), children_.end());
if (children.size() != 2)
LOG_WARNING(log, "Will remove incomplete part node " << part_path << " from ZooKeeper");
if (children.count("checksums"))
ops.emplace_back(std::make_unique<zkutil::Op::Remove>(part_path + "/checksums", -1));
if (children.count("columns"))
ops.emplace_back(std::make_unique<zkutil::Op::Remove>(part_path + "/columns", -1));
ops.emplace_back(std::make_unique<zkutil::Op::Remove>(part_path, -1));
}
void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_name) void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_name)
{ {
auto zookeeper = getZooKeeper(); auto zookeeper = getZooKeeper();
...@@ -3784,11 +3801,16 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_) ...@@ -3784,11 +3801,16 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK(Logger * log_)
LOG_DEBUG(log, "Removing " << part->name); LOG_DEBUG(log, "Removing " << part->name);
zkutil::Ops ops; try
removePartFromZooKeeper(part->name, ops); {
auto code = zookeeper->tryMulti(ops); zkutil::Ops ops;
if (code != ZOK) removePossiblyIncompletePartNodeFromZooKeeper(part->name, ops, zookeeper);
LOG_WARNING(log, "Couldn't remove " << part->name << " from ZooKeeper: " << zkutil::ZooKeeper::error2string(code)); zookeeper->multi(ops);
}
catch (const zkutil::KeeperException & e)
{
LOG_WARNING(log, "Couldn't remove " << part->name << " from ZooKeeper: " << zkutil::ZooKeeper::error2string(e.code));
}
part->remove(); part->remove();
parts.pop_back(); parts.pop_back();
......
...@@ -378,6 +378,10 @@ private: ...@@ -378,6 +378,10 @@ private:
/// Adds actions to `ops` that remove a part from ZooKeeper. /// Adds actions to `ops` that remove a part from ZooKeeper.
void removePartFromZooKeeper(const String & part_name, zkutil::Ops & ops); void removePartFromZooKeeper(const String & part_name, zkutil::Ops & ops);
/// Like removePartFromZooKeeper, but handles NONODE and removes the node anyway, see CLICKHOUSE-3040.
/// Use it only in non-critical places for cleaning.
void removePossiblyIncompletePartNodeFromZooKeeper(const String & part_name, zkutil::Ops & ops, const zkutil::ZooKeeperPtr & zookeeper);
/// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts. /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts.
void removePartAndEnqueueFetch(const String & part_name); void removePartAndEnqueueFetch(const String & part_name);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册