diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp index cb99c30d8837ffe218ab5e2e2d8f96438331ead5..e62731de771d468eb8b4cd9c37e2cefc23f7167c 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -141,50 +142,67 @@ String ActiveDataPartSet::getPartName(DayNum_t left_date, DayNum_t right_date, I } -bool ActiveDataPartSet::isPartDirectory(const String & dir_name, Poco::RegularExpression::MatchVec * out_matches) +bool ActiveDataPartSet::isPartDirectory(const String & dir_name) { - Poco::RegularExpression::MatchVec matches; - static Poco::RegularExpression file_name_regexp("^(\\d{8})_(\\d{8})_(-?\\d+)_(-?\\d+)_(\\d+)"); - bool res = (file_name_regexp.match(dir_name, 0, matches) && 6 == matches.size()); - if (out_matches) - *out_matches = matches; - return res; + return parsePartNameImpl(dir_name, nullptr); } - -void ActiveDataPartSet::parsePartName(const String & file_name, Part & part, const Poco::RegularExpression::MatchVec * matches_p) +bool ActiveDataPartSet::parsePartNameImpl(const String & dir_name, Part * part) { - Poco::RegularExpression::MatchVec match_vec; - if (!matches_p) + UInt32 min_yyyymmdd = 0; + UInt32 max_yyyymmdd = 0; + Int64 min_block_num = 0; + Int64 max_block_num = 0; + UInt32 level = 0; + + ReadBufferFromString in(dir_name); + + if (!tryReadIntText(min_yyyymmdd, in) + || !checkChar('_', in) + || !tryReadIntText(max_yyyymmdd, in) + || !checkChar('_', in) + || !tryReadIntText(min_block_num, in) + || !checkChar('_', in) + || !tryReadIntText(max_block_num, in) + || !checkChar('_', in) + || !tryReadIntText(level, in) + || !in.eof()) { - if (!isPartDirectory(file_name, &match_vec)) - throw Exception("Unexpected part name: " + file_name, ErrorCodes::BAD_DATA_PART_NAME); - matches_p = &match_vec; + return false; } - const Poco::RegularExpression::MatchVec & matches = *matches_p; + if (part) + { + const auto & date_lut = DateLUT::instance(); - const auto & date_lut = DateLUT::instance(); + part->left_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd); + part->right_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd); + part->left = min_block_num; + part->right = max_block_num; + part->level = level; - part.left_date = date_lut.YYYYMMDDToDayNum(parse(file_name.substr(matches[1].offset, matches[1].length))); - part.right_date = date_lut.YYYYMMDDToDayNum(parse(file_name.substr(matches[2].offset, matches[2].length))); - part.left = parse(file_name.substr(matches[3].offset, matches[3].length)); - part.right = parse(file_name.substr(matches[4].offset, matches[4].length)); - part.level = parse(file_name.substr(matches[5].offset, matches[5].length)); + DayNum_t left_month = date_lut.toFirstDayNumOfMonth(part->left_date); + DayNum_t right_month = date_lut.toFirstDayNumOfMonth(part->right_date); - DayNum_t left_month = date_lut.toFirstDayNumOfMonth(part.left_date); - DayNum_t right_month = date_lut.toFirstDayNumOfMonth(part.right_date); + if (left_month != right_month) + throw Exception("Part name " + dir_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME); - if (left_month != right_month) - throw Exception("Part name " + file_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME); + part->month = left_month; + } - part.month = left_month; + return true; } +void ActiveDataPartSet::parsePartName(const String & dir_name, Part & part) +{ + if (!parsePartNameImpl(dir_name, &part)) + throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME); +} bool ActiveDataPartSet::contains(const String & outer_part_name, const String & inner_part_name) { - Part outer, inner; + Part outer; + Part inner; parsePartName(outer_part_name, outer); parsePartName(inner_part_name, inner); return outer.contains(inner); diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h index 58f89574fb6e683454cfe10979e71e9a5ff54385..657e5956141771920b6db07b5257abdf9a2f149b 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.h @@ -1,10 +1,10 @@ #pragma once #include -#include #include #include #include + namespace DB { @@ -69,10 +69,12 @@ public: static String getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level); /// Returns true if the directory name matches the format of the directory name of the parts - static bool isPartDirectory(const String & dir_name, Poco::RegularExpression::MatchVec * out_matches = nullptr); + static bool isPartDirectory(const String & dir_name); + + static bool parsePartNameImpl(const String & dir_name, Part * part); /// Put data in DataPart from the name of the part. - static void parsePartName(const String & file_name, Part & part, const Poco::RegularExpression::MatchVec * matches = nullptr); + static void parsePartName(const String & dir_name, Part & part); static bool contains(const String & outer_part_name, const String & inner_part_name); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 5e19eb66994eb1502f007f15c54f672bf07fd81a..efea7b58feba7bd34cfee50f900edd8b99ff8e2f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -306,16 +306,13 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) DataPartsVector broken_parts_to_detach; size_t suspicious_broken_parts = 0; - Poco::RegularExpression::MatchVec matches; for (const String & file_name : part_file_names) { - if (!ActiveDataPartSet::isPartDirectory(file_name, &matches)) + MutableDataPartPtr part = std::make_shared(*this); + if (!ActiveDataPartSet::parsePartNameImpl(file_name, part.get())) continue; - MutableDataPartPtr part = std::make_shared(*this); - ActiveDataPartSet::parsePartName(file_name, *part, &matches); part->name = file_name; - bool broken = false; try @@ -365,10 +362,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) { if (contained_name == file_name) continue; - if (!ActiveDataPartSet::isPartDirectory(contained_name, &matches)) - continue; + DataPart contained_part(*this); - ActiveDataPartSet::parsePartName(contained_name, contained_part, &matches); + if (!ActiveDataPartSet::parsePartNameImpl(contained_name, &contained_part)) + continue; + if (part->contains(contained_part)) { LOG_ERROR(log, "Found part " << full_path + contained_name); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d7bbfe91b99830a6c757c9690a4744513b441b97..7c9be0f14f6c755e50d82c83bc58c71e6f1a77af 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -468,8 +468,8 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( return false; /** When the corresponding action is completed, then `shouldExecuteLogEntry` next time, will succeed, - * and queue element will be processed. Immediately in the `executeLogEntry` function it will be found that we already have a part, - , + * and queue element will be processed. + * Immediately in the `executeLogEntry` function it will be found that we already have a part, * and queue element will be immediately treated as processed. */ }