提交 64e0c927 编写于 作者: J jinhai

Merge branch 'branch-0.3.1' into 'branch-0.3.1'

MS-324 Show error when there is not enough gpu memory to build index

See merge request megasearch/milvus!312

Former-commit-id: c020e5d40a7384803e0a35982be44557812774bc
...@@ -39,10 +39,11 @@ Please mark all change in change log and use the ticket from JIRA. ...@@ -39,10 +39,11 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-261 - Update faiss version to 1.5.3 and add BUILD_FAISS_WITH_MKL as an option - MS-261 - Update faiss version to 1.5.3 and add BUILD_FAISS_WITH_MKL as an option
- MS-266 - Improve topk reduce time by using multi-threads - MS-266 - Improve topk reduce time by using multi-threads
- MS-275 - Avoid sqlite logic error exception - MS-275 - Avoid sqlite logic error exception
- MS-278 - add IndexStatsHelper - MS-278 - Add IndexStatsHelper
- MS-312 - Set openmp thread number by config - MS-312 - Set openmp thread number by config
- MS-305 - add CPU core percent metric - MS-305 - Add CPU core percent metric
- MS-310 - add milvus CPU utilization ratio and CPU/GPU temperature metrics - MS-310 - Add milvus CPU utilization ratio and CPU/GPU temperature metrics
- MS-324 - Show error when there is not enough gpu memory to build index
## New Feature ## New Feature
- MS-180 - Add new mem manager - MS-180 - Add new mem manager
......
...@@ -535,11 +535,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { ...@@ -535,11 +535,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
} }
//step 3: build index //step 3: build index
std::shared_ptr<ExecutionEngine> index;
try {
auto start_time = METRICS_NOW_TIME; auto start_time = METRICS_NOW_TIME;
auto index = to_index->BuildIndex(table_file.location_); index = to_index->BuildIndex(table_file.location_);
auto end_time = METRICS_NOW_TIME; auto end_time = METRICS_NOW_TIME;
auto total_time = METRICS_MICROSECONDS(start_time, end_time); auto total_time = METRICS_MICROSECONDS(start_time, end_time);
server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time); server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time);
} catch (std::exception& ex) {
//typical error: out of gpu memory
std::string msg = "BuildIndex encounter exception" + std::string(ex.what());
ENGINE_LOG_ERROR << msg;
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
status = meta_ptr_->UpdateTableFile(table_file);
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
std::cout << "ERROR: failed to build index, index file is too large or gpu memory is not enough" << std::endl;
return Status::Error(msg);
}
//step 4: if table has been deleted, don't save index file //step 4: if table has been deleted, don't save index file
bool has_table = false; bool has_table = false;
...@@ -550,7 +566,22 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { ...@@ -550,7 +566,22 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
} }
//step 5: save index file //step 5: save index file
try {
index->Serialize(); index->Serialize();
} catch (std::exception& ex) {
//typical error: out of disk space or permission denied
std::string msg = "Serialize index encounter exception" + std::string(ex.what());
ENGINE_LOG_ERROR << msg;
table_file.file_type_ = meta::TableFileSchema::TO_DELETE;
status = meta_ptr_->UpdateTableFile(table_file);
ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete";
std::cout << "ERROR: failed to persist index file: " << table_file.location_
<< ", possible out of disk space" << std::endl;
return Status::Error(msg);
}
//step 6: update meta //step 6: update meta
table_file.file_type_ = meta::TableFileSchema::INDEX; table_file.file_type_ = meta::TableFileSchema::INDEX;
......
...@@ -932,7 +932,7 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { ...@@ -932,7 +932,7 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) {
table_file.date_ = std::get<3>(file); table_file.date_ = std::get<3>(file);
utils::DeleteTableFilePath(options_, table_file); utils::DeleteTableFilePath(options_, table_file);
ENGINE_LOG_DEBUG << "Removing file id:" << table_file.id_ << " location:" << table_file.location_; ENGINE_LOG_DEBUG << "Removing file id:" << table_file.file_id_ << " location:" << table_file.location_;
ConnectorPtr->remove<TableFileSchema>(table_file.id_); ConnectorPtr->remove<TableFileSchema>(table_file.id_);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册