diff --git a/cpp/CHANGELOG.md b/cpp/CHANGELOG.md index ea37ff28c8e965ceeb489bb74df0457756dbdb87..b2e11cd83c82760e5d6dd4c2becb0e496e3f8ac2 100644 --- a/cpp/CHANGELOG.md +++ b/cpp/CHANGELOG.md @@ -39,10 +39,11 @@ Please mark all change in change log and use the ticket from JIRA. - MS-261 - Update faiss version to 1.5.3 and add BUILD_FAISS_WITH_MKL as an option - MS-266 - Improve topk reduce time by using multi-threads - MS-275 - Avoid sqlite logic error excetion -- MS-278 - add IndexStatsHelper +- MS-278 - Add IndexStatsHelper - MS-312 - Set openmp thread number by config -- MS-305 - add CPU core percent metric -- MS-310 - add milvus CPU utilization ratio and CPU/GPU temperature metrics +- MS-305 - Add CPU core percent metric +- MS-310 - Add milvus CPU utilization ratio and CPU/GPU temperature metrics +- MS-324 - Show error when there is not enough gpu memory to build index ## New Feature - MS-180 - Add new mem manager diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index fa4066e27c88503680d717f334bc3c89855f4380..d8f894fbc8ed7f45c1060dc05422ceb54d2a1486 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -535,11 +535,27 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { } //step 3: build index - auto start_time = METRICS_NOW_TIME; - auto index = to_index->BuildIndex(table_file.location_); - auto end_time = METRICS_NOW_TIME; - auto total_time = METRICS_MICROSECONDS(start_time, end_time); - server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time); + decltype(to_index->BuildIndex(table_file.location_)) index; + + try { + auto start_time = METRICS_NOW_TIME; + index = to_index->BuildIndex(table_file.location_); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + server::Metrics::GetInstance().BuildIndexDurationSecondsHistogramObserve(total_time); + } catch (const std::exception& ex) { + //typical error: out of gpu memory + std::string msg = "BuildIndex encounter exception: " + std::string(ex.what()); 
+ ENGINE_LOG_ERROR << msg; + + table_file.file_type_ = meta::TableFileSchema::TO_DELETE; + status = meta_ptr_->UpdateTableFile(table_file); + ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete"; + + std::cout << "ERROR: failed to build index, index file is too large or gpu memory is not enough" << std::endl; + + return Status::Error(msg); + } //step 4: if table has been deleted, dont save index file bool has_table = false; @@ -550,7 +566,22 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) { } //step 5: save index file - index->Serialize(); + try { + index->Serialize(); + } catch (const std::exception& ex) { + //typical error: out of disk space or permission denied + std::string msg = "Serialize index encounter exception: " + std::string(ex.what()); + ENGINE_LOG_ERROR << msg; + + table_file.file_type_ = meta::TableFileSchema::TO_DELETE; + status = meta_ptr_->UpdateTableFile(table_file); + ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << table_file.file_id_ << " to to_delete"; + + std::cout << "ERROR: failed to persist index file: " << table_file.location_ + << ", possible out of disk space" << std::endl; + + return Status::Error(msg); + } //step 6: update meta table_file.file_type_ = meta::TableFileSchema::INDEX; diff --git a/cpp/src/db/DBMetaImpl.cpp b/cpp/src/db/DBMetaImpl.cpp index 23c308c1ec1a3d4ff4ae685c61a1b4b3c9c701f7..e4ca01863b8e3792fe10d09cafd5dd719a46f0e6 100644 --- a/cpp/src/db/DBMetaImpl.cpp +++ b/cpp/src/db/DBMetaImpl.cpp @@ -932,7 +932,7 @@ Status DBMetaImpl::CleanUpFilesWithTTL(uint16_t seconds) { table_file.date_ = std::get<3>(file); utils::DeleteTableFilePath(options_, table_file); - ENGINE_LOG_DEBUG << "Removing file id:" << table_file.id_ << " location:" << table_file.location_; + ENGINE_LOG_DEBUG << "Removing file id:" << table_file.file_id_ << " location:" << table_file.location_; ConnectorPtr->remove(table_file.id_); }