提交 ee7fffef 编写于 作者: P peng.xu

Merge branch 'branch-0.4.0' into 'branch-0.4.0'

MS-543 send error status to sdk when load index fail

See merge request megasearch/milvus!560

Former-commit-id: 556b4747a0a90dea0384852fa83c770b3d47563b
......@@ -34,6 +34,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-474 - Create index hangs when using the branch-0.3.1 server config
- MS-510 - Unit tests run out of memory and crash
- MS-507 - Dataset 10m-512, index type sq8: performance abnormal when CPU_CACHE is set to 16 or 64
- MS-543 - SearchTask fails without an exception
## Improvement
- MS-327 - Clean code for milvus
......
......@@ -424,6 +424,9 @@ Status DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSch
scheduler.Schedule(context);
context->WaitResult();
if (!context->GetStatus().ok()) {
return context->GetStatus();
}
//step 3: print time cost information
double load_cost = context->LoadCost();
......
......@@ -69,7 +69,7 @@ std::string Status::ToString() const {
type = "InvalidPath: ";
break;
default:
snprintf(tmp, sizeof(tmp), "Unkown code(%d): ",
snprintf(tmp, sizeof(tmp), "Error code(0x%x): ",
static_cast<int>(code()));
type = tmp;
break;
......
......@@ -38,7 +38,9 @@ public:
// Read-only access to the stored result set (result_).
const ResultSet& GetResult() const { return result_; }
// Mutable access to the same result_ member; callers can modify it in place.
ResultSet& GetResult() { return result_; }
std::string Identity() const { return identity_; }
// Returns the identity_ string by const reference.
const std::string& Identity() const { return identity_; }
// Read-only access to the status_ member.
const Status& GetStatus() const { return status_; }
// Mutable access to status_, so a caller can assign a new Status through the returned reference.
// NOTE(review): no locking is visible on this accessor - confirm that concurrent writers are serialized.
Status& GetStatus() { return status_; }
void IndexSearchDone(size_t index_id);
void WaitResult();
......@@ -64,6 +66,7 @@ private:
std::condition_variable done_cond_;
std::string identity_; //for debug
Status status_;
double time_cost_load_ = 0.0; //time cost for load all index files, unit: us
double time_cost_search_ = 0.0; //time cost for entire search, unit: us
......
......@@ -61,7 +61,7 @@ static ::PROTOBUF_NAMESPACE_ID::Message const * const file_default_instances[] =
const char descriptor_table_protodef_status_2eproto[] PROTOBUF_SECTION_VARIABLE(protodesc_cold) =
"\n\014status.proto\022\013milvus.grpc\"D\n\006Status\022*\n"
"\nerror_code\030\001 \001(\0162\026.milvus.grpc.ErrorCod"
"e\022\016\n\006reason\030\002 \001(\t*\230\004\n\tErrorCode\022\013\n\007SUCCE"
"e\022\016\n\006reason\030\002 \001(\t*\253\004\n\tErrorCode\022\013\n\007SUCCE"
"SS\020\000\022\024\n\020UNEXPECTED_ERROR\020\001\022\022\n\016CONNECT_FA"
"ILED\020\002\022\025\n\021PERMISSION_DENIED\020\003\022\024\n\020TABLE_N"
"OT_EXISTS\020\004\022\024\n\020ILLEGAL_ARGUMENT\020\005\022\021\n\rILL"
......@@ -74,8 +74,8 @@ const char descriptor_table_protodef_status_2eproto[] PROTOBUF_SECTION_VARIABLE(
"CREATE_FOLDER\020\021\022\026\n\022CANNOT_CREATE_FILE\020\022\022"
"\030\n\024CANNOT_DELETE_FOLDER\020\023\022\026\n\022CANNOT_DELE"
"TE_FILE\020\024\022\025\n\021BUILD_INDEX_ERROR\020\025\022\021\n\rILLE"
"GAL_NLIST\020\026\022\027\n\023ILLEGAL_METRIC_TYPE\020\027b\006pr"
"oto3"
"GAL_NLIST\020\026\022\027\n\023ILLEGAL_METRIC_TYPE\020\027\022\021\n\r"
"OUT_OF_MEMORY\020\030b\006proto3"
;
static const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable*const descriptor_table_status_2eproto_deps[1] = {
};
......@@ -85,7 +85,7 @@ static ::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase*const descriptor_table_sta
static ::PROTOBUF_NAMESPACE_ID::internal::once_flag descriptor_table_status_2eproto_once;
static bool descriptor_table_status_2eproto_initialized = false;
const ::PROTOBUF_NAMESPACE_ID::internal::DescriptorTable descriptor_table_status_2eproto = {
&descriptor_table_status_2eproto_initialized, descriptor_table_protodef_status_2eproto, "status.proto", 644,
&descriptor_table_status_2eproto_initialized, descriptor_table_protodef_status_2eproto, "status.proto", 663,
&descriptor_table_status_2eproto_once, descriptor_table_status_2eproto_sccs, descriptor_table_status_2eproto_deps, 1, 0,
schemas, file_default_instances, TableStruct_status_2eproto::offsets,
file_level_metadata_status_2eproto, 1, file_level_enum_descriptors_status_2eproto, file_level_service_descriptors_status_2eproto,
......@@ -125,6 +125,7 @@ bool ErrorCode_IsValid(int value) {
case 21:
case 22:
case 23:
case 24:
return true;
default:
return false;
......
......@@ -93,12 +93,13 @@ enum ErrorCode : int {
BUILD_INDEX_ERROR = 21,
ILLEGAL_NLIST = 22,
ILLEGAL_METRIC_TYPE = 23,
OUT_OF_MEMORY = 24,
ErrorCode_INT_MIN_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::min(),
ErrorCode_INT_MAX_SENTINEL_DO_NOT_USE_ = std::numeric_limits<::PROTOBUF_NAMESPACE_ID::int32>::max()
};
bool ErrorCode_IsValid(int value);
constexpr ErrorCode ErrorCode_MIN = SUCCESS;
constexpr ErrorCode ErrorCode_MAX = ILLEGAL_METRIC_TYPE;
constexpr ErrorCode ErrorCode_MAX = OUT_OF_MEMORY;
constexpr int ErrorCode_ARRAYSIZE = ErrorCode_MAX + 1;
const ::PROTOBUF_NAMESPACE_ID::EnumDescriptor* ErrorCode_descriptor();
......
......@@ -27,6 +27,7 @@ enum ErrorCode {
BUILD_INDEX_ERROR = 21;
ILLEGAL_NLIST = 22;
ILLEGAL_METRIC_TYPE = 23;
OUT_OF_MEMORY = 24;
}
message Status {
......
......@@ -98,14 +98,18 @@ XSearchTask::Load(LoadType type, uint8_t device_id) {
server::TimeRecorder rc("");
Status stat = Status::OK();
std::string error_msg;
std::string type_str;
try {
if (type == LoadType::DISK2CPU) {
stat = index_engine_->Load();
type_str = "DISK2CPU";
} else if (type == LoadType::CPU2GPU) {
stat = index_engine_->CopyToGpu(device_id);
type_str = "CPU2GPU";
} else if (type == LoadType::GPU2CPU) {
stat = index_engine_->CopyToCpu();
type_str = "GPU2CPU";
} else {
error_msg = "Wrong load type";
stat = Status(SERVER_UNEXPECTED_ERROR, error_msg);
......@@ -117,13 +121,18 @@ XSearchTask::Load(LoadType type, uint8_t device_id) {
}
if (!stat.ok()) {
if (error_msg.empty())
error_msg = std::string("Failed to load index file: file not available");
//typical error: file not available
ENGINE_LOG_ERROR << error_msg;
Status s;
if (stat.ToString().find("out of memory") != std::string::npos) {
error_msg = "out of memory: " + type_str;
s = Status(SERVER_OUT_OF_MEMORY, error_msg);
} else {
error_msg = "Failed to load index file: " + type_str;
s = Status(SERVER_UNEXPECTED_ERROR, error_msg);
}
for (auto &context : search_contexts_) {
context->IndexSearchDone(file_->id_);//mark as done avoid dead lock, even failed
context->GetStatus() = s;
}
return;
......
......@@ -48,6 +48,7 @@ namespace {
{SERVER_CACHE_ERROR, ::milvus::grpc::ErrorCode::CACHE_FAILED},
{DB_META_TRANSACTION_FAILED, ::milvus::grpc::ErrorCode::META_FAILED},
{SERVER_BUILD_INDEX_ERROR, ::milvus::grpc::ErrorCode::BUILD_INDEX_ERROR},
{SERVER_OUT_OF_MEMORY, ::milvus::grpc::ErrorCode::OUT_OF_MEMORY},
};
if(code_map.find(code) != code_map.end()) {
......
......@@ -672,7 +672,7 @@ SearchTask::OnExecute() {
rc.RecordSection("search vectors from engine");
if (!stat.ok()) {
return SetError(DB_META_TRANSACTION_FAILED, stat.ToString());
return SetError(stat.code(), stat.ToString());
}
if (results.empty()) {
......
......@@ -66,6 +66,7 @@ constexpr ErrorCode SERVER_INVALID_NPROBE = ToServerErrorCode(113);
constexpr ErrorCode SERVER_INVALID_INDEX_NLIST = ToServerErrorCode(114);
constexpr ErrorCode SERVER_INVALID_INDEX_METRIC_TYPE = ToServerErrorCode(115);
constexpr ErrorCode SERVER_INVALID_INDEX_FILE_SIZE = ToServerErrorCode(116);
//server error code for out-of-memory failures
constexpr ErrorCode SERVER_OUT_OF_MEMORY = ToServerErrorCode(117);
//db error code
constexpr ErrorCode DB_META_TRANSACTION_FAILED = ToDbErrorCode(1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册