提交 dad07c70 编写于 作者: H Heisenberg

MS-453 GPU search error when nprobe set more than 1024


Former-commit-id: a32159a5cca42d592efc24cac2f9acb9b35defb3
...@@ -26,6 +26,8 @@ Please mark all change in change log and use the ticket from JIRA. ...@@ -26,6 +26,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-467 - mysql db test failed - MS-467 - mysql db test failed
- MS-470 - Drop index success, which table not created - MS-470 - Drop index success, which table not created
- MS-471 - code coverage run failed - MS-471 - code coverage run failed
- MS-492 - Drop index failed if index has been created with index_type: FLAT
- MS-493 - Knowhere unittest crash
- MS-453 - GPU search error when nprobe set more than 1024 - MS-453 - GPU search error when nprobe set more than 1024
## Improvement ## Improvement
...@@ -85,6 +87,7 @@ Please mark all change in change log and use the ticket from JIRA. ...@@ -85,6 +87,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-455 - Distribute tasks by minimal cost in scheduler - MS-455 - Distribute tasks by minimal cost in scheduler
- MS-460 - Put transport speed as weight when choosing neighbour to execute task - MS-460 - Put transport speed as weight when choosing neighbour to execute task
- MS-459 - Add cache for pick function in tasktable - MS-459 - Add cache for pick function in tasktable
- MS-476 - Improve search performance
- MS-482 - Change search stream transport to unary in grpc - MS-482 - Change search stream transport to unary in grpc
- MS-487 - Define metric type in CreateTable - MS-487 - Define metric type in CreateTable
- MS-488 - Improve code format in scheduler - MS-488 - Improve code format in scheduler
......
...@@ -9,7 +9,7 @@ namespace zilliz { ...@@ -9,7 +9,7 @@ namespace zilliz {
namespace knowhere { namespace knowhere {
struct Resource { struct Resource {
Resource(std::shared_ptr<faiss::gpu::StandardGpuResources> &r): faiss_res(r) { explicit Resource(std::shared_ptr<faiss::gpu::StandardGpuResources> &r): faiss_res(r) {
static int64_t global_id = 0; static int64_t global_id = 0;
id = global_id++; id = global_id++;
} }
...@@ -32,6 +32,11 @@ class FaissGpuResourceMgr { ...@@ -32,6 +32,11 @@ class FaissGpuResourceMgr {
static FaissGpuResourceMgr & static FaissGpuResourceMgr &
GetInstance(); GetInstance();
// Release the GPU resources tracked by this manager.
// This function should be invoked before main() returns, to avoid a
// cudaGetDevice error when the resources are deallocated.
void
Free();
void void
AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size); AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size);
......
...@@ -282,7 +282,7 @@ void FaissGpuResourceMgr::InitResource() { ...@@ -282,7 +282,7 @@ void FaissGpuResourceMgr::InitResource() {
for(auto& device : devices_params_) { for(auto& device : devices_params_) {
auto& resource_vec = idle_[device.first]; auto& resource_vec = idle_[device.first];
for (int i = 0; i < device.second.resource_num; ++i) { for (int64_t i = 0; i < device.second.resource_num; ++i) {
auto res = std::make_shared<faiss::gpu::StandardGpuResources>(); auto res = std::make_shared<faiss::gpu::StandardGpuResources>();
// TODO(linxj): enable set pinned memory // TODO(linxj): enable set pinned memory
...@@ -351,6 +351,18 @@ void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res ...@@ -351,6 +351,18 @@ void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res
idle_[device_id].insert(it, res); idle_[device_id].insert(it, res);
} }
// Empties every per-device resource list, first the in-use lists and then the
// idle lists, and finally clears the initialization flag.
void FaissGpuResourceMgr::Free() {
    for (auto &in_use_entry : in_use_) {
        in_use_entry.second.clear();
    }

    for (auto &idle_entry : idle_) {
        idle_entry.second.clear();
    }

    is_init = false;
}
void GPUIndex::SetGpuDevice(const int &gpu_id) { void GPUIndex::SetGpuDevice(const int &gpu_id) {
gpu_id_ = gpu_id; gpu_id_ = gpu_id;
} }
......
...@@ -26,6 +26,11 @@ class IDMAPTest : public DataGen, public ::testing::Test { ...@@ -26,6 +26,11 @@ class IDMAPTest : public DataGen, public ::testing::Test {
Init_with_default(); Init_with_default();
index_ = std::make_shared<IDMAP>(); index_ = std::make_shared<IDMAP>();
} }
// Fixture tear-down: release the resources tracked by the shared
// FaissGpuResourceMgr instance.
void TearDown() override {
FaissGpuResourceMgr::GetInstance().Free();
}
protected: protected:
IDMAPPtr index_ = nullptr; IDMAPPtr index_ = nullptr;
}; };
......
...@@ -7,13 +7,11 @@ ...@@ -7,13 +7,11 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <iostream> #include <iostream>
#include <sstream>
#include <thread> #include <thread>
#include <faiss/AutoTune.h> #include <faiss/AutoTune.h>
#include <faiss/gpu/GpuAutoTune.h> #include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexIVFFlat.h> #include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuClonerOptions.h>
#include "knowhere/index/vector_index/gpu_ivf.h" #include "knowhere/index/vector_index/gpu_ivf.h"
#include "knowhere/index/vector_index/ivf.h" #include "knowhere/index/vector_index/ivf.h"
...@@ -58,6 +56,9 @@ class IVFTest ...@@ -58,6 +56,9 @@ class IVFTest
index_ = IndexFactory(index_type); index_ = IndexFactory(index_type);
FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*300, 2); FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*300, 2);
} }
// Fixture tear-down: release the resources tracked by the shared
// FaissGpuResourceMgr instance (counterpart to the InitDevice call above).
void TearDown() override {
FaissGpuResourceMgr::GetInstance().Free();
}
protected: protected:
std::string index_type; std::string index_type;
...@@ -369,6 +370,7 @@ class GPURESTEST ...@@ -369,6 +370,7 @@ class GPURESTEST
void TearDown() override { void TearDown() override {
delete ids; delete ids;
delete dis; delete dis;
FaissGpuResourceMgr::GetInstance().Free();
} }
protected: protected:
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <unistd.h> #include <unistd.h>
#include <string.h> #include <string.h>
#include <src/scheduler/SchedInst.h> #include <src/scheduler/SchedInst.h>
#include "knowhere/index/vector_index/gpu_ivf.h"
#include "metrics/Metrics.h" #include "metrics/Metrics.h"
#include "DBWrapper.h" #include "DBWrapper.h"
...@@ -232,6 +233,7 @@ Server::StopService() { ...@@ -232,6 +233,7 @@ Server::StopService() {
grpc::GrpcMilvusServer::StopService(); grpc::GrpcMilvusServer::StopService();
DBWrapper::GetInstance().StopService(); DBWrapper::GetInstance().StopService();
engine::StopSchedulerService(); engine::StopSchedulerService();
knowhere::FaissGpuResourceMgr::GetInstance().Free(); // free gpu resource.
} }
} }
......
...@@ -955,15 +955,8 @@ DropIndexTask::OnExecute() { ...@@ -955,15 +955,8 @@ DropIndexTask::OnExecute() {
return SetError(res, "Invalid table name: " + table_name_); return SetError(res, "Invalid table name: " + table_name_);
} }
//step 2:check index existence //step 2: check table existence
engine::TableIndex index; auto stat = DBWrapper::DB()->DropIndex(table_name_);
engine::Status stat = DBWrapper::DB()->DescribeIndex(table_name_, index);
if (index.engine_type_ == 1) {
return SetError(SERVER_UNEXPECTED_ERROR, "index not existed");
}
//step 3: check table existence
stat = DBWrapper::DB()->DropIndex(table_name_);
if (!stat.ok()) { if (!stat.ok()) {
return SetError(DB_META_TRANSACTION_FAILED, stat.ToString()); return SetError(DB_META_TRANSACTION_FAILED, stat.ToString());
} }
......
...@@ -40,7 +40,7 @@ set(unittest_libs ...@@ -40,7 +40,7 @@ set(unittest_libs
add_subdirectory(server) add_subdirectory(server)
add_subdirectory(db) add_subdirectory(db)
#add_subdirectory(knowhere) add_subdirectory(knowhere)
add_subdirectory(metrics) add_subdirectory(metrics)
#add_subdirectory(scheduler) #add_subdirectory(scheduler)
#add_subdirectory(storage) #add_subdirectory(storage)
\ No newline at end of file
...@@ -40,6 +40,9 @@ class KnowhereWrapperTest ...@@ -40,6 +40,9 @@ class KnowhereWrapperTest
index_ = GetVecIndexFactory(index_type); index_ = GetVecIndexFactory(index_type);
} }
// Fixture tear-down: release the resources tracked by the shared
// knowhere FaissGpuResourceMgr instance.
void TearDown() override {
zilliz::knowhere::FaissGpuResourceMgr::GetInstance().Free();
}
void AssertResult(const std::vector<long> &ids, const std::vector<float> &dis) { void AssertResult(const std::vector<long> &ids, const std::vector<float> &dis) {
EXPECT_EQ(ids.size(), nq * k); EXPECT_EQ(ids.size(), nq * k);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册