提交 dad07c70 编写于 作者: H Heisenberg

MS-453 GPU search error when nprobe set more than 1024


Former-commit-id: a32159a5cca42d592efc24cac2f9acb9b35defb3
......@@ -26,6 +26,8 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-467 - mysql db test failed
- MS-470 - Drop index succeeds on a table that has not been created
- MS-471 - code coverage run failed
- MS-492 - Drop index failed if index has been created with index_type: FLAT
- MS-493 - Knowhere unittest crash
- MS-453 - GPU search error when nprobe is set to more than 1024
## Improvement
......@@ -85,6 +87,7 @@ Please mark all change in change log and use the ticket from JIRA.
- MS-455 - Distribute tasks by minimal cost in scheduler
- MS-460 - Put transport speed as weight when choosing neighbour to execute task
- MS-459 - Add cache for pick function in tasktable
- MS-476 - Improve search performance
- MS-482 - Change search stream transport to unary in grpc
- MS-487 - Define metric type in CreateTable
- MS-488 - Improve code format in scheduler
......
......@@ -9,7 +9,7 @@ namespace zilliz {
namespace knowhere {
struct Resource {
Resource(std::shared_ptr<faiss::gpu::StandardGpuResources> &r): faiss_res(r) {
explicit Resource(std::shared_ptr<faiss::gpu::StandardGpuResources> &r): faiss_res(r) {
static int64_t global_id = 0;
id = global_id++;
}
......@@ -32,6 +32,11 @@ class FaissGpuResourceMgr {
static FaissGpuResourceMgr &
GetInstance();
// Free GPU resources to avoid a cudaGetDevice error on deallocation.
// This function should be invoked before main() returns.
void
Free();
void
AllocateTempMem(ResPtr &resource, const int64_t& device_id, const int64_t& size);
......
......@@ -282,7 +282,7 @@ void FaissGpuResourceMgr::InitResource() {
for(auto& device : devices_params_) {
auto& resource_vec = idle_[device.first];
for (int i = 0; i < device.second.resource_num; ++i) {
for (int64_t i = 0; i < device.second.resource_num; ++i) {
auto res = std::make_shared<faiss::gpu::StandardGpuResources>();
// TODO(linxj): enable set pinned memory
......@@ -351,6 +351,18 @@ void FaissGpuResourceMgr::MoveToIdle(const int64_t &device_id, const ResPtr &res
idle_[device_id].insert(it, res);
}
// Empties every resource list held in in_use_ and idle_, then marks the
// manager as not initialized by resetting is_init.
void FaissGpuResourceMgr::Free() {
    // Clear the lists tracked as currently in use.
    for (auto &entry : in_use_) {
        entry.second.clear();
    }
    // Clear the lists waiting in the idle pool.
    for (auto &entry : idle_) {
        entry.second.clear();
    }
    is_init = false;
}
void GPUIndex::SetGpuDevice(const int &gpu_id) {
gpu_id_ = gpu_id;
}
......
......@@ -26,6 +26,11 @@ class IDMAPTest : public DataGen, public ::testing::Test {
Init_with_default();
index_ = std::make_shared<IDMAP>();
}
// Overrides TearDown to call Free() on the FaissGpuResourceMgr instance
// returned by GetInstance().
void TearDown() override {
    auto &resource_mgr = FaissGpuResourceMgr::GetInstance();
    resource_mgr.Free();
}
protected:
IDMAPPtr index_ = nullptr;
};
......
......@@ -7,13 +7,11 @@
#include <gtest/gtest.h>
#include <iostream>
#include <sstream>
#include <thread>
#include <faiss/AutoTune.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuClonerOptions.h>
#include "knowhere/index/vector_index/gpu_ivf.h"
#include "knowhere/index/vector_index/ivf.h"
......@@ -58,6 +56,9 @@ class IVFTest
index_ = IndexFactory(index_type);
FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024*1024*200, 1024*1024*300, 2);
}
// Overrides TearDown to call Free() on the FaissGpuResourceMgr instance
// returned by GetInstance().
void TearDown() override {
    auto &resource_mgr = FaissGpuResourceMgr::GetInstance();
    resource_mgr.Free();
}
protected:
std::string index_type;
......@@ -369,6 +370,7 @@ class GPURESTEST
// Overrides TearDown: deletes the objects pointed to by ids and dis, then
// calls Free() on the FaissGpuResourceMgr instance returned by GetInstance().
// NOTE(review): ids/dis are released with scalar delete; if they were
// allocated with new[] this should be delete[] — confirm against the
// allocation site, which is not visible here.
void TearDown() override {
    delete ids;
    delete dis;
    auto &resource_mgr = FaissGpuResourceMgr::GetInstance();
    resource_mgr.Free();
}
protected:
......
......@@ -19,6 +19,7 @@
#include <unistd.h>
#include <string.h>
#include <src/scheduler/SchedInst.h>
#include "knowhere/index/vector_index/gpu_ivf.h"
#include "metrics/Metrics.h"
#include "DBWrapper.h"
......@@ -232,6 +233,7 @@ Server::StopService() {
grpc::GrpcMilvusServer::StopService();
DBWrapper::GetInstance().StopService();
engine::StopSchedulerService();
knowhere::FaissGpuResourceMgr::GetInstance().Free(); // free gpu resource.
}
}
......
......@@ -955,15 +955,8 @@ DropIndexTask::OnExecute() {
return SetError(res, "Invalid table name: " + table_name_);
}
//step 2:check index existence
engine::TableIndex index;
engine::Status stat = DBWrapper::DB()->DescribeIndex(table_name_, index);
if (index.engine_type_ == 1) {
return SetError(SERVER_UNEXPECTED_ERROR, "index not existed");
}
//step 3: check table existence
stat = DBWrapper::DB()->DropIndex(table_name_);
//step 2: check table existence
auto stat = DBWrapper::DB()->DropIndex(table_name_);
if (!stat.ok()) {
return SetError(DB_META_TRANSACTION_FAILED, stat.ToString());
}
......
......@@ -40,7 +40,7 @@ set(unittest_libs
add_subdirectory(server)
add_subdirectory(db)
#add_subdirectory(knowhere)
add_subdirectory(knowhere)
add_subdirectory(metrics)
#add_subdirectory(scheduler)
#add_subdirectory(storage)
\ No newline at end of file
......@@ -40,6 +40,9 @@ class KnowhereWrapperTest
index_ = GetVecIndexFactory(index_type);
}
// Overrides TearDown to call Free() on the FaissGpuResourceMgr instance
// returned by GetInstance().
void TearDown() override {
    auto &resource_mgr = zilliz::knowhere::FaissGpuResourceMgr::GetInstance();
    resource_mgr.Free();
}
void AssertResult(const std::vector<long> &ids, const std::vector<float> &dis) {
EXPECT_EQ(ids.size(), nq * k);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册