提交 662320c0 编写于 作者: W wxyu

MS-412 Fix gpu cache logical error


Former-commit-id: 6bd2a056feee54393fa4bc16b1b233f54dac0500
上级 4023e110
......@@ -5,6 +5,8 @@ Please mark all changes in the change log and use the ticket from JIRA.
# Milvus 0.4.0 (2019-07-28)
## Bug
- MS-411 - Fix metric unittest linking error
- MS-412 - Fix GPU cache logic error
## Improvement
- MS-327 - Clean code for milvus
......@@ -80,7 +82,6 @@ Please mark all changes in the change log and use the ticket from JIRA.
- MS-330 - Stability test failed caused by server core dumped
- MS-347 - Build index hangs again
- MS-382 - fix MySQLMetaImpl::CleanUpFilesWithTTL unknown column bug
- MS-411 - Fix metric unittest linking error
## Improvement
- MS-156 - Add unittest for merge result functions
......
......@@ -65,21 +65,21 @@ resource_config:
memory: 64
device_id: 0
enable_loader: true
enable_executor: true
enable_executor: false
gtx1060:
type: GPU
memory: 6
device_id: 0
enable_loader: false
enable_executor: false
enable_loader: true
enable_executor: true
gtx1660:
type: GPU
memory: 6
device_id: 1
enable_loader: false
enable_executor: false
enable_loader: true
enable_executor: true
# connection list, length: 0~N
# format: -${resource_name}===${resource_name}
......
......@@ -139,9 +139,11 @@ Status ExecutionEngineImpl::Load(bool to_cache) {
}
Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) {
index_ = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_);
bool already_in_cache = (index_ != nullptr);
if (!index_) {
auto index = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_);
bool already_in_cache = (index != nullptr);
if (already_in_cache) {
index_ = index;
} else {
try {
index_ = index_->CopyToGpu(device_id);
ENGINE_LOG_DEBUG << "CPU to GPU" << device_id;
......@@ -161,9 +163,11 @@ Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) {
}
Status ExecutionEngineImpl::CopyToCpu() {
index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
bool already_in_cache = (index_ != nullptr);
if (!index_) {
auto index = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_);
bool already_in_cache = (index != nullptr);
if (already_in_cache) {
index_ = index;
} else {
try {
index_ = index_->CopyToCpu();
ENGINE_LOG_DEBUG << "GPU to CPU";
......@@ -175,7 +179,7 @@ Status ExecutionEngineImpl::CopyToCpu() {
}
}
if(!already_in_cache) {
if (!already_in_cache) {
Cache();
}
return Status::OK();
......@@ -276,7 +280,7 @@ Status ExecutionEngineImpl::Init() {
using namespace zilliz::milvus::server;
ServerConfig &config = ServerConfig::GetInstance();
ConfigNode server_config = config.GetConfig(CONFIG_SERVER);
gpu_num_ = server_config.GetInt32Value("gpu_index", 0);
gpu_num_ = server_config.GetInt32Value("gpu_index", 0);
return Status::OK();
}
......
......@@ -5,6 +5,7 @@
******************************************************************************/
#include <list>
#include <random>
#include "Action.h"
......@@ -38,6 +39,22 @@ push_task_round_robin(TaskTable &self_task_table, std::list<ResourcePtr> &neighb
}
}
/**
 * Hand the tasks picked from `self_task_table` over to randomly chosen neighbours.
 *
 * The candidate indexes come from PickToMove(), called with a default-constructed
 * CacheMgr and the current table size as the limit. Each index for which
 * TaskTable::Move() succeeds has its task put into the task table of a neighbour
 * selected uniformly at random.
 *
 * @param self_task_table  task table of the resource giving tasks away
 * @param neighbours       candidate receivers; when empty, nothing is moved
 */
void
push_task_randomly(TaskTable &self_task_table, std::vector<ResourcePtr> &neighbours) {
    // Without this guard `neighbours.size() - 1` wraps around to the largest
    // unsigned value, and `neighbours[dist(mt)]` would index out of bounds.
    // Returning before Move() also keeps the tasks untouched in the source table.
    if (neighbours.empty()) {
        return;
    }

    std::random_device rd;
    std::mt19937 mt(rd());
    // Closed range [0, size - 1]: every neighbour is equally likely.
    std::uniform_int_distribution<uint64_t> dist(0, neighbours.size() - 1);

    CacheMgr cache;
    auto indexes = PickToMove(self_task_table, cache, self_task_table.Size());
    for (auto index : indexes) {
        // Only forward the task when the table accepts the move for this index.
        if (self_task_table.Move(index)) {
            auto task = self_task_table.Get(index)->task;
            neighbours[dist(mt)]->task_table().Put(task);
        }
    }
}
void
Action::PushTaskToNeighbour(const ResourceWPtr &res) {
auto self = res.lock();
......@@ -60,18 +77,21 @@ Action::PushTaskToNeighbourHasExecutor(const ResourceWPtr &res) {
auto self = res.lock();
if (not self) return;
std::list<ResourcePtr> neighbours;
std::list<ResourcePtr> l_neighbours;
std::vector<ResourcePtr> v_neighbours;
for (auto &neighbour_node : self->GetNeighbours()) {
auto node = neighbour_node.neighbour_node.lock();
if (not node) continue;
auto resource = std::static_pointer_cast<Resource>(node);
if (resource->HasExecutor()) {
neighbours.emplace_back(resource);
l_neighbours.push_back(resource);
v_neighbours.push_back(resource);
}
}
push_task_round_robin(self->task_table(), neighbours);
// push_task_round_robin(self->task_table(), l_neighbours);
push_task_randomly(self->task_table(), v_neighbours);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册