diff --git a/cpp/src/cache/Cache.h b/cpp/src/cache/Cache.h
index 606dc9eb07fb4bd967d17492ab2003b9eccde543..6151718530e4a8a906931aa1e6fda3f4c6b93e4d 100644
--- a/cpp/src/cache/Cache.h
+++ b/cpp/src/cache/Cache.h
@@ -46,7 +46,8 @@ public:
     double freemem_percent() const { return freemem_percent_; };
     void set_freemem_percent(double percent) { freemem_percent_ = percent; }
 
-    void set_gpu_ids(std::vector<uint64_t> gpu_ids) { gpu_ids_.assign(gpu_ids.begin(), gpu_ids.end()); }
+    void set_gpu_ids(std::vector<uint64_t>& gpu_ids) { gpu_ids_ = gpu_ids; }
+    std::vector<uint64_t> gpu_ids() const { return gpu_ids_; }
 
     size_t size() const;
 
diff --git a/cpp/src/cache/CacheMgr.cpp b/cpp/src/cache/CacheMgr.cpp
index 977c7e1c426e2b36daf0b75e84427637da766e02..eb3980da6145d264b4d6f9d96b5eac9bbbf48c58 100644
--- a/cpp/src/cache/CacheMgr.cpp
+++ b/cpp/src/cache/CacheMgr.cpp
@@ -56,6 +56,7 @@ engine::VecIndexPtr CacheMgr::GetIndex(const std::string& key) {
 }
 
 void CacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) {
+    std::cout << "dashalk\n";
     if(cache_ == nullptr) {
         SERVER_LOG_ERROR << "Cache doesn't exist";
         return;
@@ -130,6 +131,24 @@ void CacheMgr::SetCapacity(int64_t capacity) {
     cache_->set_capacity(capacity);
 }
 
+std::vector<uint64_t> CacheMgr::GpuIds() const {
+    if(cache_ == nullptr) {
+        SERVER_LOG_ERROR << "Cache doesn't exist";
+        std::vector<uint64_t> gpu_ids;
+        return gpu_ids;
+    }
+
+    return cache_->gpu_ids();
+}
+
+void CacheMgr::SetGpuIds(std::vector<uint64_t> gpu_ids){
+    if(cache_ == nullptr) {
+        SERVER_LOG_ERROR << "Cache doesn't exist";
+        return;
+    }
+    cache_->set_gpu_ids(gpu_ids);
+}
+
 }
 }
 }
diff --git a/cpp/src/cache/CacheMgr.h b/cpp/src/cache/CacheMgr.h
index b6f1ec8ef1b9e8f4787ff18a61b21475784112f3..9abb30b92fb90dea83f755d91f2b9bebb14f0216 100644
--- a/cpp/src/cache/CacheMgr.h
+++ b/cpp/src/cache/CacheMgr.h
@@ -33,6 +33,8 @@ public:
     int64_t CacheUsage() const;
     int64_t CacheCapacity() const;
     void SetCapacity(int64_t capacity);
+    std::vector<uint64_t> GpuIds() const;
+    void SetGpuIds(std::vector<uint64_t> gpu_ids);
 
 protected:
     CacheMgr();
diff --git a/cpp/src/cache/CpuCacheMgr.h b/cpp/src/cache/CpuCacheMgr.h
index 8b0f98e6b481aac47721f311d11ca8d448789ea4..39e33aef8918be49269b08995b554efcaa7780ec 100644
--- a/cpp/src/cache/CpuCacheMgr.h
+++ b/cpp/src/cache/CpuCacheMgr.h
@@ -16,6 +16,7 @@ private:
     CpuCacheMgr();
 
 public:
+    //TODO: use smart pointer instead
     static CacheMgr* GetInstance() {
         static CpuCacheMgr s_mgr;
         return &s_mgr;
diff --git a/cpp/src/cache/GpuCacheMgr.cpp b/cpp/src/cache/GpuCacheMgr.cpp
index 19be0d082187bbcbeeb586700e59eacdc9e1a30a..eb6b1dbeb78d1d95756eb1994362bff3354b8b24 100644
--- a/cpp/src/cache/GpuCacheMgr.cpp
+++ b/cpp/src/cache/GpuCacheMgr.cpp
@@ -13,35 +13,39 @@ namespace milvus {
 namespace cache {
 
 std::mutex GpuCacheMgr::mutex_;
-std::unordered_map<uint64_t, GpuCacheMgr*> GpuCacheMgr::instance_;
+std::unordered_map<uint64_t, GpuCacheMgrPtr> GpuCacheMgr::instance_;
 
 namespace {
     constexpr int64_t unit = 1024 * 1024 * 1024;
-}
 
-GpuCacheMgr::GpuCacheMgr() {
-    server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
-    std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1");
-    std::vector<uint64_t> gpu_ids;
-    for (auto i = 0; i < gpu_ids_str.length(); ) {
-        if (gpu_ids_str[i] != ',') {
-            int id = 0;
-            while (gpu_ids_str[i] != ',') {
-                id = id * 10 + gpu_ids_str[i] - '0';
+    void parse_gpu_ids(std::string gpu_ids_str, std::vector<uint64_t>& gpu_ids) {
+        for (auto i = 0; i < gpu_ids_str.length(); ) {
+            if (gpu_ids_str[i] != ',') {
+                int id = 0;
+                while (gpu_ids_str[i] <= '9' && gpu_ids_str[i] >= '0') {
+                    id = id * 10 + gpu_ids_str[i] - '0';
+                    ++i;
+                }
+                gpu_ids.push_back(id);
+            } else {
                 ++i;
             }
-            gpu_ids.push_back(id);
-        } else {
-            ++i;
         }
     }
+}
 
-    cache_->set_gpu_ids(gpu_ids);
+GpuCacheMgr::GpuCacheMgr() {
+    server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE);
+    std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1");
 
-    int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 1);
+    int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 2);
     cap *= unit;
     cache_ = std::make_shared<Cache>(cap, 1UL<<32);
 
+    std::vector<uint64_t> gpu_ids;
+    parse_gpu_ids(gpu_ids_str, gpu_ids);
+    cache_->set_gpu_ids(gpu_ids);
+
     double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85);
     if (free_percent > 0.0 && free_percent <= 1.0) {
         cache_->set_freemem_percent(free_percent);
diff --git a/cpp/src/cache/GpuCacheMgr.h b/cpp/src/cache/GpuCacheMgr.h
index a1d7d4be0d757ea52110a4c5d7ee4d5e942e7373..8c6a0c012ca30d1e04095227df173d8026c3265a 100644
--- a/cpp/src/cache/GpuCacheMgr.h
+++ b/cpp/src/cache/GpuCacheMgr.h
@@ -6,33 +6,34 @@
 #include "CacheMgr.h"
 
 #include <unordered_map>
+#include <memory>
 
 namespace zilliz {
 namespace milvus {
 namespace cache {
 
+class GpuCacheMgr;
+using GpuCacheMgrPtr = std::shared_ptr<GpuCacheMgr>;
+
 class GpuCacheMgr : public CacheMgr {
-private:
+public:
     GpuCacheMgr();
 
 public:
     static CacheMgr* GetInstance(uint64_t gpu_id) {
-        if (!instance_[gpu_id]) {
+        if (instance_.find(gpu_id) == instance_.end()) {
             std::lock_guard<std::mutex> lock(mutex_);
-            if(!instance_[gpu_id]) {
-                instance_.insert(std::pair<uint64_t, GpuCacheMgr*>(gpu_id, new GpuCacheMgr()));
-            }
+            instance_.insert(std::pair<uint64_t, GpuCacheMgrPtr>(gpu_id, std::make_shared<GpuCacheMgr>()));
+//            instance_[gpu_id] = std::make_shared<GpuCacheMgr>();
         }
-        return instance_.at(gpu_id);
-//        static GpuCacheMgr s_mgr;
-//        return &s_mgr;
+        return instance_[gpu_id].get();
     }
 
     void InsertItem(const std::string& key, const DataObjPtr& data) override;
 
 private:
     static std::mutex mutex_;
-    static std::unordered_map<uint64_t, GpuCacheMgr*> instance_;
+    static std::unordered_map<uint64_t, GpuCacheMgrPtr> instance_;
 };
 
 }
diff --git a/cpp/unittest/db/db_tests.cpp b/cpp/unittest/db/db_tests.cpp
index f2af4b773ac5f1b4669a00c0b7eabdad0ca5ead7..2426846c15c3ffe9321e56915d91ea232372df30 100644
--- a/cpp/unittest/db/db_tests.cpp
+++ b/cpp/unittest/db/db_tests.cpp
@@ -9,7 +9,6 @@
 #include "db/meta/MetaConsts.h"
 #include "db/Factories.h"
 #include "cache/CpuCacheMgr.h"
-#include "cache/GpuCacheMgr.h"
 #include "utils/CommonUtil.h"
 
 #include
@@ -438,9 +437,4 @@ TEST_F(DBTest2, DELETE_BY_RANGE_TEST) {
     ConvertTimeRangeToDBDates(start_value, end_value, dates);
 
     db_->DeleteTable(TABLE_NAME, dates);
-}
-
-TEST_F(DBTest, GPU_CACHE_MGR_TEST) {
-    std::vector<uint64_t> gpu_ids = cache::
-            cache::CpuCacheMgr::GetInstance()->CacheUsage();
 }
\ No newline at end of file
diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp
index 4d9379dc7342d2ec608caee0bbc66107a2c10d60..a4e19f0a980973f0589469ca75eefb2b87370022 100644
--- a/cpp/unittest/server/cache_test.cpp
+++ b/cpp/unittest/server/cache_test.cpp
@@ -146,7 +146,7 @@ TEST(CacheTest, CPU_CACHE_TEST) {
 }
 
 TEST(CacheTest, GPU_CACHE_TEST) {
-    cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance();
+    cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance(0);
 
     const int dim = 256;
 
@@ -164,6 +164,25 @@ TEST(CacheTest, GPU_CACHE_TEST) {
 
     gpu_mgr->ClearCache();
     ASSERT_EQ(gpu_mgr->ItemCount(), 0);
+
+    gpu_mgr->SetCapacity(4096000000);
+    for (auto i = 0; i < 3; i++) {
+        MockVecIndex *mock_index = new MockVecIndex();
+        mock_index->ntotal_ = 1000000; //2G
+        engine::VecIndexPtr index(mock_index);
+        cache::DataObjPtr data_obj = std::make_shared<cache::DataObj>(index);
+        std::cout << data_obj->size() << std::endl;
+        gpu_mgr->InsertItem("index_" + std::to_string(i), data_obj);
+    }
+
+//    ASSERT_EQ(gpu_mgr->ItemCount(), 2);
+//    auto obj0 = gpu_mgr->GetItem("index_0");
+//    ASSERT_EQ(obj0, nullptr);
+//    auto obj1 = gpu_mgr->GetItem("index_1");
+//    auto obj2 = gpu_mgr->GetItem("index_2");
+    gpu_mgr->ClearCache();
+    ASSERT_EQ(gpu_mgr->ItemCount(), 0);
+
 }
 
 TEST(CacheTest, INVALID_TEST) {
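Note on the two pieces this change introduces (the comma-separated `gpu_ids` parsing helper and the one-manager-per-GPU-id `GetInstance(gpu_id)` registry): the sketch below is a minimal standalone C++ illustration of the same pattern, not the project's code. `DemoGpuCacheMgr` and the return-by-value `parse_gpu_ids` variant are hypothetical stand-ins, and the sketch does the find-or-insert entirely under the mutex, which is one way to avoid a check-then-insert race on the first call for a given id.

```cpp
// Standalone sketch; assumes no Milvus headers. Names are illustrative only.
#include <cstdint>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

// Parse a comma-separated id list such as "0,1,3" into numeric ids,
// skipping non-digit characters -- the same idea as the diff's parse_gpu_ids().
std::vector<uint64_t> parse_gpu_ids(const std::string& gpu_ids_str) {
    std::vector<uint64_t> gpu_ids;
    uint64_t id = 0;
    bool in_number = false;
    for (char c : gpu_ids_str) {
        if (c >= '0' && c <= '9') {
            id = id * 10 + static_cast<uint64_t>(c - '0');
            in_number = true;
        } else if (in_number) {   // a separator ends the current number
            gpu_ids.push_back(id);
            id = 0;
            in_number = false;
        }
    }
    if (in_number) {              // flush the trailing number
        gpu_ids.push_back(id);
    }
    return gpu_ids;
}

// One lazily created manager per GPU id, kept in a mutex-guarded map of
// shared_ptr -- the same shape as GpuCacheMgr::GetInstance(gpu_id) in the
// diff, but the lookup and the insertion both happen under the lock.
class DemoGpuCacheMgr {
public:
    static DemoGpuCacheMgr* GetInstance(uint64_t gpu_id) {
        std::lock_guard<std::mutex> lock(mutex_);
        auto it = instance_.find(gpu_id);
        if (it == instance_.end()) {
            it = instance_.emplace(gpu_id, std::make_shared<DemoGpuCacheMgr>(gpu_id)).first;
        }
        return it->second.get();
    }

    explicit DemoGpuCacheMgr(uint64_t gpu_id) : gpu_id_(gpu_id) {}
    uint64_t gpu_id() const { return gpu_id_; }

private:
    uint64_t gpu_id_;
    static std::mutex mutex_;
    static std::unordered_map<uint64_t, std::shared_ptr<DemoGpuCacheMgr>> instance_;
};

std::mutex DemoGpuCacheMgr::mutex_;
std::unordered_map<uint64_t, std::shared_ptr<DemoGpuCacheMgr>> DemoGpuCacheMgr::instance_;

int main() {
    for (uint64_t id : parse_gpu_ids("0,1,3")) {
        std::cout << "gpu " << DemoGpuCacheMgr::GetInstance(id)->gpu_id() << "\n";
    }
    // Repeated lookups for the same id return the same instance.
    std::cout << std::boolalpha
              << (DemoGpuCacheMgr::GetInstance(0) == DemoGpuCacheMgr::GetInstance(0)) << "\n";
    return 0;
}
```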