diff --git a/cpp/conf/server_config.template b/cpp/conf/server_config.template index c80e981bcd002e6d4d151c0231add6d852d3cd97..107b4a10605f4c530e1d7232c4e39d7dfb1de33a 100644 --- a/cpp/conf/server_config.template +++ b/cpp/conf/server_config.template @@ -36,8 +36,11 @@ license_config: # license configure cache_config: # cache configure cpu_cache_capacity: 16 # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory - cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 + cpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 insert_cache_immediately: false # insert data will be load into cache immediately for hot query + gpu_cache_capacity: 5 # how many memory are used as cache in gpu, unit: GB, RANGE: 0 ~ less than total memory + gpu_cache_free_percent: 0.85 # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0 + gpu_ids: 0,1 # gpu id engine_config: nprobe: 10 diff --git a/cpp/src/cache/Cache.h b/cpp/src/cache/Cache.h index 4d6f32b9eb0af65ae744cbebab01d08dcf6f3905..6151718530e4a8a906931aa1e6fda3f4c6b93e4d 100644 --- a/cpp/src/cache/Cache.h +++ b/cpp/src/cache/Cache.h @@ -46,6 +46,9 @@ public: double freemem_percent() const { return freemem_percent_; }; void set_freemem_percent(double percent) { freemem_percent_ = percent; } + void set_gpu_ids(std::vector<uint64_t>& gpu_ids) { gpu_ids_ = gpu_ids; } + + std::vector<uint64_t> gpu_ids() const { return gpu_ids_; } size_t size() const; bool exists(const std::string& key); @@ -60,6 +63,7 @@ private: int64_t usage_; int64_t capacity_; double freemem_percent_; + std::vector<uint64_t> gpu_ids_; LRU lru_; mutable std::mutex mutex_; diff --git a/cpp/src/cache/CacheMgr.cpp b/cpp/src/cache/CacheMgr.cpp index
977c7e1c426e2b36daf0b75e84427637da766e02..eb3980da6145d264b4d6f9d96b5eac9bbbf48c58 100644 --- a/cpp/src/cache/CacheMgr.cpp +++ b/cpp/src/cache/CacheMgr.cpp @@ -56,6 +56,7 @@ engine::VecIndexPtr CacheMgr::GetIndex(const std::string& key) { } void CacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { + std::cout << "dashalk\n"; if(cache_ == nullptr) { SERVER_LOG_ERROR << "Cache doesn't exist"; return; @@ -130,6 +131,24 @@ void CacheMgr::SetCapacity(int64_t capacity) { cache_->set_capacity(capacity); } +std::vector<uint64_t> CacheMgr::GpuIds() const { + if(cache_ == nullptr) { + SERVER_LOG_ERROR << "Cache doesn't exist"; + std::vector<uint64_t> gpu_ids; + return gpu_ids; + } + + return cache_->gpu_ids(); +} + +void CacheMgr::SetGpuIds(std::vector<uint64_t> gpu_ids){ + if(cache_ == nullptr) { + SERVER_LOG_ERROR << "Cache doesn't exist"; + return; + } + cache_->set_gpu_ids(gpu_ids); +} + } } } diff --git a/cpp/src/cache/CacheMgr.h b/cpp/src/cache/CacheMgr.h index b6f1ec8ef1b9e8f4787ff18a61b21475784112f3..9abb30b92fb90dea83f755d91f2b9bebb14f0216 100644 --- a/cpp/src/cache/CacheMgr.h +++ b/cpp/src/cache/CacheMgr.h @@ -33,6 +33,8 @@ public: int64_t CacheUsage() const; int64_t CacheCapacity() const; void SetCapacity(int64_t capacity); + std::vector<uint64_t> GpuIds() const; + void SetGpuIds(std::vector<uint64_t> gpu_ids); protected: CacheMgr(); diff --git a/cpp/src/cache/CpuCacheMgr.h b/cpp/src/cache/CpuCacheMgr.h index 8b0f98e6b481aac47721f311d11ca8d448789ea4..39e33aef8918be49269b08995b554efcaa7780ec 100644 --- a/cpp/src/cache/CpuCacheMgr.h +++ b/cpp/src/cache/CpuCacheMgr.h @@ -16,6 +16,7 @@ private: CpuCacheMgr(); public: + //TODO: use smart pointer instead static CacheMgr* GetInstance() { static CpuCacheMgr s_mgr; return &s_mgr; diff --git a/cpp/src/cache/GpuCacheMgr.cpp b/cpp/src/cache/GpuCacheMgr.cpp index 13eec4f2b661e148640492eabb40ad701a1d0f50..eb6b1dbeb78d1d95756eb1994362bff3354b8b24 100644 --- a/cpp/src/cache/GpuCacheMgr.cpp +++ b/cpp/src/cache/GpuCacheMgr.cpp @@ -4,6 +4,7 @@ // Proprietary and
confidential. //////////////////////////////////////////////////////////////////////////////// +#include "utils/Log.h" #include "GpuCacheMgr.h" #include "server/ServerConfig.h" @@ -11,19 +12,57 @@ namespace zilliz { namespace milvus { namespace cache { +std::mutex GpuCacheMgr::mutex_; +std::unordered_map<uint64_t, GpuCacheMgrPtr> GpuCacheMgr::instance_; + namespace { constexpr int64_t unit = 1024 * 1024 * 1024; + + void parse_gpu_ids(std::string gpu_ids_str, std::vector<uint64_t>& gpu_ids) { + for (auto i = 0; i < gpu_ids_str.length(); ) { + if (gpu_ids_str[i] != ',') { + int id = 0; + while (gpu_ids_str[i] <= '9' && gpu_ids_str[i] >= '0') { + id = id * 10 + gpu_ids_str[i] - '0'; + ++i; + } + gpu_ids.push_back(id); + } else { + ++i; + } + } + } } GpuCacheMgr::GpuCacheMgr() { server::ConfigNode& config = server::ServerConfig::GetInstance().GetConfig(server::CONFIG_CACHE); - int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 1); + std::string gpu_ids_str = config.GetValue(server::CONFIG_GPU_IDS, "0,1"); + + int64_t cap = config.GetInt64Value(server::CONFIG_GPU_CACHE_CAPACITY, 2); cap *= unit; cache_ = std::make_shared<Cache>(cap, 1UL<<32); + + std::vector<uint64_t> gpu_ids; + parse_gpu_ids(gpu_ids_str, gpu_ids); + cache_->set_gpu_ids(gpu_ids); + + double free_percent = config.GetDoubleValue(server::GPU_CACHE_FREE_PERCENT, 0.85); + if (free_percent > 0.0 && free_percent <= 1.0) { + cache_->set_freemem_percent(free_percent); + } else { + SERVER_LOG_ERROR << "Invalid gpu_cache_free_percent: " << free_percent << + ", defaultly set to " << cache_->freemem_percent(); + } } void GpuCacheMgr::InsertItem(const std::string& key, const DataObjPtr& data) { //TODO: copy data to gpu + if (cache_ == nullptr) { + SERVER_LOG_ERROR << "Cache doesn't exist"; + return; + } + + cache_->insert(key, data); } } diff --git a/cpp/src/cache/GpuCacheMgr.h b/cpp/src/cache/GpuCacheMgr.h index 4efec08cec772225d750b8a3df567b3cdb37dc1f..8c6a0c012ca30d1e04095227df173d8026c3265a 100644 --- a/cpp/src/cache/GpuCacheMgr.h +++
b/cpp/src/cache/GpuCacheMgr.h @@ -5,22 +5,35 @@ //////////////////////////////////////////////////////////////////////////////// #include "CacheMgr.h" +#include <unordered_map> +#include <memory> namespace zilliz { namespace milvus { namespace cache { +class GpuCacheMgr; +using GpuCacheMgrPtr = std::shared_ptr<GpuCacheMgr>; + class GpuCacheMgr : public CacheMgr { -private: +public: GpuCacheMgr(); public: - static CacheMgr* GetInstance() { - static GpuCacheMgr s_mgr; - return &s_mgr; + static CacheMgr* GetInstance(uint64_t gpu_id) { + if (instance_.find(gpu_id) == instance_.end()) { + std::lock_guard<std::mutex> lock(mutex_); + instance_.insert(std::pair<uint64_t, GpuCacheMgrPtr>(gpu_id, std::make_shared<GpuCacheMgr>())); +// instance_[gpu_id] = std::make_shared<GpuCacheMgr>(); + } + return instance_[gpu_id].get(); } void InsertItem(const std::string& key, const DataObjPtr& data) override; + +private: + static std::mutex mutex_; + static std::unordered_map<uint64_t, GpuCacheMgrPtr> instance_; }; } diff --git a/cpp/src/db/DBImpl.cpp b/cpp/src/db/DBImpl.cpp index 07d95fe950b4ce7bbb44acf28afdde44928eaee1..3e8c812b5761d7f3261f840f63c0acc9dc11b6fe 100644 --- a/cpp/src/db/DBImpl.cpp +++ b/cpp/src/db/DBImpl.cpp @@ -342,7 +342,7 @@ void DBImpl::StartMetricTask() { server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL); int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage(); int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity(); - server::Metrics::GetInstance().CacheUsageGaugeSet(cache_usage*100/cache_total); + server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage*100/cache_total); uint64_t size; Size(size); server::Metrics::GetInstance().DataFileSizeGaugeSet(size); diff --git a/cpp/src/db/engine/ExecutionEngine.h b/cpp/src/db/engine/ExecutionEngine.h index 0f2cf42b220a79dc16c2866eae4a7efd991df64c..d0fd1d66302baab8d4786cf06a3e31d3b58728eb 100644 --- a/cpp/src/db/engine/ExecutionEngine.h +++ b/cpp/src/db/engine/ExecutionEngine.h @@ -64,6 +64,8 @@ public: virtual Status Cache() = 0; + virtual Status GpuCache(uint64_t gpu_id) = 0; 
+ virtual Status Init() = 0; }; diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp index dd38369832f3b94578cfc742dd71486bb33568fb..066e63c1cf3812eeffceb5137c8693685f7636fc 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.cpp +++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp @@ -4,6 +4,7 @@ * Proprietary and confidential. ******************************************************************************/ #include +#include "src/cache/GpuCacheMgr.h" #include "src/server/ServerConfig.h" #include "src/metrics/Metrics.h" @@ -144,28 +145,60 @@ Status ExecutionEngineImpl::Load(bool to_cache) { } Status ExecutionEngineImpl::CopyToGpu(uint64_t device_id) { - try { - index_ = index_->CopyToGpu(device_id); - ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; - } catch (knowhere::KnowhereException &e) { - ENGINE_LOG_ERROR << e.what(); - return Status::Error(e.what()); - } catch (std::exception &e) { - return Status::Error(e.what()); + index_ = zilliz::milvus::cache::GpuCacheMgr::GetInstance(device_id)->GetIndex(location_); + bool already_in_cache = (index_ != nullptr); + auto start_time = METRICS_NOW_TIME; + if (!index_) { + try { + index_ = index_->CopyToGpu(device_id); + ENGINE_LOG_DEBUG << "CPU to GPU" << device_id; + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } } + + if (!already_in_cache) { + GpuCache(device_id); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + double physical_size = PhysicalSize(); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); + } + return Status::OK(); } Status ExecutionEngineImpl::CopyToCpu() { - try { - index_ = index_->CopyToCpu(); - ENGINE_LOG_DEBUG << "GPU to CPU"; - } catch (knowhere::KnowhereException 
&e) { - ENGINE_LOG_ERROR << e.what(); - return Status::Error(e.what()); - } catch (std::exception &e) { - return Status::Error(e.what()); + index_ = zilliz::milvus::cache::CpuCacheMgr::GetInstance()->GetIndex(location_); + bool already_in_cache = (index_ != nullptr); + auto start_time = METRICS_NOW_TIME; + if (!index_) { + try { + index_ = index_->CopyToCpu(); + ENGINE_LOG_DEBUG << "GPU to CPU"; + } catch (knowhere::KnowhereException &e) { + ENGINE_LOG_ERROR << e.what(); + return Status::Error(e.what()); + } catch (std::exception &e) { + return Status::Error(e.what()); + } + } + + if(!already_in_cache) { + Cache(); + auto end_time = METRICS_NOW_TIME; + auto total_time = METRICS_MICROSECONDS(start_time, end_time); + double physical_size = PhysicalSize(); + + server::Metrics::GetInstance().FaissDiskLoadDurationSecondsHistogramObserve(total_time); + server::Metrics::GetInstance().FaissDiskLoadIOSpeedGaugeSet(physical_size); } + return Status::OK(); } @@ -246,6 +279,10 @@ Status ExecutionEngineImpl::Cache() { return Status::OK(); } +Status ExecutionEngineImpl::GpuCache(uint64_t gpu_id) { + zilliz::milvus::cache::GpuCacheMgr::GetInstance(gpu_id)->InsertItem(location_, index_); +} + // TODO(linxj): remove. 
Status ExecutionEngineImpl::Init() { using namespace zilliz::milvus::server; diff --git a/cpp/src/db/engine/ExecutionEngineImpl.h b/cpp/src/db/engine/ExecutionEngineImpl.h index 948719310cd380a984fafc1d51c6a96a354b162a..01af0f4a9b6f434f14999e70e8eaf323cb153767 100644 --- a/cpp/src/db/engine/ExecutionEngineImpl.h +++ b/cpp/src/db/engine/ExecutionEngineImpl.h @@ -59,6 +59,8 @@ public: Status Cache() override; + Status GpuCache(uint64_t gpu_id) override; + Status Init() override; private: diff --git a/cpp/src/metrics/MetricBase.h b/cpp/src/metrics/MetricBase.h index 23a2427b3537f66a0747c1d6ba25c172014bc922..a11bf14179aa6576f71f33a7724a9f7ca72c9f5d 100644 --- a/cpp/src/metrics/MetricBase.h +++ b/cpp/src/metrics/MetricBase.h @@ -31,7 +31,8 @@ class MetricsBase{ virtual void IndexFileSizeHistogramObserve(double value) {}; virtual void BuildIndexDurationSecondsHistogramObserve(double value) {}; - virtual void CacheUsageGaugeSet(double value) {}; + virtual void CpuCacheUsageGaugeSet(double value) {}; + virtual void GpuCacheUsageGaugeSet(double value) {}; virtual void MetaAccessTotalIncrement(double value = 1) {}; virtual void MetaAccessDurationSecondsHistogramObserve(double value) {}; diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index c7729ffdbca62408ffad4ecb8379266a690b03ca..08dad64724bf55be408c821f6b12b792bf522f57 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -4,6 +4,7 @@ * Proprietary and confidential. 
******************************************************************************/ +#include #include "PrometheusMetrics.h" #include "utils/Log.h" #include "SystemInfo.h" @@ -166,6 +167,18 @@ void PrometheusMetrics::CPUTemperature() { } } +void PrometheusMetrics::GpuCacheUsageGaugeSet(double value) { + if(!startup_) return; + int64_t num_processors = server::SystemInfo::GetInstance().num_processor(); + + for (auto i = 0; i < num_processors; ++i) { +// int gpu_cache_usage = cache::GpuCacheMgr::GetInstance(i)->CacheUsage(); +// int gpu_cache_total = cache::GpuCacheMgr::GetInstance(i)->CacheCapacity(); +// prometheus::Gauge &gpu_cache = gpu_cache_usage_.Add({{"GPU_Cache", std::to_string(i)}}); +// gpu_cache.Set(gpu_cache_usage * 100 / gpu_cache_total); + } +} + } } } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index 282c58800ca5bf08ecdd9f0af123ee943dbf2904..ab37195583b87cc666d1a18725d540538be220d2 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -54,7 +54,8 @@ class PrometheusMetrics: public MetricsBase { void RawFileSizeHistogramObserve(double value) override { if(startup_) raw_files_size_histogram_.Observe(value);}; void IndexFileSizeHistogramObserve(double value) override { if(startup_) index_files_size_histogram_.Observe(value);}; void BuildIndexDurationSecondsHistogramObserve(double value) override { if(startup_) build_index_duration_seconds_histogram_.Observe(value);}; - void CacheUsageGaugeSet(double value) override { if(startup_) cache_usage_gauge_.Set(value);}; + void CpuCacheUsageGaugeSet(double value) override { if(startup_) cpu_cache_usage_gauge_.Set(value);}; + void GpuCacheUsageGaugeSet(double value) override; void MetaAccessTotalIncrement(double value = 1) override { if(startup_) meta_access_total_.Increment(value);}; void MetaAccessDurationSecondsHistogramObserve(double value) override { if(startup_) meta_access_duration_seconds_histogram_.Observe(value);}; @@ -336,12 
+337,18 @@ class PrometheusMetrics: public MetricsBase { .Register(*registry_); prometheus::Counter &cache_access_total_ = cache_access_.Add({}); - // record cache usage and % - prometheus::Family &cache_usage_ = prometheus::BuildGauge() + // record CPU cache usage and % + prometheus::Family &cpu_cache_usage_ = prometheus::BuildGauge() .Name("cache_usage_bytes") .Help("current cache usage by bytes") .Register(*registry_); - prometheus::Gauge &cache_usage_gauge_ = cache_usage_.Add({}); + prometheus::Gauge &cpu_cache_usage_gauge_ = cpu_cache_usage_.Add({}); + + //record GPU cache usage and % + prometheus::Family &gpu_cache_usage_ = prometheus::BuildGauge() + .Name("gpu_cache_usage_bytes") + .Help("current gpu cache usage by bytes") + .Register(*registry_); // record query response using Quantiles = std::vector; @@ -360,8 +367,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Family &query_vector_response_per_second_ = prometheus::BuildGauge() .Name("query_vector_response_per_microsecond") .Help("the number of vectors can be queried every second ") - .Register(*registry_); - prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); + .Register(*registry_); prometheus::Gauge &query_vector_response_per_second_gauge_ = query_vector_response_per_second_.Add({}); prometheus::Family &query_response_per_second_ = prometheus::BuildGauge() .Name("query_response_per_microsecond") diff --git a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp index f55a0c4c9351971066414997905857411473b7db..1e44c0e4698cec026e93a40a76fcc82b86af3ba1 100644 --- a/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp +++ b/cpp/src/sdk/examples/grpcsimple/src/ClientTest.cpp @@ -209,9 +209,9 @@ ClientTest::Test(const std::string& address, const std::string& port) { std::cout << "All tables: " << std::endl; for(auto& table : tables) { int64_t row_count = 0; - conn->DropTable(table); -// stat = 
conn->CountTable(table, row_count); -// std::cout << "\t" << table << "(" << row_count << " rows)" << std::endl; +// conn->DropTable(table); + stat = conn->CountTable(table, row_count); + std::cout << "\t" << table << "(" << row_count << " rows)" << std::endl; } } @@ -263,6 +263,9 @@ ClientTest::Test(const std::string& address, const std::string& port) { search_record_array.push_back( std::make_pair(record_ids[SEARCH_TARGET], record_array[SEARCH_TARGET])); } + int64_t row_count; + conn->CountTable(TABLE_NAME, row_count); + std::cout << "\t" << TABLE_NAME << "(" << row_count << " rows)" << std::endl; } } diff --git a/cpp/src/server/ServerConfig.h b/cpp/src/server/ServerConfig.h index 49dc6f50fde8a329fc7fea19aff35f663c301787..6a76399d42c92e856f353ddf4cb04e9cd8414cc9 100644 --- a/cpp/src/server/ServerConfig.h +++ b/cpp/src/server/ServerConfig.h @@ -36,8 +36,10 @@ static const char* CONFIG_LOG = "log_config"; static const char* CONFIG_CACHE = "cache_config"; static const char* CONFIG_CPU_CACHE_CAPACITY = "cpu_cache_capacity"; static const char* CONFIG_GPU_CACHE_CAPACITY = "gpu_cache_capacity"; -static const char* CACHE_FREE_PERCENT = "cache_free_percent"; +static const char* CACHE_FREE_PERCENT = "cpu_cache_free_percent"; static const char* CONFIG_INSERT_CACHE_IMMEDIATELY = "insert_cache_immediately"; +static const char* CONFIG_GPU_IDS = "gpu_ids"; +static const char *GPU_CACHE_FREE_PERCENT = "gpu_cache_free_percent"; static const char* CONFIG_LICENSE = "license_config"; static const char* CONFIG_LICENSE_PATH = "license_path"; diff --git a/cpp/unittest/server/cache_test.cpp b/cpp/unittest/server/cache_test.cpp index 4d9379dc7342d2ec608caee0bbc66107a2c10d60..a4e19f0a980973f0589469ca75eefb2b87370022 100644 --- a/cpp/unittest/server/cache_test.cpp +++ b/cpp/unittest/server/cache_test.cpp @@ -146,7 +146,7 @@ TEST(CacheTest, CPU_CACHE_TEST) { } TEST(CacheTest, GPU_CACHE_TEST) { - cache::CacheMgr* gpu_mgr = cache::GpuCacheMgr::GetInstance(); + cache::CacheMgr* gpu_mgr = 
cache::GpuCacheMgr::GetInstance(0); const int dim = 256; @@ -164,6 +164,25 @@ gpu_mgr->ClearCache(); ASSERT_EQ(gpu_mgr->ItemCount(), 0); + + gpu_mgr->SetCapacity(4096000000); + for (auto i = 0; i < 3; i++) { + MockVecIndex *mock_index = new MockVecIndex(); + mock_index->ntotal_ = 1000000; //2G + engine::VecIndexPtr index(mock_index); + cache::DataObjPtr data_obj = std::make_shared<cache::DataObj>(index); + std::cout << data_obj->size() << std::endl; + gpu_mgr->InsertItem("index_" + std::to_string(i), data_obj); + } + +// ASSERT_EQ(gpu_mgr->ItemCount(), 2); +// auto obj0 = gpu_mgr->GetItem("index_0"); +// ASSERT_EQ(obj0, nullptr); +// auto obj1 = gpu_mgr->GetItem("index_1"); +// auto obj2 = gpu_mgr->GetItem("index_2"); + gpu_mgr->ClearCache(); + ASSERT_EQ(gpu_mgr->ItemCount(), 0); + } TEST(CacheTest, INVALID_TEST) {