diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
index 5f916f3370a9afe9f81318252cc94253fbb93eb5..c6c9291388be66a45d2072d9a6f8ff1404f6ecad 100644
--- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.cpp
@@ -180,7 +180,7 @@ IVFSQHybrid::UnsetQuantizer() {
     ivf_index->quantizer = nullptr;
 }
 
-void
+VectorIndexPtr
 IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
     auto quantizer_conf = std::dynamic_pointer_cast<QuantizerCfg>(conf);
     if (quantizer_conf != nullptr) {
@@ -207,8 +207,10 @@ IVFSQHybrid::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
         index_composition->mode = quantizer_conf->mode;  // only 2
 
         auto gpu_index = faiss::gpu::index_cpu_to_gpu(res->faiss_res.get(), gpu_id_, index_composition, &option);
-        index_.reset(gpu_index);
-        gpu_mode = 2;  // all in gpu
+        std::shared_ptr<faiss::Index> new_idx;
+        new_idx.reset(gpu_index);
+        auto sq_idx = std::make_shared<IVFSQHybrid>(new_idx, gpu_id_, res);
+        return sq_idx;
     } else {
         KNOWHERE_THROW_MSG("CopyCpuToGpu Error, can't get gpu_resource");
     }
diff --git a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h
index 4273a412f6b158936a5fdd3aae8fb511c0bb0804..d0c58baaf3aef9ab0167f310137832b4d702b726 100644
--- a/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h
+++ b/cpp/src/core/knowhere/knowhere/index/vector_index/IndexIVFSQHybrid.h
@@ -60,8 +60,7 @@ class IVFSQHybrid : public GPUIVFSQ {
     void
     UnsetQuantizer();
 
-    // todo(xiaojun): return void => VecIndex
-    void
+    VectorIndexPtr
     LoadData(const knowhere::QuantizerPtr& q, const Config& conf);
 
     IndexModelPtr
diff --git a/cpp/src/core/unittest/test_ivf.cpp b/cpp/src/core/unittest/test_ivf.cpp
index c6faea9182c866c25d2e409e5123c45d82e6fb3c..c5066e96710906a7c1a517c42f90043b16a3eb5f 100644
--- a/cpp/src/core/unittest/test_ivf.cpp
+++ b/cpp/src/core/unittest/test_ivf.cpp
@@ -253,9 +253,9 @@ TEST_P(IVFTest, hybrid) {
         quantizer_conf->gpu_id = device_id;
         auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf);
         quantizer_conf->mode = 2;
-        hybrid_2_idx->LoadData(q, quantizer_conf);
+        auto gpu_idx = hybrid_2_idx->LoadData(q, quantizer_conf);
 
-        auto result = hybrid_2_idx->Search(query_dataset, conf);
+        auto result = gpu_idx->Search(query_dataset, conf);
         AssertAnns(result, nq, conf->k);
         PrintResult(result, nq, k);
     }
diff --git a/cpp/src/db/engine/ExecutionEngineImpl.cpp b/cpp/src/db/engine/ExecutionEngineImpl.cpp
index 6b8bb622ddb488514ea9ce3b5e19ee4085d66391..c70a5c3b210e8eed356a3a8336de6042c83fa2c3 100644
--- a/cpp/src/db/engine/ExecutionEngineImpl.cpp
+++ b/cpp/src/db/engine/ExecutionEngineImpl.cpp
@@ -256,11 +256,16 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
         conf->gpu_id = device_id;
 
         if (quantizer) {
             // cache hit
             conf->mode = 2;
-            index_->SetQuantizer(quantizer->Data());
-            index_->LoadData(quantizer->Data(), conf);
+            // LoadData returns a new index and signals failure with nullptr;
+            // only replace index_ when a new index was actually produced.
+            if (auto new_index = index_->LoadData(quantizer->Data(), conf)) {
+                index_ = new_index;
+            } else {
+                ENGINE_LOG_ERROR << "ExecutionEngineImpl: LoadData returned null, failed to copy to gpu";
+            }
         } else {
             // cache miss
             if (index_ == nullptr) {
                 ENGINE_LOG_ERROR << "ExecutionEngineImpl: index is null, failed to copy to gpu";
@@ -268,9 +273,12 @@ ExecutionEngineImpl::CopyToGpu(uint64_t device_id, bool hybrid) {
             }
             conf->mode = 1;
             auto q = index_->LoadQuantizer(conf);
-            index_->SetQuantizer(q);
             conf->mode = 2;
-            index_->LoadData(q, conf);
+            if (auto new_index = index_->LoadData(q, conf)) {
+                index_ = new_index;
+            } else {
+                ENGINE_LOG_ERROR << "ExecutionEngineImpl: LoadData returned null, failed to copy to gpu";
+            }
 
             // cache
             auto cached_quantizer = std::make_shared<CachedQuantizer>(q);
@@ -445,7 +453,9 @@ ExecutionEngineImpl::Search(int64_t n, const float* data, int64_t k, int64_t npr
 
     auto status = index_->Search(n, data, distances, labels, conf);
 
-    HybridUnset();
+    if (hybrid) {
+        HybridUnset();
+    }
 
     if (!status.ok()) {
         ENGINE_LOG_ERROR << "Search error";
diff --git a/cpp/src/wrapper/VecImpl.cpp b/cpp/src/wrapper/VecImpl.cpp
index 1ed20c8029b76408078fe76b57c33d9f22530e80..3ff79690aa21fb093a1ff965e8212d7bf0197ed6 100644
--- a/cpp/src/wrapper/VecImpl.cpp
+++ b/cpp/src/wrapper/VecImpl.cpp
@@ -315,24 +315,24 @@ IVFHybridIndex::UnsetQuantizer() {
     return Status::OK();
 }
 
-Status
+// Delegates to knowhere::IVFSQHybrid::LoadData and wraps the index it returns in a
+// new IVFHybridIndex of the same type. Returns nullptr when index_ is not an
+// IVFSQHybrid or when loading throws; the error is logged, not propagated.
+VecIndexPtr
 IVFHybridIndex::LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
     try {
         // TODO(linxj): Hardcode here
         if (auto new_idx = std::dynamic_pointer_cast<knowhere::IVFSQHybrid>(index_)) {
-            new_idx->LoadData(q, conf);
+            return std::make_shared<IVFHybridIndex>(new_idx->LoadData(q, conf), type);
         } else {
             WRAPPER_LOG_ERROR << "Hybrid mode not support for index type: " << int(type);
-            return Status(KNOWHERE_ERROR, "not support");
         }
     } catch (knowhere::KnowhereException& e) {
         WRAPPER_LOG_ERROR << e.what();
-        return Status(KNOWHERE_UNEXPECTED_ERROR, e.what());
     } catch (std::exception& e) {
         WRAPPER_LOG_ERROR << e.what();
-        return Status(KNOWHERE_ERROR, e.what());
     }
-    return Status::OK();
+    return nullptr;
 }
 
 }  // namespace engine
diff --git a/cpp/src/wrapper/VecImpl.h b/cpp/src/wrapper/VecImpl.h
index fd9bb79c0a9340229881532b50bbdde1f265820d..1f5ca296bb80bd087f0c499b430ce606edf37d50 100644
--- a/cpp/src/wrapper/VecImpl.h
+++ b/cpp/src/wrapper/VecImpl.h
@@ -106,7 +106,7 @@ class IVFHybridIndex : public IVFMixIndex {
     Status
     UnsetQuantizer() override;
 
-    Status
+    VecIndexPtr
     LoadData(const knowhere::QuantizerPtr& q, const Config& conf) override;
 };
 
diff --git a/cpp/src/wrapper/VecIndex.h b/cpp/src/wrapper/VecIndex.h
index f5fdd49466bcd4ce5c8f2ff484dbe8103503f5bd..55981ef528a330d780803e18a651f3b1734ff9ef 100644
--- a/cpp/src/wrapper/VecIndex.h
+++ b/cpp/src/wrapper/VecIndex.h
@@ -103,9 +103,10 @@ class VecIndex : public cache::DataObj {
         return nullptr;
     }
 
-    virtual Status
+    // Default implementation: loads nothing and returns nullptr. Overridden by IVFHybridIndex.
+    virtual VecIndexPtr
     LoadData(const knowhere::QuantizerPtr& q, const Config& conf) {
-        return Status::OK();
+        return nullptr;
     }
 
     virtual Status