diff --git a/core/predictor/framework/infer.cpp b/core/predictor/framework/infer.cpp
index e0c284df5b9427e8e60bc1cfa19941f20cf2be9f..0290612287de7c5d63626fb28ebc092f03dd4d15 100644
--- a/core/predictor/framework/infer.cpp
+++ b/core/predictor/framework/infer.cpp
@@ -349,7 +349,7 @@ T* VersionedInferEngine::get_core() {
 }
 
 template <typename T>
-T* VersionedInferEngine::get_core(uint64_t version) {
+T* VersionedInferEngine::get_core(const uint64_t version) {
   auto iter = _versions.find(version);
   if (iter == _versions.end()) {
     LOG(ERROR) << "Not found version engine: " << version;
@@ -539,7 +539,7 @@ int InferManager::infer(const char* model_name,
 }
 
 template <typename T>
-T* InferManager::get_core(const char* model_name, uint64_t version) {
+T* InferManager::get_core(const char* model_name, const uint64_t version) {
   auto it = _map.find(model_name);
   if (it == _map.end()) {
     LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 67a7cf2f6396ec1b5b47c23f87b78ae77c178427..45146e43c6e503b69e3534a119d31c0da9d0bcfa 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -277,7 +277,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
     LOG(WARNING) << "Loading cube cache[" << next_idx << "] ...";
     std::string model_path = conf.model_dir();
     if (access(model_path.c_str(), F_OK) == 0) {
-      std::string cube_cache_path = model_path + "cube_cache";
+      std::string cube_cache_path = model_path + "/" + "cube_cache";
       int reload_cache_ret = md->caches[next_idx]->reload_data(cube_cache_path);
       LOG(WARNING) << "Loading cube cache[" << next_idx << "] done.";
     } else {
@@ -543,9 +543,9 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
           lod_tensor_in->CopyFromCpu(data);
         } else {
           LOG(ERROR) << "Inference not support type["
-                     << (*tensorVector_in_pointer)[i].dtype
-                     << "],name[" << (*tensorVector_in_pointer)[i].name
-                     << "]" << " copy into core failed!";
+                     << (*tensorVector_in_pointer)[i].dtype << "],name["
+                     << (*tensorVector_in_pointer)[i].name << "]"
+                     << " copy into core failed!";
         }
         // Paddle inference will support FP16 in next version.
         // else if ((*tensorVector_in_pointer)[i].dtype ==
@@ -724,7 +724,7 @@ class VersionedInferEngine : public InferEngine {
   int infer(const void* in, void* out, uint32_t batch_size, uint64_t version);
 
   template <typename T>
-  T* get_core(uint64_t version);
+  T* get_core(const uint64_t version);
 
   int proc_initialize_impl(const configure::EngineDesc& conf, bool);
 
@@ -789,7 +789,7 @@ class InferManager {
 
   // Versioned get engine core
   template <typename T>
-  T* get_core(const char* model_name, uint64_t version);
+  T* get_core(const char* model_name, const uint64_t version);
 
   // query model version
   int query_version(const std::string& model, uint64_t& version);
diff --git a/python/pipeline/gateway/proto/gateway.proto b/python/pipeline/gateway/proto/gateway.proto
index 68df5747845947339906b4c85f063cded160d755..8dc27a2e953ffe66aa40818ff02ec313293b4c2a 100644
--- a/python/pipeline/gateway/proto/gateway.proto
+++ b/python/pipeline/gateway/proto/gateway.proto
@@ -51,6 +51,12 @@ message Tensor {
   // VarType: STRING
   repeated string str_data = 9;
 
+  // VarType: BYTES, suitable for big data. No need to save data type and
+  // dimensions separately.
+  // pack method: pack by BytesIO, save by np.save.
+  // unpack method: load by np.load, unpack by BytesIO.
+  bytes byte_data = 10;
+
   // Element types:
   // 0 => INT64
   // 1 => FP32
@@ -65,17 +71,18 @@ message Tensor {
   // 10 => COMPLEX64
   // 11 => COMPLEX128
   // 12 => STRING
-  int32 elem_type = 10;
+  // 13 => BYTES
+  int32 elem_type = 20;
 
   // Shape of the tensor, including batch dimensions.
-  repeated int32 shape = 11;
+  repeated int32 shape = 21;
 
   // Level of data(LOD), support variable length data, only for fetch tensor
   // currently.
-  repeated int32 lod = 12;
+  repeated int32 lod = 22;
 
   // Correspond to the variable 'name' in the model description prototxt.
-  string name = 13;
+  string name = 23;
 };
 
 // The structure of the service request. The input data can be repeated string
diff --git a/python/pipeline/operator.py b/python/pipeline/operator.py
index 2a74714fa8a1e30b44f2f64fbc7154162878df9a..4d4717d66f6877e9b9832c7d1a346eea48ddc84b 100644
--- a/python/pipeline/operator.py
+++ b/python/pipeline/operator.py
@@ -26,6 +26,7 @@ import collections
 import numpy as np
 import json
 from numpy import *
+from io import BytesIO
 if sys.version_info.major == 2:
     import Queue
 elif sys.version_info.major == 3:
@@ -59,7 +60,8 @@ _TENSOR_DTYPE_2_NUMPY_DATA_DTYPE = {
     9: "bool",  # VarType.BOOL
     10: "complex64",  # VarType.COMPLEX64
     11: "complex128",  # VarType.COMPLEX128
-    12: "string",  # dismatch with numpy
+    12: "string",  # load by numpy
+    13: "bytes",  # load by numpy
 }
 
 
@@ -1577,10 +1579,11 @@ class RequestOp(Op):
             UINT8
             INT8
             BOOL
+            BYTES
         Unsupported type:
+            STRING
             COMPLEX64
             COMPLEX128
-            STRING
 
         Args:
             tensor: one tensor in request.tensors.
@@ -1634,6 +1637,10 @@ class RequestOp(Op):
         elif tensor.elem_type == 9:
             # VarType: BOOL
             np_data = np.array(tensor.bool_data).astype(bool).reshape(dims)
+        elif tensor.elem_type == 13:
+            # VarType: BYTES
+            byte_data = BytesIO(tensor.byte_data)
+            np_data = np.load(byte_data, allow_pickle=True)
         else:
             _LOGGER.error("Sorry, the type {} of tensor {} is not supported.".
                           format(tensor.elem_type, tensor.name))
diff --git a/python/pipeline/pipeline_client.py b/python/pipeline/pipeline_client.py
index cf5462d6bdae2da621b78dbad7770b596e3ecb84..5585463e8edd859d9e542d22d7bed0da53e7d857 100644
--- a/python/pipeline/pipeline_client.py
+++ b/python/pipeline/pipeline_client.py
@@ -25,6 +25,7 @@ from .channel import ChannelDataErrcode
 from .proto import pipeline_service_pb2
 from .proto import pipeline_service_pb2_grpc
 import six
+from io import BytesIO
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -47,7 +48,8 @@ class PipelineClient(object):
         self._stub = pipeline_service_pb2_grpc.PipelineServiceStub(
             self._channel)
 
-    def _pack_request_package(self, feed_dict, pack_tensor_format, profile):
+    def _pack_request_package(self, feed_dict, pack_tensor_format,
+                              use_tensor_bytes, profile):
         req = pipeline_service_pb2.Request()
         logid = feed_dict.get("logid")
 
@@ -99,11 +101,9 @@ class PipelineClient(object):
                 one_tensor = req.tensors.add()
                 one_tensor.name = key
 
-                if (sys.version_info.major == 2 and
-                        isinstance(value, (str, unicode)) or
-                        ((sys.version_info.major == 3) and isinstance(value, str))):
-                    one_tensor.string_data.add(value)
-                    one_tensor.elem_type = 12  #12 => string
+                if isinstance(value, str):
+                    one_tensor.str_data.append(value)
+                    one_tensor.elem_type = 12  #12 => string in proto
                     continue
 
                 if isinstance(value, np.ndarray):
@@ -112,6 +112,14 @@ class PipelineClient(object):
                 for one_dim in value.shape:
                     one_tensor.shape.append(one_dim)
 
+                # packed into bytes
+                if use_tensor_bytes is True:
+                    np_bytes = BytesIO()
+                    np.save(np_bytes, value, allow_pickle=True)
+                    one_tensor.byte_data = np_bytes.getvalue()
+                    one_tensor.elem_type = 13  #13 => bytes in proto
+                    continue
+
                 flat_value = value.flatten().tolist()
                 # copy data
                 if value.dtype == "int64":
@@ -162,6 +170,7 @@ class PipelineClient(object):
                 fetch=None,
                 asyn=False,
                 pack_tensor_format=False,
+                use_tensor_bytes=False,
                 profile=False,
                 log_id=0):
         if not isinstance(feed_dict, dict):
@@ -170,7 +179,8 @@ class PipelineClient(object):
         if fetch is not None and not isinstance(fetch, list):
             raise TypeError("fetch must be list type with format: [name].")
         print("PipelineClient::predict pack_data time:{}".format(time.time()))
-        req = self._pack_request_package(feed_dict, pack_tensor_format, profile)
+        req = self._pack_request_package(feed_dict, pack_tensor_format,
+                                         use_tensor_bytes, profile)
         req.logid = log_id
         if not asyn:
             print("PipelineClient::predict before time:{}".format(time.time()))
diff --git a/python/pipeline/proto/pipeline_service.proto b/python/pipeline/proto/pipeline_service.proto
index e8bf04e3080d3f6cf6ed16232719d4da316a7506..ff3eda8bf9f5357a2930b05da0b7f893d916b2bd 100644
--- a/python/pipeline/proto/pipeline_service.proto
+++ b/python/pipeline/proto/pipeline_service.proto
@@ -48,6 +48,12 @@ message Tensor {
   // VarType: STRING
   repeated string str_data = 9;
 
+  // VarType: BYTES, suitable for big data. No need to save data type and
+  // dimensions separately.
+  // pack method: pack by BytesIO, save by np.save.
+  // unpack method: load by np.load, unpack by BytesIO.
+  bytes byte_data = 10;
+
   // Element types:
   // 0 => INT64
   // 1 => FP32
@@ -62,17 +68,18 @@ message Tensor {
   // 10 => COMPLEX64
   // 11 => COMPLEX128
   // 12 => STRING
-  int32 elem_type = 10;
+  // 13 => BYTES
+  int32 elem_type = 20;
 
   // Shape of the tensor, including batch dimensions.
-  repeated int32 shape = 11;
+  repeated int32 shape = 21;
 
   // Level of data(LOD), support variable length data, only for fetch tensor
   // currently.
-  repeated int32 lod = 12;
+  repeated int32 lod = 22;
 
   // Correspond to the variable 'name' in the model description prototxt.
-  string name = 13;
+  string name = 23;
 };
 
 // The structure of the service request. The input data can be repeated string
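
For reference, below is a minimal sketch of the Tensor.byte_data round-trip that the proto comments describe. It assumes only numpy and the standard library; the PipelineClient/RequestOp plumbing from the patch is elided, and `value` and `payload` are illustrative names, not identifiers from the patch.

# Sketch of the byte_data pack/unpack path (elem_type = 13).
import numpy as np
from io import BytesIO

value = np.arange(12, dtype=np.float32).reshape(3, 4)

# Client side: np.save serializes the array together with its dtype and
# shape into one bytes payload, so no separate shape/dtype bookkeeping
# is needed for this path.
np_bytes = BytesIO()
np.save(np_bytes, value, allow_pickle=True)
payload = np_bytes.getvalue()  # what _pack_request_package puts in byte_data

# Server side: np.load restores the identical array from the payload,
# as in the RequestOp branch for elem_type == 13.
np_data = np.load(BytesIO(payload), allow_pickle=True)
assert np_data.dtype == value.dtype
assert np_data.shape == value.shape
assert (np_data == value).all()

With the patch applied, a caller opts in per request via predict(..., use_tensor_bytes=True): _pack_request_package then stores the np.save payload in byte_data with elem_type = 13 and skips the typed flat_value copy, and the server's RequestOp recovers the array with np.load.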