diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 03066aca2b85e9c5473e1ec0dae648fdd86e41ad..861889266b0132b8812d2d958dd6675dc631fd33 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -832,6 +832,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
   list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
   list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
 
+  set(PDCODEGEN "${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen")
   if (${FOR_SERVING_SIDE})
     add_custom_command(
       OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
@@ -842,7 +843,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen
         --proto_path=${CMAKE_SOURCE_DIR}/core/predictor/proto
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   else()
@@ -854,7 +855,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --pdcodegen_out=${CMAKE_CURRENT_BINARY_DIR}
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/pdcodegen/pdcodegen
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   endif()
diff --git a/core/sdk-cpp/proto/CMakeLists.txt b/core/sdk-cpp/proto/CMakeLists.txt
index 5bc378398f341025d48b1d39d22785305aba2dea..e0fb6aad3dba9491cde76017550f248f1e573e90 100644
--- a/core/sdk-cpp/proto/CMakeLists.txt
+++ b/core/sdk-cpp/proto/CMakeLists.txt
@@ -37,6 +37,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
   list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
   list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
 
+  set(PDCODEGEN "${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen")
   if (${FOR_SERVING_SIDE})
     add_custom_command(
       OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
@@ -47,7 +48,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen
         --proto_path=${CMAKE_SOURCE_DIR}/core/predictor/proto
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   else()
@@ -59,7 +60,7 @@ function(PROTOBUF_GENERATE_SERVING_CPP FOR_SERVING_SIDE SRCS HDRS )
         --pdcodegen_out=${CMAKE_CURRENT_BINARY_DIR}
         --plugin=protoc-gen-pdcodegen=${CMAKE_BINARY_DIR}/core/pdcodegen/pdcodegen
         ${_protobuf_include_path} ${ABS_FIL}
-      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+      DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} ${PDCODEGEN}
       COMMENT "Running Paddle-serving C++ protocol buffer compiler on ${FIL}"
       VERBATIM)
   endif()
diff --git a/core/sdk-cpp/src/config_manager.cpp b/core/sdk-cpp/src/config_manager.cpp
index c422f0b52eba7d3a34e663f4198b9914a7722704..e3126855e082feaf9c6d237692c214fa8f66577b 100644
--- a/core/sdk-cpp/src/config_manager.cpp
+++ b/core/sdk-cpp/src/config_manager.cpp
@@ -31,6 +31,8 @@ int EndpointConfigManager::create(const std::string& sdk_desc_str) {
     LOG(ERROR) << "Failed reload endpoint config";
     return -1;
   }
+
+  return 0;
 }
 
 int EndpointConfigManager::create(const char* path, const char* file) {
diff --git a/doc/RUN_IN_DOCKER.md b/doc/RUN_IN_DOCKER.md
index 708739851b8e3ec5ca8b5e204a68169ec88041b5..48d1d265665fe40a97c6423a34c1e4d9361c850a 100644
--- a/doc/RUN_IN_DOCKER.md
+++ b/doc/RUN_IN_DOCKER.md
@@ -1,6 +1,6 @@
 # How to run PaddleServing in Docker
 
-([简体中文](./RUN_IN_DOCKER_CN.md)|English)
+([简体中文](RUN_IN_DOCKER_CN.md)|English)
 
 ## Requirements
 
@@ -137,6 +137,13 @@ pip install paddle-serving-server-gpu
 
 ### Test example
 
+When running the GPU Server, you need to set the GPUs used by the prediction service through the `--gpu_ids` option, and the CPU is used by default. An error will be reported when the value of `--gpu_ids` exceeds the environment variable `CUDA_VISIBLE_DEVICES`. The following example specifies to use a GPU with index 0:
+```shell
+export CUDA_VISIBLE_DEVICES=0,1
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --gpu_ids 0
+```
+
+
 Get the trained Boston house price prediction model by the following command:
 
 ```bash
@@ -180,4 +187,9 @@ tar -xzf uci_housing.tar.gz
     print(fetch_map)
 ```
 
-
+
+
+
+## Attention
+
+The images provided by this document are all runtime images, which do not support compilation. If you want to compile from source, refer to [COMPILE](COMPILE.md).
diff --git a/doc/RUN_IN_DOCKER_CN.md b/doc/RUN_IN_DOCKER_CN.md
index 9f2abba176ca89f6d03d9602c2fd1e7d4a78980b..8800b3a30690e03fce739714af1f24a3c8333b7f 100644
--- a/doc/RUN_IN_DOCKER_CN.md
+++ b/doc/RUN_IN_DOCKER_CN.md
@@ -129,12 +129,13 @@ pip install paddle-serving-server-gpu
 
 ### 测试example
 
-GPU版本在运行Server端代码前需要设置`CUDA_VISIBLE_DEVICES`环境变量来指定预测服务使用的GPU,下面的示例为指定索引为0和1两块GPU:
-
-```bash
- export CUDA_VISIBLE_DEVICES=0,1
+在运行GPU版Server时需要通过`--gpu_ids`选项设置预测服务使用的GPU,缺省状态默认使用CPU。当设置的`--gpu_ids`超出环境变量`CUDA_VISIBLE_DEVICES`时会报错。下面的示例为指定使用索引为0的GPU:
+```shell
+export CUDA_VISIBLE_DEVICES=0,1
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --gpu_ids 0
 ```
 
+
 通过下面命令获取训练好的Boston房价预估模型:
 
 ```bash
@@ -177,3 +178,7 @@ tar -xzf uci_housing.tar.gz
     fetch_map = client.predict(feed={"x": data}, fetch=["price"])
     print(fetch_map)
 ```
+
+## 注意事项
+
+该文档提供的镜像均为运行镜像,不支持开发编译。如果想要从源码编译,请查看[如何编译PaddleServing](COMPILE.md)。
diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py
index 9b8e301a62eb0eee161cd701555543d329c6ae83..7cedb6aa451e0e4a128f0fedbfde1a896977f601 100644
--- a/python/examples/bert/benchmark_batch.py
+++ b/python/examples/bert/benchmark_batch.py
@@ -53,7 +53,7 @@ def single_func(idx, resource):
                 feed_batch.append(reader.process(dataset[bi]))
             b_end = time.time()
             if profile_flags:
-                print("PROFILE\tpid:{}\tbert+pre_0:{} bert_pre_1:{}".format(
+                print("PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}".format(
                     os.getpid(),
                     int(round(b_start * 1000000)),
                     int(round(b_end * 1000000))))
@@ -69,9 +69,7 @@
 
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = [
-        "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295"
-    ]
+    endpoint_list = ["127.0.0.1:9292"]
     result = multi_thread_runner.run(single_func, args.thread,
                                      {"endpoint": endpoint_list})
     avg_cost = 0
diff --git a/python/examples/criteo_ctr_with_cube/README.md b/python/examples/criteo_ctr_with_cube/README.md
index eed612f4043ff7fdb41538b2a425d98f0d045718..4a69fd528a3d25f4010d6fdc724146027a90e896 100755
--- a/python/examples/criteo_ctr_with_cube/README.md
+++ b/python/examples/criteo_ctr_with_cube/README.md
@@ -2,21 +2,35 @@
 
 ([简体中文](./README_CN.md)|English)
 
+### Compile Source Code
+in the root directory of this git project
+```
+mkdir build_server
+cd build_server
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT_ONLY=OFF ..
+make -j10
+make install -j10
+```
+
 ### Get Sample Dataset
+go to directory `python/examples/criteo_ctr_with_cube`
 ```
 sh get_data.sh
 ```
 
-### Train and Save Model
+### Download Model and Sparse Parameter Sequence Files
 ```
-python local_train.py
+wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
+tar xf ctr_cube_unittest.tar.gz
+mv models/ctr_client_conf ./
+mv models/ctr_serving_model_kv ./
+mv models/data ./cube/
 ```
-the trained model will be in ./ctr_server_model and ./ctr_client_config, and ctr_server_model_kv, ctr_client_conf_kv。
+the model will be in ./ctr_server_model_kv and ./ctr_client_config.
 
 ### Start Sparse Parameter Indexing Service
 ```
-cp ../../../build_server/core/predictor/seq_generator seq_generator
 cp ../../../build_server/output/bin/cube* ./cube/
 sh cube_prepare.sh &
 ```
 
diff --git a/python/examples/criteo_ctr_with_cube/README_CN.md b/python/examples/criteo_ctr_with_cube/README_CN.md
index 868e8bce5a624904d532bf956fd5868abc0a1c52..5bc36869abe0c1f3fc010c2893731a0e86a1c270 100644
--- a/python/examples/criteo_ctr_with_cube/README_CN.md
+++ b/python/examples/criteo_ctr_with_cube/README_CN.md
@@ -1,20 +1,34 @@
 ## 带稀疏参数索引服务的CTR预测服务
 
 (简体中文|[English](./README.md))
+### 编译源代码
+在本项目的根目录下,执行
+```
+mkdir build_server
+cd build_server
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT_ONLY=OFF ..
+make -j10
+make install -j10
+```
+
 ### 获取样例数据
+进入目录 `python/examples/criteo_ctr_with_cube`
 ```
 sh get_data.sh
 ```
 
-### 保存模型和配置文件
+### 下载模型和稀疏参数序列文件
 ```
-python local_train.py
+wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
+tar xf ctr_cube_unittest.tar.gz
+mv models/ctr_client_conf ./
+mv models/ctr_serving_model_kv ./
+mv models/data ./cube/
 ```
-执行脚本后会在当前目录生成ctr_server_model和ctr_client_config文件夹,以及ctr_server_model_kv, ctr_client_conf_kv。
+执行脚本后会在当前目录有ctr_server_model_kv和ctr_client_config文件夹。
 
 ### 启动稀疏参数索引服务
 ```
-cp ../../../build_server/core/predictor/seq_generator seq_generator
 cp ../../../build_server/output/bin/cube* ./cube/
 sh cube_prepare.sh &
 ```
diff --git a/python/examples/imagenet/image_http_client.py b/python/examples/imagenet/image_http_client.py
index 2a2e9ea20d7e428cfe42393e2fee60035c33283d..cda0f33ac82d0bd228a22a8f438cbe1aa013eadf 100644
--- a/python/examples/imagenet/image_http_client.py
+++ b/python/examples/imagenet/image_http_client.py
@@ -30,7 +30,10 @@ def predict(image_path, server):
     req = json.dumps({"image": image, "fetch": ["score"]})
     r = requests.post(
         server, data=req, headers={"Content-Type": "application/json"})
-    print(r.json()["score"][0])
+    try:
+        print(r.json()["score"][0])
+    except ValueError:
+        print(r.text)
     return r
 
 
diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py
index f1a3dcf612e34d83387163d9fea491a7dca2c579..d723795f214e22957bff49f0ddf8fd42086b8a7e 100644
--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -32,7 +32,7 @@ def save_model(server_model_folder,
     executor = Executor(place=CPUPlace())
 
     feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
-    target_vars = fetch_var_dict.values()
+    target_vars = list(fetch_var_dict.values())
 
     save_inference_model(
         server_model_folder,
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 279e3a895e975473fc5569c4716368c3dda1d9f1..088e3928f4409eaac4d42d771a72ecc9d13fdbce 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -38,6 +38,8 @@ def parse_args():  # pylint: disable=doc-string-missing
         help="Working dir of current service")
     parser.add_argument(
         "--device", type=str, default="cpu", help="Type of device")
+    parser.add_argument(
+        "--mem_optim", type=bool, default=False, help="Memory optimize")
     return parser.parse_args()
 
 
@@ -48,6 +50,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     port = args.port
     workdir = args.workdir
     device = args.device
+    mem_optim = args.mem_optim
 
     if model == "":
         print("You must specify your serving model")
@@ -67,6 +70,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
     server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
+    server.set_memory_optimize(mem_optim)
 
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 2fd35c6d66e4bf282224a8775f1a6bf0d1c6a8c5..3dd330b18921c81cf17601ff7e52d860f0322f95 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -43,6 +43,8 @@ def serve_args():
     parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
     parser.add_argument(
         "--name", type=str, default="None", help="Default service name")
+    parser.add_argument(
+        "--mem_optim", type=bool, default=False, help="Memory optimize")
     return parser.parse_args()
 
 
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index d09efbfc8e1512ecb75b063ad760ce66e1a3159e..cb82e02cbec83324a6cb6029208325d8ce38e263 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -33,6 +33,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     port = args.port + index
     thread_num = args.thread
     model = args.model
+    mem_optim = args.mem_optim
     workdir = "{}_{}".format(args.workdir, gpuid)
 
     if model == "":
@@ -53,6 +54,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     server = serving.Server()
     server.set_op_sequence(op_seq_maker.get_op_sequence())
     server.set_num_threads(thread_num)
+    server.set_memory_optimize(mem_optim)
     server.load_model_config(model)
     server.prepare_server(workdir=workdir, port=port, device=device)
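A minimal usage sketch of the new `--mem_optim` flag added above, assuming the `uci_housing_model` directory from the Docker guide is present. Because the option is declared with `type=bool`, argparse treats any non-empty value as true:

```shell
# Illustrative invocation (not from the diff): start the CPU server with memory
# optimization enabled; passing any non-empty value turns the flag on because it
# is parsed with type=bool.
python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --mem_optim True
```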