Merge pull request #919 from wangjiawei04/v0.4.0

V0.4.0 cherry-pick #918 #689 #917

Merge pull request #919 from wangjiawei04/v0.4.0
V0.4.0 cherry-pick #918 #689 #917
c1c4bd14 · Jiawei Wang · GitHub · a3def11f · 5dbd1b6e · c1c4bd14
31 changed file
--- a/README.md
+++ b/README.md
@@ -55,11 +55,13 @@ You may need to use a domestic mirror source (in China, you can use the Tsinghua

 If you need install modules compiled with develop branch, please download packages from [latest packages list](./doc/LATEST_PACKAGES.md) and install with `pip install` command.

-Packages of paddle-serving-server and paddle-serving-server-gpu support Centos 6/7 and Ubuntu 16/18.
+Packages of paddle-serving-server and paddle-serving-server-gpu support Centos 6/7, Ubuntu 16/18, Windows 10.

 Packages of paddle-serving-client and paddle-serving-app support Linux and Windows, but paddle-serving-client only support python2.7/3.6/3.7.

-Recommended to install paddle >= 1.8.2.
+Recommended to install paddle >= 1.8.4.
+
+For **Windows Users**, please read the document [Paddle Serving for Windows Users](./doc/WINDOWS_TUTORIAL.md)

 <h2 align="center"> Pre-built services with Paddle Serving</h2>


--- a/README_CN.md
+++ b/README_CN.md
@@ -57,11 +57,13 @@ pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0

 如果需要使用develop分支编译的安装包，请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载，使用`pip install`命令进行安装。

-paddle-serving-server和paddle-serving-server-gpu安装包支持Centos 6/7和Ubuntu 16/18。
+paddle-serving-server和paddle-serving-server-gpu安装包支持Centos 6/7, Ubuntu 16/18和Windows 10。

 paddle-serving-client和paddle-serving-app安装包支持Linux和Windows，其中paddle-serving-client仅支持python2.7/3.5/3.6。

-推荐安装1.8.2及以上版本的paddle
+推荐安装1.8.4及以上版本的paddle
+
+对于**Windows 10 用户**，请参考文档[Windows平台使用Paddle Serving指导](./doc/WINDOWS_TUTORIAL_CN.md)。

 <h2 align="center"> Paddle Serving预装的服务 </h2>


--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -114,7 +114,7 @@ ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)

 ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)

 if (WITH_TRT)
 ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
@@ -127,12 +127,17 @@ endif()
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)

+ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET cryptopp PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/cryptopp/lib/libcryptopp.a)
+
 LIST(APPEND external_project_dependencies paddle)

 LIST(APPEND paddle_depend_libs
-    xxhash)
+        xxhash cryptopp)
+

 if(WITH_TRT)
 LIST(APPEND paddle_depend_libs
    nvinfer nvinfer_plugin)
 endif()
+
--- a/core/predictor/tools/seq_generator.cpp
+++ b/core/predictor/tools/seq_generator.cpp
@@ -17,11 +17,11 @@
 #include <fstream>
 #include <iostream>
 #include <memory>
-#include <thread>
+#include <thread>  //NOLINT

 #include "core/predictor/framework.pb.h"
-#include "quant.h"
-#include "seq_file.h"
+#include "quant.h"     // NOLINT
+#include "seq_file.h"  // NOLINT

 inline uint64_t time_diff(const struct timeval &start_time,
                          const struct timeval &end_time) {
@@ -113,13 +113,15 @@ int dump_parameter(const char *input_file, const char *output_file) {
    // std::cout << "key_len " << key_len << " value_len " << value_buf_len
    // << std::endl;
    memcpy(value_buf, tensor_buf + offset, value_buf_len);
-    seq_file_writer.write((char *)&i, sizeof(i), value_buf, value_buf_len);
+    seq_file_writer.write(
+        std::to_string(i).c_str(), sizeof(i), value_buf, value_buf_len);
    offset += value_buf_len;
  }
  return 0;
 }

-float *read_embedding_table(const char *file1, std::vector<int64_t> &dims) {
+float *read_embedding_table(const char *file1,
+                            std::vector<int64_t> &dims) {  // NOLINT
  std::ifstream is(file1);
  // Step 1: is read version, os write version
  uint32_t version;
@@ -242,7 +244,7 @@ int compress_parameter_parallel(const char *file1,
          float x = *(emb_table + k * emb_size + e);
          int val = round((x - xmin) / scale);
          val = std::max(0, val);
-          val = std::min((int)pow2bits - 1, val);
+          val = std::min(static_cast<int>(pow2bits) - 1, val);
          *(tensor_temp + 2 * sizeof(float) + e) = val;
        }
        result[k] = tensor_temp;
@@ -262,7 +264,8 @@ int compress_parameter_parallel(const char *file1,
  }
  SeqFileWriter seq_file_writer(file2);
  for (int64_t i = 0; i < dict_size; i++) {
-    seq_file_writer.write((char *)&i, sizeof(i), result[i], per_line_size);
+    seq_file_writer.write(
+        std::to_string(i).c_str(), sizeof(i), result[i], per_line_size);
  }
  return 0;
 }

--- a/doc/WINDOWS_TUTORIAL.md
+++ b/doc/WINDOWS_TUTORIAL.md
+## Paddle Serving for Windows Users
+
+(English|[简体中文](./WINDOWS_TUTORIAL_CN.md))
+
+### Summary
+
+This document guides users through setting up a Paddle Serving service on the Windows platform. Due to limited third-party library support, the Windows platform currently only supports building local predictor prediction services through the web service framework. If you want to experience all the services, you need to use Docker for Windows to emulate a Linux environment.
+
+### Running Paddle Serving on Native Windows System
+
+**Configure Python environment variables to PATH**: First, you need to add the directory where the Python executable program is located to the PATH. Usually in **System Properties/My Computer Properties**-**Advanced**-**Environment Variables**, click Path and add the path at the beginning. For example, `C:\Users\$USER\AppData\Local\Programs\Python\Python36`, and finally click **OK** continuously. If you enter python on Powershell, you can enter the python interactive interface, indicating that the environment variable configuration is successful.
+
+**Install wget**: All the downloads in this tutorial, as well as the built-in model download function in `paddle_serving_app`, use the wget tool. Download the binary package from this [link](http://gnuwin32.sourceforge.net/packages/wget.htm), unzip it, and copy it to `C:\Windows\System32`. If a security prompt appears, allow it.
+
+**Install Git**: For details, see [Git official website](https://git-scm.com/downloads)
+
+**Install the necessary C++ library (optional)**: Some users may encounter the problem that the dll cannot be linked during the `import paddle` stage. It is recommended to [Install Visual Studio Community Edition](https://visualstudio.microsoft.com/), and install the relevant components of C++.
+
+**Install Paddle and Serving**: In Powershell, execute
+
+```
+python -m pip install -U paddle_serving_server paddle_serving_client paddle_serving_app paddlepaddle
+```
+
+for GPU users,
+
+```
+python -m pip install -U paddle_serving_server_gpu paddle_serving_client paddle_serving_app paddlepaddle-gpu
+```
+
+**Git clone Serving Project:**
+
+```
+git clone https://github.com/paddlepaddle/Serving
+```
+
+**Run OCR example**:
+
+```
+cd Serving/python/examples/ocr
+python -m paddle_serving_app.package --get_model ocr_rec
+tar -xzvf ocr_rec.tar.gz
+python -m paddle_serving_app.package --get_model ocr_det
+tar -xzvf ocr_det.tar.gz
+python ocr_debugger_server.py &
+python ocr_web_client.py
+```
+
+### Create a new Paddle Serving Web Service on Windows
+
+Currently Windows supports the Local Predictor of the Web Service framework. The server code framework is as follows
+
+```
+# filename:your_webservice.py
+from paddle_serving_server.web_service import WebService
+# If it is the GPU version, please use from paddle_serving_server_gpu.web_service import WebService
+class YourWebService(WebService):
+    def preprocess(self, feed=[], fetch=[]):
+        #Implement pre-processing here
+        #feed_dict is key: var names, value: numpy array input
+        #fetch_names is a list of fetch variable names
+        #is_batch indicates whether the numpy arrays in the values of feed_dict include the batch dimension
+        return feed_dict, fetch_names, is_batch
+    def postprocess(self, feed={}, fetch=[], fetch_map=None):
+        #fetch map is the returned dictionary after prediction, the key is the fetch names given when the process returns, and the value is the var specific value corresponding to the fetch names
+        #After processing here, the result needs to be converted into a dictionary again, and the type of values should be a list, so that it can be serialized in JSON to facilitate web return
+        return response
+
+your_service = YourWebService(name="XXX")
+your_service.load_model_config("your_model_path")
+your_service.prepare_server(workdir="workdir", port=9292)
+# If you are a GPU user, you can refer to the python example under python/examples/ocr
+your_service.run_debugger_service()
+# Windows platform cannot use run_rpc_service() interface
+your_service.run_web_service()
+```
+
+Client code example
+
+```
+# filename:your_client.py
+import requests
+import json
+import base64
+import os, sys
+import time
+import cv2 # If you need to upload pictures
+# Used for image reading, the principle is to use base64 encoding file content
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode(
+        'utf8') #data.tostring()).decode('utf8')
+
+headers = {"Content-type": "application/json"}
+url = "http://127.0.0.1:9292/XXX/prediction" # XXX depends on the initial name parameter of the server YourService
+r = requests.post(url=url, headers=headers, data=json.dumps(data))
+print(r.json())
+```
+
+The user only needs to follow the above instructions and implement the relevant content in the corresponding function. For more information, please refer to [How to develop a new Web Service? ](./NEW_WEB_SERVICE.md)
+
+Execute after development
+
+```
+python your_webservice.py &
+python your_client.py
+```
+
+Because the service needs to listen on a port, a security prompt may appear during startup. Allow access, and an IP address will be shown. Note that when the service starts on the Windows platform, the local IP address may not be 127.0.0.1. Confirm the actual IP address and set the access IP in the client accordingly.
+
+### Docker for Windows User Guide
+
+The above content applies to native Windows. Users who want the complete feature set need to use Docker to emulate a Linux system.
+
+Please refer to [Docker Desktop](https://www.docker.com/products/docker-desktop) to install Docker
+
+After installation, start the docker linux engine and download the relevant image. In the Serving directory
+
+```
+docker pull hub.baidubce.com/paddlepaddle/serving:latest-devel
+# There is no expose port here, users can set -p to perform port mapping as needed
+docker run --rm -dit --name serving_devel -v $PWD:/Serving hub.baidubce.com/paddlepaddle/serving:latest-devel
+docker exec -it serving_devel bash
+cd /Serving
+```
+
+The rest of the operations are exactly the same as the Linux version.
--- a/doc/WINDOWS_TUTORIAL_CN.md
+++ b/doc/WINDOWS_TUTORIAL_CN.md
+## Windows平台使用Paddle Serving指导
+
+([English](./WINDOWS_TUTORIAL.md)|简体中文)
+
+### 综述
+
+本文档指导用户如何在Windows平台手把手搭建Paddle Serving服务。由于受限第三方库的支持，Windows平台目前只支持用web service的方式搭建local predictor预测服务。如果想要体验全部的服务，需要使用Docker for Windows，来模拟Linux的运行环境。
+
+### 原生Windows系统运行Paddle Serving
+
+**配置Python环境变量到PATH**：首先需要将Python的可执行程序所在目录加入到PATH当中。通常在**系统属性/我的电脑属性**-**高级**-**环境变量** ，点选Path，并在开头加上路径。例如`C:\Users\$USER\AppData\Local\Programs\Python\Python36`，最后连续点击**确定** 。在Powershell上如果输入python可以进入python交互界面，说明环境变量配置成功。
+
+**安装wget工具**：由于教程当中所有的下载，以及`paddle_serving_app`当中内嵌的模型下载功能，都是用到wget工具，在链接[下载wget](http://gnuwin32.sourceforge.net/packages/wget.htm)，解压后复制到`C:\Windows\System32`下，如有安全提示需要通过。
+
+**安装Git工具**： 详情参见[Git官网](https://git-scm.com/downloads)
+
+**安装必要的C++库（可选）**：部分用户可能会在`import paddle`阶段遇见dll无法链接的问题，建议可以[安装Visual Studio社区版本](https://visualstudio.microsoft.com/)，并且安装C++的相关组件。
+
+**安装Paddle和Serving**：在Powershell，执行
+
+```
+python -m pip install -U paddle_serving_server paddle_serving_client paddle_serving_app paddlepaddle
+```
+
+如果是GPU用户
+
+```
+python -m pip install -U paddle_serving_server_gpu paddle_serving_client paddle_serving_app paddlepaddle-gpu
+```
+
+**下载Serving库**：
+
+```
+git clone https://github.com/paddlepaddle/Serving
+```
+
+**运行OCR示例**：
+
+```
+cd Serving/python/examples/ocr
+python -m paddle_serving_app.package --get_model ocr_rec
+tar -xzvf ocr_rec.tar.gz
+python -m paddle_serving_app.package --get_model ocr_det
+tar -xzvf ocr_det.tar.gz
+python ocr_debugger_server.py &
+python ocr_web_client.py
+```
+
+### 创建新的Windows支持的Paddle Serving服务
+
+目前Windows支持Web Service框架的Local Predictor。服务端代码框架如下
+
+```
+# filename:your_webservice.py
+from paddle_serving_server.web_service import WebService
+# 如果是GPU版本，请使用 from paddle_serving_server_gpu.web_service import WebService
+class YourWebService(WebService):
+    def preprocess(self, feed=[], fetch=[]):
+        #在这里实现前处理
+        #feed_dict是 key: var names, value: numpy array input
+        #fetch_names 是fetch变量名列表
+        #is_batch的含义是feed_dict的value里的numpy array是否包含了batch维度
+        return feed_dict, fetch_names, is_batch
+    def postprocess(self, feed={}, fetch=[], fetch_map=None):
+        #fetch map是经过预测之后的返回字典，key是process返回时给定的fetch names，value是对应fetch names的var具体值
+        #在这里做处理之后，结果需重新转换成字典，并且values的类型应是列表list，这样可以JSON序列化方便web返回
+        return response
+
+your_service = YourWebService(name="XXX")
+your_service.load_model_config("your_model_path")
+your_service.prepare_server(workdir="workdir", port=9292)
+# 如果是GPU用户，可以参照python/examples/ocr下的python示例
+your_service.run_debugger_service()
+# Windows平台不可以使用 run_rpc_service()接口
+your_service.run_web_service()
+```
+
+客户端代码示例
+
+```
+# filename：your_client.py
+import requests
+import json
+import base64
+import os, sys
+import time
+import cv2 # 如果需要上传图片
+# 用于图片读取，原理是采用base64编码文件内容
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode(
+        'utf8')  #data.tostring()).decode('utf8')
+
+headers = {"Content-type": "application/json"}
+url = "http://127.0.0.1:9292/XXX/prediction" # XXX取决于服务端YourService的初始化name参数
+r = requests.post(url=url, headers=headers, data=json.dumps(data))
+print(r.json())
+```
+
+用户只需要按照如上指示，在对应函数中实现相关内容即可。更多信息请参见[如何开发一个新的Web Service？](./NEW_WEB_SERVICE_CN.md)
+
+开发完成后执行
+
+```
+python your_webservice.py &
+python your_client.py
+```
+
+因为需要占用端口，因此启动过程可能会有安全提示，请点选通过，就会有IP地址生成。需要注意的是，Windows平台启动服务时，本地IP地址可能不是127.0.0.1，需要确认好IP地址再看Client应该如何设定访问IP。
+
+### Docker for Windows 使用指南
+
+以上内容用于原生的Windows，如果用户想要体验完整的功能，需要使用Docker工具，来模拟Linux系统。
+
+安装Docker请参考[Docker Desktop](https://www.docker.com/products/docker-desktop)
+
+安装之后启动docker的linux engine，下载相关镜像。在Serving目录下
+
+```
+docker pull hub.baidubce.com/paddlepaddle/serving:latest-devel
+# 此处没有expose端口，用户可根据需要设置-p来进行端口映射
+docker run --rm -dit --name serving_devel -v $PWD:/Serving hub.baidubce.com/paddlepaddle/serving:latest-devel 
+docker exec -it serving_devel bash
+cd /Serving
+```
+
+其余操作与Linux版本完全一致。
--- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
+++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
@@ -13,7 +13,6 @@
 // limitations under the License.

 #pragma once
-
 #include <pthread.h>
 #include <fstream>
 #include <map>
@@ -29,7 +28,6 @@ namespace paddle_serving {
 namespace fluid_cpu {

 using configure::SigmoidConf;
-
 class AutoLock {
 public:
  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
@@ -530,7 +528,60 @@ class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
    return 0;
  }
 };
+// Inference core for encrypted models on CPU. It reads the encrypted program,
+// the encrypted parameters and the key from the model directory, decrypts
+// them in memory and creates an AnalysisConfig-based predictor from the
+// decrypted buffers.
+class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
+ public:
+  // Reads the whole file `filename` into `*contents` as raw bytes.
+  // On any failure (file cannot be opened, size cannot be determined, file is
+  // empty, short read) `*contents` is left empty so the caller can detect it.
+  void ReadBinaryFile(const std::string& filename, std::string* contents) {
+    contents->clear();
+    std::ifstream fin(filename, std::ios::in | std::ios::binary);
+    if (!fin.is_open()) {
+      LOG(ERROR) << "failed to open file: " << filename;
+      return;
+    }
+    fin.seekg(0, std::ios::end);
+    const std::streamoff file_size = fin.tellg();
+    // tellg() reports -1 on failure; passing that to resize() would request
+    // an enormous allocation, and at(0) on an empty string would throw.
+    if (file_size <= 0) {
+      LOG(ERROR) << "file is empty or unreadable: " << filename;
+      return;
+    }
+    contents->resize(static_cast<size_t>(file_size));
+    fin.seekg(0, std::ios::beg);
+    fin.read(&(contents->at(0)), contents->size());
+    if (!fin) {
+      LOG(ERROR) << "failed to read file: " << filename;
+      contents->clear();
+    }
+    // The stream is closed by its destructor.
+  }
+
+  // Builds the predictor from the encrypted model found under
+  // params.get_path(). Returns 0 on success and -1 on any failure.
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path note exits: "
+                 << data_path;
+      return -1;
+    }
+
+    // NOTE(review): file names are appended by plain concatenation, so
+    // data_path is presumably expected to end with '/' - confirm with callers.
+    std::string model_buffer, params_buffer, key_buffer;
+    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
+    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
+    ReadBinaryFile(data_path + "key", &key_buffer);
+    if (model_buffer.empty() || params_buffer.empty() || key_buffer.empty()) {
+      LOG(ERROR) << "create paddle predictor failed, cannot read "
+                 << "encrypt_model, encrypt_params or key under: " << data_path;
+      return -1;
+    }
+
+    VLOG(2) << "prepare for encryption model";
+
+    auto cipher = paddle::MakeCipher("");
+    if (!cipher) {
+      LOG(ERROR) << "create paddle predictor failed, cannot create cipher";
+      return -1;
+    }
+    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
+    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
+
+    paddle::AnalysisConfig analysis_config;
+    analysis_config.SetModelBuffer(&real_model_buffer[0],
+                                   real_model_buffer.size(),
+                                   &real_params_buffer[0],
+                                   real_params_buffer.size());
+    analysis_config.DisableGpu();
+    analysis_config.SetCpuMathLibraryNumThreads(1);
+    if (params.enable_memory_optimization()) {
+      analysis_config.EnableMemoryOptim();
+    }
+    analysis_config.SwitchSpecifyInputNames(true);
+    // Predictor creation is guarded by the global creation mutex.
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    VLOG(2) << "decrypt model file sucess";
+    _core =
+        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
+    return 0;
+  }
+};
 }  // namespace fluid_cpu
 }  // namespace paddle_serving
 }  // namespace baidu
--- a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp
@@ -52,6 +52,13 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_CPU_NATIVE_DIR_SIGMOID");

+#if 1
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<
+        FluidCpuAnalysisEncryptCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_CPU_ANALYSIS_ENCRYPT");
+#endif
 }  // namespace fluid_cpu
 }  // namespace paddle_serving
 }  // namespace baidu
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -25,7 +25,6 @@
 #include "core/configure/inferencer_configure.pb.h"
 #include "core/predictor/framework/infer.h"
 #include "paddle_inference_api.h"  // NOLINT
-
 DECLARE_int32(gpuid);

 namespace baidu {
@@ -591,6 +590,60 @@ class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
  }
 };

+// Inference core for encrypted models on GPU. It reads the encrypted program,
+// the encrypted parameters and the key from the model directory, decrypts
+// them in memory and creates an AnalysisConfig-based predictor from the
+// decrypted buffers.
+class FluidGpuAnalysisEncryptCore : public FluidFamilyCore {
+ public:
+  // Reads the whole file `filename` into `*contents` as raw bytes.
+  // On any failure (file cannot be opened, size cannot be determined, file is
+  // empty, short read) `*contents` is left empty so the caller can detect it.
+  void ReadBinaryFile(const std::string& filename, std::string* contents) {
+    contents->clear();
+    std::ifstream fin(filename, std::ios::in | std::ios::binary);
+    if (!fin.is_open()) {
+      LOG(ERROR) << "failed to open file: " << filename;
+      return;
+    }
+    fin.seekg(0, std::ios::end);
+    const std::streamoff file_size = fin.tellg();
+    // tellg() reports -1 on failure; passing that to resize() would request
+    // an enormous allocation, and at(0) on an empty string would throw.
+    if (file_size <= 0) {
+      LOG(ERROR) << "file is empty or unreadable: " << filename;
+      return;
+    }
+    contents->resize(static_cast<size_t>(file_size));
+    fin.seekg(0, std::ios::beg);
+    fin.read(&(contents->at(0)), contents->size());
+    if (!fin) {
+      LOG(ERROR) << "failed to read file: " << filename;
+      contents->clear();
+    }
+    // The stream is closed by its destructor.
+  }
+
+  // Builds the predictor from the encrypted model found under
+  // params.get_path(). Returns 0 on success and -1 on any failure.
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path note exits: "
+                 << data_path;
+      return -1;
+    }
+
+    // NOTE(review): file names are appended by plain concatenation, so
+    // data_path is presumably expected to end with '/' - confirm with callers.
+    std::string model_buffer, params_buffer, key_buffer;
+    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
+    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
+    ReadBinaryFile(data_path + "key", &key_buffer);
+    if (model_buffer.empty() || params_buffer.empty() || key_buffer.empty()) {
+      LOG(ERROR) << "create paddle predictor failed, cannot read "
+                 << "encrypt_model, encrypt_params or key under: " << data_path;
+      return -1;
+    }
+
+    VLOG(2) << "prepare for encryption model";
+
+    auto cipher = paddle::MakeCipher("");
+    if (!cipher) {
+      LOG(ERROR) << "create paddle predictor failed, cannot create cipher";
+      return -1;
+    }
+    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
+    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
+
+    paddle::AnalysisConfig analysis_config;
+    analysis_config.SetModelBuffer(&real_model_buffer[0],
+                                   real_model_buffer.size(),
+                                   &real_params_buffer[0],
+                                   real_params_buffer.size());
+    // Device id comes from the --gpuid flag declared at the top of the header.
+    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
+    analysis_config.SetCpuMathLibraryNumThreads(1);
+    if (params.enable_memory_optimization()) {
+      analysis_config.EnableMemoryOptim();
+    }
+    analysis_config.SwitchSpecifyInputNames(true);
+    // Predictor creation is guarded by the global creation mutex.
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    VLOG(2) << "decrypt model file sucess";
+    _core =
+        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
+    return 0;
+  }
+};
 }  // namespace fluid_gpu
 }  // namespace paddle_serving
 }  // namespace baidu
--- a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
@@ -54,6 +54,12 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
    ::baidu::paddle_serving::predictor::InferEngine,
    "FLUID_GPU_NATIVE_DIR_SIGMOID");

+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<
+        FluidGpuAnalysisEncryptCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_GPU_ANALYSIS_ENCRPT")  // NOTE(review): spelled ENCRPT here but ENCRYPT for the CPU engine - confirm the Python engine.type string uses this exact spelling
+
 }  // namespace fluid_gpu
 }  // namespace paddle_serving
 }  // namespace baidu
--- a/python/examples/encryption/README.md
+++ b/python/examples/encryption/README.md
+# Encryption Model Prediction
+
+([简体中文](README_CN.md)|English)
+
+## Get Origin Model
+
+The example uses the model file of the fit_a_line example as the original model.
+
+```
+sh get_data.sh
+```
+
+## Encrypt Model
+
+```
+python encrypt.py
+```
+The key is stored in the `key` file. The encrypted model files and the server-side configuration file are stored in the `encrypt_server` directory,
+and the client-side configuration file is stored in the `encrypt_client` directory.
+
+## Start Encryption Service
+CPU Service
+```
+python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model
+```
+GPU Service
+```
+python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
+```
+
+## Prediction
+```
+python test_client.py uci_housing_client/serving_client_conf.prototxt
+```
--- a/python/examples/encryption/README_CN.md
+++ b/python/examples/encryption/README_CN.md
+# 加密模型预测
+
+(简体中文|[English](README.md))
+
+## 获取明文模型
+
+示例中使用fit_a_line示例的模型文件作为明文模型
+
+```
+sh get_data.sh
+```
+
+## 模型加密
+
+```
+python encrypt.py
+```
+密钥保存在`key`文件中，加密模型文件以及server端配置文件保存在`encrypt_server`目录下，client端配置文件保存在`encrypt_client`目录下。
+
+## 启动加密预测服务
+CPU预测服务
+```
+python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model
+```
+GPU预测服务
+```
+python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
+```
+
+## 预测
+```
+python test_client.py uci_housing_client/serving_client_conf.prototxt
+```
--- a/python/examples/encryption/encrypt.py
+++ b/python/examples/encryption/encrypt.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle_serving_client.io import inference_model_to_serving
+
+
+def serving_encryption():
+    inference_model_to_serving(
+        dirname="./uci_housing_model",
+        serving_server="encrypt_server",
+        serving_client="encrypt_client",
+        encryption=True)
+
+
+if __name__ == "__main__":
+    serving_encryption()
--- a/python/examples/encryption/get_data.sh
+++ b/python/examples/encryption/get_data.sh
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing_example/encrypt.tar.gz
+tar -xzf encrypt.tar.gz
--- a/python/examples/ocr_detection/text_det_client.py
+++ b/python/examples/ocr_detection/text_det_client.py
@@ -11,37 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# pylint: disable=doc-string-missing

-import os
 from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, File2Image, ResizeByFactor
-from paddle_serving_app.reader import Div, Normalize, Transpose
-from paddle_serving_app.reader import DBPostProcess, FilterBoxes
+import sys

 client = Client()
-client.load_client_config("ocr_det_client/serving_client_conf.prototxt")
-client.connect(["127.0.0.1:9494"])
+client.load_client_config(sys.argv[1])
+client.use_key("./key")
+client.connect(["127.0.0.1:9300"], encryption=True)

-read_image_file = File2Image()
-preprocess = Sequential([
-    ResizeByFactor(32, 960), Div(255),
-    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
-        (2, 0, 1))
-])
-post_func = DBPostProcess({
-    "thresh": 0.3,
-    "box_thresh": 0.5,
-    "max_candidates": 1000,
-    "unclip_ratio": 1.5,
-    "min_size": 3
-})
-filter_func = FilterBoxes(10, 10)
+import paddle
+test_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.uci_housing.test(), buf_size=500),
+    batch_size=1)

-img = read_image_file(name)
-ori_h, ori_w, _ = img.shape
-img = preprocess(img)
-new_h, new_w, _ = img.shape
-ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
-outputs = client.predict(feed={"image": img}, fetch=["concat_1.tmp_0"])
-dt_boxes_list = post_func(outputs["concat_1.tmp_0"], [ratio_list])
-dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
+for data in test_reader():
+    fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
+    print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
--- a/python/examples/ocr_detection/7.jpg
+++ b/python/examples/ocr_detection/7.jpg
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -13,16 +13,19 @@
 # limitations under the License.
 # pylint: disable=doc-string-missing

-import paddle_serving_client
 import os
-from .proto import sdk_configure_pb2 as sdk
-from .proto import general_model_config_pb2 as m_config
-import google.protobuf.text_format
-import numpy as np
 import time
 import sys
+import requests
+import json
+import base64
+import numpy as np
+import paddle_serving_client
+import google.protobuf.text_format

 import grpc
+from .proto import sdk_configure_pb2 as sdk
+from .proto import general_model_config_pb2 as m_config
 from .proto import multi_lang_general_model_service_pb2
 sys.path.append(
    os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
@@ -161,6 +164,7 @@ class Client(object):
        self.fetch_names_to_idx_ = {}
        self.lod_tensor_set = set()
        self.feed_tensor_len = {}
+        self.key = None

        for i, var in enumerate(model_conf.feed_var):
            self.feed_names_to_idx_[var.alias_name] = i
@@ -193,7 +197,28 @@ class Client(object):
        else:
            self.rpc_timeout_ms = rpc_timeout

-    def connect(self, endpoints=None):
+    def use_key(self, key_filename):
+        with open(key_filename, "r") as f:
+            self.key = f.read()
+
+    def get_serving_port(self, endpoints):
+        if self.key is not None:
+            req = json.dumps({"key": base64.b64encode(self.key)})
+        else:
+            req = json.dumps({})
+        r = requests.post("http://" + endpoints[0], req)
+        result = r.json()
+        print(result)
+        if "endpoint_list" not in result:
+            raise ValueError("server not ready")
+        else:
+            endpoints = [
+                endpoints[0].split(":")[0] + ":" +
+                str(result["endpoint_list"][0])
+            ]
+            return endpoints
+
+    def connect(self, endpoints=None, encryption=False):
        # check whether current endpoint is available
        # init from client config
        # create predictor here
@@ -203,6 +228,8 @@ class Client(object):
                    "You must set the endpoints parameter or use add_variant function to create a variant."
                )
        else:
+            if encryption:
+                endpoints = self.get_serving_port(endpoints)
            if self.predictor_sdk_ is None:
                self.add_variant('default_tag_{}'.format(id(self)), endpoints,
                                 100)

--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -21,6 +21,9 @@ from paddle.fluid.framework import Program
 from paddle.fluid import CPUPlace
 from paddle.fluid.io import save_inference_model
 import paddle.fluid as fluid
+from paddle.fluid.core import CipherUtils
+from paddle.fluid.core import CipherFactory
+from paddle.fluid.core import Cipher
 from ..proto import general_model_config_pb2 as model_conf
 import os

@@ -29,7 +32,10 @@ def save_model(server_model_folder,
               client_config_folder,
               feed_var_dict,
               fetch_var_dict,
-               main_program=None):
+               main_program=None,
+               encryption=False,
+               key_len=128,
+               encrypt_conf=None):
    executor = Executor(place=CPUPlace())

    feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
@@ -38,14 +44,29 @@ def save_model(server_model_folder,
    for key in sorted(fetch_var_dict.keys()):
        target_vars.append(fetch_var_dict[key])
        target_var_names.append(key)
-
-    save_inference_model(
-        server_model_folder,
-        feed_var_names,
-        target_vars,
-        executor,
-        main_program=main_program)
-
+    if not encryption:
+        save_inference_model(
+            server_model_folder,
+            feed_var_names,
+            target_vars,
+            executor,
+            main_program=main_program)
+    else:
+        if encrypt_conf == None:
+            aes_cipher = CipherFactory.create_cipher()
+        else:
+            #todo: more encryption algorithms
+            pass
+        key = CipherUtils.gen_key_to_file(128, "key")
+        params = fluid.io.save_persistables(
+            executor=executor, dirname=None, main_program=main_program)
+        model = main_program.desc.serialize_to_string()
+        if not os.path.exists(server_model_folder):
+            os.makedirs(server_model_folder)
+        os.chdir(server_model_folder)
+        aes_cipher.encrypt_to_file(params, key, "encrypt_params")
+        aes_cipher.encrypt_to_file(model, key, "encrypt_model")
+        os.chdir("..")
    config = model_conf.GeneralModelConfig()

    #int64 = 0; float32 = 1; int32 = 2;
@@ -113,7 +134,10 @@ def inference_model_to_serving(dirname,
                               serving_server="serving_server",
                               serving_client="serving_client",
                               model_filename=None,
-                               params_filename=None):
+                               params_filename=None,
+                               encryption=False,
+                               key_len=128,
+                               encrypt_conf=None):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_program, feed_target_names, fetch_targets = \
@@ -124,7 +148,7 @@ def inference_model_to_serving(dirname,
    }
    fetch_dict = {x.name: x for x in fetch_targets}
    save_model(serving_server, serving_client, feed_dict, fetch_dict,
-               inference_program)
+               inference_program, encryption, key_len, encrypt_conf)
    feed_names = feed_dict.keys()
    fetch_names = fetch_dict.keys()
    return feed_names, fetch_names
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -157,6 +157,7 @@ class Server(object):
        self.cur_path = os.getcwd()
        self.use_local_bin = False
        self.mkl_flag = False
+        self.encryption_model = False
        self.product_name = None
        self.container_id = None
        self.model_config_paths = None  # for multi-model in a workflow
@@ -197,6 +198,9 @@ class Server(object):
    def set_ir_optimize(self, flag=False):
        self.ir_optimization = flag

+    def use_encryption_model(self, flag=False):
+        self.encryption_model = flag
+
    def set_product_name(self, product_name=None):
        if product_name == None:
            raise ValueError("product_name can't be None.")
@@ -232,9 +236,15 @@ class Server(object):
            engine.force_update_static_cache = False

            if device == "cpu":
-                engine.type = "FLUID_CPU_ANALYSIS_DIR"
+                if self.encryption_model:
+                    engine.type = "FLUID_CPU_ANALYSIS_ENCRYPT"
+                else:
+                    engine.type = "FLUID_CPU_ANALYSIS_DIR"
            elif device == "gpu":
-                engine.type = "FLUID_GPU_ANALYSIS_DIR"
+                if self.encryption_model:
+                    engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT"
+                else:
+                    engine.type = "FLUID_GPU_ANALYSIS_DIR"

            self.model_toolkit_conf.engines.extend([engine])


--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -18,8 +18,17 @@ Usage:
        python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
 """
 import argparse
-from .web_service import WebService
+import sys
+import json
+import base64
+import time
+from multiprocessing import Process
+from .web_service import WebService, port_is_available
 from flask import Flask, request
+try:
+    from http.server import BaseHTTPRequestHandler, HTTPServer
+except ImportError:  # Python 2 keeps these classes in BaseHTTPServer
+    from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer


 def parse_args():  # pylint: disable=doc-string-missing
@@ -53,6 +59,11 @@ def parse_args():  # pylint: disable=doc-string-missing
        type=int,
        default=512 * 1024 * 1024,
        help="Limit sizes of messages")
+    parser.add_argument(
+        "--use_encryption_model",
+        default=False,
+        action="store_true",
+        help="Use encryption model")
    parser.add_argument(
        "--use_multilang",
        default=False,
@@ -71,17 +82,18 @@ def parse_args():  # pylint: disable=doc-string-missing
    return parser.parse_args()


-def start_standard_model():  # pylint: disable=doc-string-missing
+def start_standard_model(serving_port):  # pylint: disable=doc-string-missing
    args = parse_args()
    thread_num = args.thread
    model = args.model
-    port = args.port
+    port = serving_port
    workdir = args.workdir
    device = args.device
    mem_optim = args.mem_optim_off is False
    ir_optim = args.ir_optim
    max_body_size = args.max_body_size
    use_mkl = args.use_mkl
+    use_encryption_model = args.use_encryption_model
    use_multilang = args.use_multilang

    if model == "":
@@ -111,6 +123,7 @@ def start_standard_model():  # pylint: disable=doc-string-missing
    server.use_mkl(use_mkl)
    server.set_max_body_size(max_body_size)
    server.set_port(port)
+    server.use_encryption_model(use_encryption_model)
    if args.product_name != None:
        server.set_product_name(args.product_name)
    if args.container_id != None:
@@ -121,11 +134,126 @@ def start_standard_model():  # pylint: disable=doc-string-missing
    server.run_server()


-if __name__ == "__main__":
+class MainService(BaseHTTPRequestHandler):
+    """HTTP handler that accepts the model key and launches the server.
+
+    Relies on the module-level globals `args`, `p`, `p_flag` and
+    `serving_port`, which the `__main__` block sets before serving.
+    """
+
+    def get_available_port(self):
+        """Return the first free port in [12000, 13000), else None."""
+        default_port = 12000
+        for i in range(1000):
+            if port_is_available(default_port + i):
+                return default_port + i
+        return None
+
+    def start_serving(self):
+        """Process target: run the model server on `serving_port`."""
+        start_standard_model(serving_port)
+
+    def _decode_key(self, post_data):
+        """Return the base64-decoded "key" field, or None if unusable."""
+        if not isinstance(post_data, dict) or "key" not in post_data:
+            return None
+        try:
+            return base64.b64decode(post_data["key"])
+        except (TypeError, ValueError):
+            # Malformed base64: TypeError on Python 2, ValueError on 3.
+            return None
+
+    def get_key(self, post_data):
+        """Save the request key to <model>/key; False if none was sent."""
+        key = self._decode_key(post_data)
+        if key is None:
+            return False
+        # The key is raw bytes, so use binary mode to avoid any newline
+        # translation or str/bytes mismatch.
+        with open(args.model + "/key", "wb") as f:
+            f.write(key)
+        return True
+
+    def check_key(self, post_data):
+        """Return True if the request key equals the one in <model>/key."""
+        key = self._decode_key(post_data)
+        if key is None:
+            return False
+        with open(args.model + "/key", "rb") as f:
+            cur_key = f.read()
+        return (key == cur_key)
+
+    def start(self, post_data):
+        """Process one request body; True means serving is up for it.
+
+        The first accepted request stores the key and spawns the serving
+        process; later requests pass only while that process is alive and
+        their key matches the stored one.
+        """
+        global p_flag
+        global serving_port
+        global p
+        try:
+            post_data = json.loads(post_data)
+        except ValueError:
+            print("request body is not valid json")
+            return False
+        if not p_flag:
+            if args.use_encryption_model:
+                print("waiting key for model")
+                if not self.get_key(post_data):
+                    print("not found key in request")
+                    return False
+            serving_port = self.get_available_port()
+            if serving_port is None:
+                print("no available port for serving")
+                return False
+            p = Process(target=self.start_serving)
+            p.start()
+            # Wait so that an immediate startup crash shows up in is_alive().
+            time.sleep(3)
+            if p.is_alive():
+                p_flag = True
+            else:
+                return False
+        else:
+            if p.is_alive():
+                if not self.check_key(post_data):
+                    return False
+            else:
+                return False
+        return True
+
+    def do_POST(self):
+        """Read the JSON body, try to start serving and answer in JSON."""
+        # A missing Content-Length header is treated as an empty body.
+        content_length = int(self.headers.get('Content-Length', 0))
+        post_data = self.rfile.read(content_length)
+        if self.start(post_data):
+            response = {"endpoint_list": [serving_port]}
+        else:
+            response = {"message": "start serving failed"}
+        self.send_response(200)
+        self.send_header('Content-type', 'application/json')
+        self.end_headers()
+        # wfile is a byte stream; encode so this also works on Python 3.
+        self.wfile.write(json.dumps(response).encode("utf-8"))
+

+if __name__ == "__main__":
    args = parse_args()
    if args.name == "None":
-        start_standard_model()
+        if args.use_encryption_model:
+            p_flag = False
+            p = None
+            serving_port = 0
+            server = HTTPServer(('localhost', int(args.port)), MainService)
+            print(
+                'Starting encryption server, waiting for key from client, use <Ctrl-C> to stop'
+            )
+            server.serve_forever()
+        else:
+            start_standard_model(args.port)
    else:
        service = WebService(name=args.name)
        service.load_model_config(args.model)

--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -25,6 +25,16 @@ from paddle_serving_server import pipeline
 from paddle_serving_server.pipeline import Op


+def port_is_available(port):
+    """Return True if a TCP connect to ('0.0.0.0', port) does not succeed."""
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    with closing(probe) as conn:
+        conn.settimeout(2)
+        # connect_ex reports failure with a non-zero error code.
+        error_code = conn.connect_ex(('0.0.0.0', port))
+    return error_code != 0
+
+
 class WebService(object):
    def __init__(self, name="default_service"):
        self.name = name
@@ -110,7 +120,7 @@ class WebService(object):
        self.mem_optim = mem_optim
        self.ir_optim = ir_optim
        for i in range(1000):
-            if self.port_is_available(default_port + i):
+            if port_is_available(default_port + i):
                self.port_list.append(default_port + i)
                break


--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -68,6 +68,11 @@ def serve_args():
        type=int,
        default=512 * 1024 * 1024,
        help="Limit sizes of messages")
+    parser.add_argument(
+        "--use_encryption_model",
+        default=False,
+        action="store_true",
+        help="Use encryption model")
    parser.add_argument(
        "--use_multilang",
        default=False,
@@ -277,7 +282,8 @@ class Server(object):
    def set_trt(self):
        self.use_trt = True

-    def _prepare_engine(self, model_config_paths, device):
+    def _prepare_engine(self, model_config_paths, device, use_encryption_model):
+
        if self.model_toolkit_conf == None:
            self.model_toolkit_conf = server_sdk.ModelToolkitConf()

@@ -299,9 +305,15 @@ class Server(object):
            engine.use_trt = self.use_trt

            if device == "cpu":
-                engine.type = "FLUID_CPU_ANALYSIS_DIR"
+                if use_encryption_model:
+                    engine.type = "FLUID_CPU_ANALYSIS_ENCRYPT"
+                else:
+                    engine.type = "FLUID_CPU_ANALYSIS_DIR"
            elif device == "gpu":
-                engine.type = "FLUID_GPU_ANALYSIS_DIR"
+                if use_encryption_model:
+                    engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT"
+                else:
+                    engine.type = "FLUID_GPU_ANALYSIS_DIR"

            self.model_toolkit_conf.engines.extend([engine])

@@ -458,6 +470,7 @@ class Server(object):
                       workdir=None,
                       port=9292,
                       device="cpu",
+                       use_encryption_model=False,
                       cube_conf=None):
        if workdir == None:
            workdir = "./tmp"
@@ -471,7 +484,8 @@ class Server(object):

        self.set_port(port)
        self._prepare_resource(workdir, cube_conf)
-        self._prepare_engine(self.model_config_paths, device)
+        self._prepare_engine(self.model_config_paths, device,
+                             use_encryption_model)
        self._prepare_infer_service(port)
        self.workdir = workdir


--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -19,19 +19,25 @@ Usage:
 """
 import argparse
 import os
+import json
+import base64
+import time
 from multiprocessing import Pool, Process
 from paddle_serving_server_gpu import serve_args
 from flask import Flask, request
+try:
+    from http.server import BaseHTTPRequestHandler, HTTPServer
+except ImportError:  # Python 2 keeps these classes in BaseHTTPServer
+    from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer


-def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-missing
+def start_gpu_card_model(index, gpuid, port, args):  # pylint: disable=doc-string-missing
    gpuid = int(gpuid)
    device = "gpu"
-    port = args.port
    if gpuid == -1:
        device = "cpu"
    elif gpuid >= 0:
-        port = args.port + index
+        port = port + index
    thread_num = args.thread
    model = args.model
    mem_optim = args.mem_optim_off is False
@@ -73,14 +75,20 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
        server.set_container_id(args.container_id)

    server.load_model_config(model)
-    server.prepare_server(workdir=workdir, port=port, device=device)
+    server.prepare_server(
+        workdir=workdir,
+        port=port,
+        device=device,
+        use_encryption_model=args.use_encryption_model)
    if gpuid >= 0:
        server.set_gpuid(gpuid)
    server.run_server()


-def start_multi_card(args):  # pylint: disable=doc-string-missing
+def start_multi_card(args, serving_port=None):  # pylint: disable=doc-string-missing
    gpus = ""
+    if serving_port == None:
+        serving_port = args.port
    if args.gpu_ids == "":
        gpus = []
    else:
@@ -97,14 +105,16 @@ def start_multi_card(args):  # pylint: disable=doc-string-missing
            env_gpus = []
    if len(gpus) <= 0:
        print("gpu_ids not set, going to run cpu service.")
-        start_gpu_card_model(-1, -1, args)
+        start_gpu_card_model(-1, -1, serving_port, args)
    else:
        gpu_processes = []
        for i, gpu_id in enumerate(gpus):
            p = Process(
-                target=start_gpu_card_model, args=(
+                target=start_gpu_card_model,
+                args=(
                    i,
                    gpu_id,
+                    serving_port,
                    args, ))
            gpu_processes.append(p)
        for p in gpu_processes:
@@ -113,10 +123,128 @@ def start_multi_card(args):  # pylint: disable=doc-string-missing
            p.join()


+class MainService(BaseHTTPRequestHandler):
+    """HTTP handler that accepts the model key and launches the server.
+
+    Relies on the module-level globals `args`, `p`, `p_flag` and
+    `serving_port`, which the `__main__` block sets before serving.
+    """
+
+    def get_available_port(self):
+        """Return the first free port in [12000, 13000), else None."""
+        default_port = 12000
+        for i in range(1000):
+            if port_is_available(default_port + i):
+                return default_port + i
+        return None
+
+    def start_serving(self):
+        """Process target: run start_multi_card on `serving_port`."""
+        start_multi_card(args, serving_port)
+
+    def _decode_key(self, post_data):
+        """Return the base64-decoded "key" field, or None if unusable."""
+        if not isinstance(post_data, dict) or "key" not in post_data:
+            return None
+        try:
+            return base64.b64decode(post_data["key"])
+        except (TypeError, ValueError):
+            # Malformed base64: TypeError on Python 2, ValueError on 3.
+            return None
+
+    def get_key(self, post_data):
+        """Save the request key to <model>/key; False if none was sent."""
+        key = self._decode_key(post_data)
+        if key is None:
+            return False
+        # The key is raw bytes, so use binary mode to avoid any newline
+        # translation or str/bytes mismatch.
+        with open(args.model + "/key", "wb") as f:
+            f.write(key)
+        return True
+
+    def check_key(self, post_data):
+        """Return True if the request key equals the one in <model>/key."""
+        key = self._decode_key(post_data)
+        if key is None:
+            return False
+        with open(args.model + "/key", "rb") as f:
+            cur_key = f.read()
+        return (key == cur_key)
+
+    def start(self, post_data):
+        """Process one request body; True means serving is up for it.
+
+        The first accepted request stores the key and spawns the serving
+        process; later requests pass only while that process is alive and
+        their key matches the stored one.
+        """
+        global p_flag
+        global serving_port
+        global p
+        try:
+            post_data = json.loads(post_data)
+        except ValueError:
+            print("request body is not valid json")
+            return False
+        if not p_flag:
+            if args.use_encryption_model:
+                print("waiting key for model")
+                if not self.get_key(post_data):
+                    print("not found key in request")
+                    return False
+            serving_port = self.get_available_port()
+            if serving_port is None:
+                print("no available port for serving")
+                return False
+            p = Process(target=self.start_serving)
+            p.start()
+            # Wait up to 3 seconds; join() returns early when the process has
+            # already exited, so a startup crash is reported without delay.
+            p.join(3)
+            if p.is_alive():
+                p_flag = True
+            else:
+                return False
+        else:
+            if p.is_alive():
+                if not self.check_key(post_data):
+                    return False
+            else:
+                return False
+        return True
+
+    def do_POST(self):
+        """Read the JSON body, try to start serving and answer in JSON."""
+        # A missing Content-Length header is treated as an empty body.
+        content_length = int(self.headers.get('Content-Length', 0))
+        post_data = self.rfile.read(content_length)
+        if self.start(post_data):
+            response = {"endpoint_list": [serving_port]}
+        else:
+            response = {"message": "start serving failed"}
+        self.send_response(200)
+        self.send_header('Content-type', 'application/json')
+        self.end_headers()
+        # wfile is a byte stream; encode so this also works on Python 3.
+        self.wfile.write(json.dumps(response).encode("utf-8"))
+
+
 if __name__ == "__main__":
    args = serve_args()
    if args.name == "None":
-        start_multi_card(args)
+        from .web_service import port_is_available
+        if args.use_encryption_model:
+            p_flag = False
+            p = None
+            serving_port = 0
+            server = HTTPServer(('localhost', int(args.port)), MainService)
+            print(
+                'Starting encryption server, waiting for key from client, use <Ctrl-C> to stop'
+            )
+            server.serve_forever()
+        else:
+            start_multi_card(args)
    else:
        from .web_service import WebService
        web_service = WebService(name=args.name)

--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -28,6 +28,16 @@ from paddle_serving_server_gpu import pipeline
 from paddle_serving_server_gpu.pipeline import Op


+def port_is_available(port):
+    """Return True if a TCP connect to ('0.0.0.0', port) does not succeed."""
+    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    with closing(probe) as conn:
+        conn.settimeout(2)
+        # connect_ex reports failure with a non-zero error code.
+        error_code = conn.connect_ex(('0.0.0.0', port))
+    return error_code != 0
+
+
 class WebService(object):
    def __init__(self, name="default_service"):
        self.name = name
@@ -136,7 +146,7 @@ class WebService(object):
        self.port_list = []
        default_port = 12000
        for i in range(1000):
-            if self.port_is_available(default_port + i):
+            if port_is_available(default_port + i):
                self.port_list.append(default_port + i)
            if len(self.port_list) > len(self.gpus):
                break

--- a/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel
+++ b/tools/Dockerfile.centos6.cuda9.0-cudnn7.devel
@@ -39,6 +39,8 @@ RUN yum -y install wget && \
    make clean && \
    echo 'export PATH=/usr/local/python3.6/bin:$PATH' >> /root/.bashrc && \
    echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
+    pip install requests && \
+    pip3 install requests && \
    source /root/.bashrc && \
    cd .. && rm -rf Python-3.6.8* && \
    wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \

--- a/tools/Dockerfile.centos6.devel
+++ b/tools/Dockerfile.centos6.devel
@@ -49,6 +49,8 @@ RUN yum -y install wget && \
    cd .. && rm -rf protobuf-* && \
    yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
    yum clean all && \
+    pip install requests && \
+    pip3 install requests && \
    localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
    echo "export LANG=en_US.utf8" >> /root/.bashrc && \
    echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
--- a/tools/Dockerfile.ci
+++ b/tools/Dockerfile.ci
@@ -23,7 +23,8 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel >/dev/null \
    && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
    && python get-pip.py >/dev/null \
-    && rm get-pip.py
+    && rm get-pip.py \
+    && pip install requests  

 RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 \
    && yum -y install bzip2 >/dev/null \
@@ -34,6 +35,9 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2
    && cd .. \
    && rm -rf patchelf-0.10*

+RUN yum install -y python3 python3-devel \
+    && pip3 install requests
+
 RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
    tar zxf protobuf-all-3.11.2.tar.gz && \
    cd protobuf-3.11.2 && \
@@ -41,8 +45,6 @@ RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/p
    make clean && \
    cd .. && rm -rf protobuf-*

-RUN yum install -y python3 python3-devel
-
 RUN yum -y update >/dev/null \
    && yum -y install dnf >/dev/null \
    && yum -y install dnf-plugins-core >/dev/null \

--- a/tools/Dockerfile.cuda10.0-cudnn7.devel
+++ b/tools/Dockerfile.cuda10.0-cudnn7.devel
@@ -30,11 +30,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel  \
    && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
    && python get-pip.py >/dev/null \
-    && rm get-pip.py 
+    && rm get-pip.py \
+    && pip install requests

 RUN yum install -y python3 python3-devel \
    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all 
+    && yum clean all \
+    && pip3 install requests

 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
    && echo "export LANG=en_US.utf8" >> /root/.bashrc \

--- a/tools/Dockerfile.cuda9.0-cudnn7.devel
+++ b/tools/Dockerfile.cuda9.0-cudnn7.devel
@@ -29,11 +29,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel  \
    && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
    && python get-pip.py >/dev/null \
-    && rm get-pip.py 
+    && rm get-pip.py \
+    && pip install requests

 RUN yum install -y python3 python3-devel \
    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all 
+    && yum clean all \
+    && pip3 install requests

 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
    && echo "export LANG=en_US.utf8" >> /root/.bashrc \

--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -19,11 +19,13 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
 RUN yum -y install python-devel sqlite-devel  \
    && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
    && python get-pip.py >/dev/null \
-    && rm get-pip.py 
+    && rm get-pip.py \
+    && pip install requests 

 RUN yum install -y python3 python3-devel \
    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
-    && yum clean all 
+    && yum clean all \
+    && pip3 install requests

 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
    && echo "export LANG=en_US.utf8" >> /root/.bashrc \

--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -514,6 +514,40 @@ function python_test_lac() {
    cd ..
 }

+
+function python_test_encryption(){
+    # Encrypted-model example test; pwd: /Serving/python/examples
+    cd encryption
+    sh get_data.sh
+    local TYPE=$1
+    export SERVING_BIN=${SERIVNG_WORKDIR}/build-server-${TYPE}/core/general-server/serving
+    case $TYPE in
+        CPU)
+            #check_cmd "python encrypt.py"
+            #sleep 5
+            check_cmd "python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model > /dev/null &"
+            sleep 5
+            check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt"
+            kill_server_process
+            ;;
+        GPU)
+            #check_cmd "python encrypt.py"
+            #sleep 5
+            check_cmd "python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 > /dev/null &"
+            sleep 5
+            check_cmd "python test_client.py encrypt_client/serving_client_conf.prototxt"
+            kill_server_process
+            ;;
+        *)
+            echo "error type"
+            exit 1
+            ;;
+    esac
+    echo "encryption $TYPE test finished as expected"
+    setproxy
+    unset SERVING_BIN
+    cd ..
+}
 function java_run_test() {
    # pwd: /Serving
    local TYPE=$1
@@ -529,7 +563,7 @@ function java_run_test() {
            cd examples # pwd: /Serving/java/examples
            mvn compile > /dev/null
            mvn install > /dev/null
-            
+
            # fit_a_line (general, asyn_predict, batch_predict)
            cd ../../python/examples/grpc_impl_example/fit_a_line # pwd: /Serving/python/examples/grpc_impl_example/fit_a_line
            sh get_data.sh
@@ -786,7 +820,7 @@ function python_test_pipeline(){
            python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 --workdir test9292 &> cnn.log &
            python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 --workdir test9393 &> bow.log &
            sleep 5
-            
+
            # test: thread servicer & thread op
            cat << EOF > config.yml
 rpc_port: 18080
@@ -960,6 +994,7 @@ function python_run_test() {
    python_test_lac $TYPE # pwd: /Serving/python/examples
    python_test_multi_process $TYPE # pwd: /Serving/python/examples
    python_test_multi_fetch $TYPE # pwd: /Serving/python/examples
+    python_test_encryption $TYPE # pwd: /Serving/python/examples
    python_test_yolov4 $TYPE # pwd: /Serving/python/examples
    python_test_grpc_impl $TYPE # pwd: /Serving/python/examples
    python_test_resnet50 $TYPE # pwd: /Serving/python/examples