diff --git a/doc/HTTP_SERVICE_CN.md b/doc/HTTP_SERVICE_CN.md index ff7082b0c6c2f091a199420be45ce83403befdd4..a839039bac48e5c23c8c8d1571953365315b7bd8 100644 --- a/doc/HTTP_SERVICE_CN.md +++ b/doc/HTTP_SERVICE_CN.md @@ -12,7 +12,7 @@ BRPC-Server会尝试去JSON字符串中再去反序列化出Proto格式的数据 ### Http+protobuf方式 各种语言都提供了对ProtoBuf的支持,如果您对此比较熟悉,您也可以先将数据使用ProtoBuf序列化,再将序列化后的数据放入Http请求数据体中,然后指定Content-Type: application/proto,从而使用http/h2+protobuf二进制串访问服务。 -实测随着数据量的增大,使用JSON方式的Http的数据量和反序列化的耗时会大幅度增加,推荐当您的数据量较大时,使用Http+protobuf方式,后续我们会在框架的HttpClient中增加该功能,目前暂没有支持。 +实测随着数据量的增大,使用JSON方式的Http的数据量和反序列化的耗时会大幅度增加,推荐当您的数据量较大时,使用Http+protobuf方式,目前已经在Java和Python的Client端提供了支持。 **理论上讲,序列化/反序列化的性能从高到低排序为:protobuf > http/h2+protobuf > http** diff --git a/python/examples/bert/README.md b/python/examples/bert/README.md index 99aadde64455fbcc1def1baf5dcb6516c0dc8e69..5d3242837f6d8be08f321d68890587e4bba725e8 100755 --- a/python/examples/bert/README.md +++ b/python/examples/bert/README.md @@ -45,22 +45,22 @@ this script will download Chinese Dictionary File vocab.txt and Chinese Sample Data data-c.txt ### Inference Service(Support BRPC-Client、GRPC-Client、Http-Client) start cpu inference service, run ``` -python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #cpu inference service +python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #cpu inference service ``` Or, start gpu inference service, run ``` -python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0 +python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0 ``` ### BRPC-Client Inference Before prediction we should install paddle_serving_app, which provides data preprocessing for the BERT model. ``` -pip install paddle_serving_app +pip3 install paddle_serving_app ``` Run ``` -head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt +head data-c.txt | python3 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt ``` The client reads data from data-c.txt and sends prediction requests; the predictions are returned as word vectors. (Due to the large amount of data in the word vectors, we do not print them.) 
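The `Content-Type: application/proto` access pattern described in the HTTP_SERVICE_CN.md hunk above can be exercised with a few lines of Python. This is only a sketch: the endpoint path, port, and the generated `Request` message are illustrative placeholders, not names defined in this repository.

```python
# Minimal HTTP+protobuf request sketch. `inference_pb2.Request`, the URL and
# the port are hypothetical placeholders; substitute your service's real ones.
import requests
from inference_pb2 import Request  # module assumed to be generated by protoc

req = Request()                     # fill in the fields the service expects
payload = req.SerializeToString()   # protobuf binary body

resp = requests.post(
    "http://127.0.0.1:9292/GeneralModelService/inference",  # placeholder endpoint
    data=payload,
    headers={"Content-Type": "application/proto"},          # protobuf instead of JSON
)
print(resp.status_code, len(resp.content))
```

Compared with JSON, this keeps the request body compact and avoids the extra JSON-to-protobuf conversion on the server side.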
@@ -68,7 +68,7 @@ the client reads data from data-c.txt and send prediction request, the predictio #### GRPC-Client/HTTP-Client Run ``` -head data-c.txt | python bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt +head data-c.txt | python3 bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt ``` diff --git a/python/examples/bert/README_CN.md b/python/examples/bert/README_CN.md index e0601f5f0a15c243ef8705529c4a68ab255de360..42bc3ffab0ad51e304b11a78634b5a90415d1ace 100755 --- a/python/examples/bert/README_CN.md +++ b/python/examples/bert/README_CN.md @@ -43,12 +43,12 @@ sh get_data.sh ### 启动预测服务(支持BRPC-Client、GRPC-Client、HTTP-Client三种方式访问) 启动cpu预测服务,执行 ``` -python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #启动cpu预测服务 +python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #启动cpu预测服务 ``` 或者,启动gpu预测服务,执行 ``` -python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务 +python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务 ``` @@ -56,13 +56,13 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --g 执行预测前需要安装paddle_serving_app,模块中提供了BERT模型的数据预处理方法。 ``` -pip install paddle_serving_app +pip3 install paddle_serving_app ``` #### BRPC-Client 执行 ``` -head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt +head data-c.txt | python3 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt ``` 启动client读取data-c.txt中的数据进行预测,预测结果为文本的向量表示(由于数据较多,脚本中没有将输出进行打印),server端的地址在脚本中修改。 @@ -70,7 +70,7 @@ head data-c.txt | python bert_client.py --model bert_seq128_client/serving_clien #### GRPC-Client/HTTP-Client 执行 ``` -head data-c.txt | python bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt +head data-c.txt | python3 bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt ``` diff --git a/python/examples/blazeface/README.md b/python/examples/blazeface/README.md index 6f9d3c5adab5f3275989479078cb4329d14589fd..29e3026b4d972e141eabcc1a180d7a5cdb804a52 100644 --- a/python/examples/blazeface/README.md +++ b/python/examples/blazeface/README.md @@ -2,7 +2,7 @@ ## Get Model ``` -python -m paddle_serving_app.package --get_model blazeface +python3 -m paddle_serving_app.package --get_model blazeface tar -xf blazeface.tar.gz ``` @@ -11,13 +11,13 @@ tar -xf blazeface.tar.gz ### Start Service ``` -python -m paddle_serving_server.serve --model serving_server --port 9494 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 ``` ### Client Prediction ``` -python test_client.py serving_client/serving_client_conf.prototxt test.jpg +python3 test_client.py serving_client/serving_client_conf.prototxt test.jpg ``` the result is in `output` folder, including a json file and image file with bounding boxes. 
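For readers unfamiliar with what a script such as `test_client.py` does, a typical Paddle Serving RPC client follows the pattern below. This is only a sketch: the feed name, the fetch name and the random tensor are placeholders standing in for the real preprocessing and the names recorded in `serving_client_conf.prototxt`.

```python
# Sketch of a generic Paddle Serving RPC client. The feed/fetch names and the
# fake input tensor are placeholders; check serving_client_conf.prototxt and
# the shipped test_client.py for the real ones.
import sys
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config(sys.argv[1])   # e.g. serving_client/serving_client_conf.prototxt
client.connect(["127.0.0.1:9494"])       # address of the service started above

fake_image = np.random.rand(3, 640, 640).astype("float32")  # placeholder input
fetch_map = client.predict(feed={"image": fake_image}, fetch=["detection_output"])
print(fetch_map)
```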
diff --git a/python/examples/cascade_rcnn/README.md b/python/examples/cascade_rcnn/README.md index f8aa79e8bf97da5dd998ac6d340c0abd398931c0..0f831a400a04db1c5c38c76fd911fee4831f8779 100644 --- a/python/examples/cascade_rcnn/README.md +++ b/python/examples/cascade_rcnn/README.md @@ -10,12 +10,12 @@ If you want to have more detection models, please refer to [Paddle Detection Mod ### Start the service ``` -python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 ``` ### Perform prediction ``` -python test_client.py +python3 test_client.py ``` Image with bounding boxes and json result would be saved in `output` folder. diff --git a/python/examples/cascade_rcnn/README_CN.md b/python/examples/cascade_rcnn/README_CN.md index 99606de41812cb591a46e443c8a2f72c30ba19e0..0cc65ed681416de3bacd0edb1a0226b085c24faa 100644 --- a/python/examples/cascade_rcnn/README_CN.md +++ b/python/examples/cascade_rcnn/README_CN.md @@ -10,12 +10,12 @@ sh get_data.sh ### 启动服务 ``` -python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 ``` ### 执行预测 ``` -python test_client.py +python3 test_client.py ``` 客户端已经为图片做好了后处理,在`output`文件夹下存放各个框的json格式信息还有后处理结果图片。 diff --git a/python/examples/criteo_ctr/README.md b/python/examples/criteo_ctr/README.md index 46be4d0ae9d3167bc107ec45b0000520920d6dea..6c1d79e7362a0240a49a9f0243f3de3340119ce3 100644 --- a/python/examples/criteo_ctr/README.md +++ b/python/examples/criteo_ctr/README.md @@ -19,13 +19,13 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear. ### Start RPC Inference Service ``` -python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service -python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0 +python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service +python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0 ``` ### RPC Infer ``` -python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 +python3 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 ``` the latency will display in the end. diff --git a/python/examples/criteo_ctr/README_CN.md b/python/examples/criteo_ctr/README_CN.md index c7d6255e0b21aa447c5decc823a9bbb5bdb4ad65..c5b1da76055e64bd08bcf2a00dffe537bc931ee9 100644 --- a/python/examples/criteo_ctr/README_CN.md +++ b/python/examples/criteo_ctr/README_CN.md @@ -19,13 +19,13 @@ mv models/ctr_serving_model . 
### 启动RPC预测服务 ``` -python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务 -python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务 +python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务 +python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务 ``` ### 执行预测 ``` -python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 +python3 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 ``` 预测完毕会输出预测过程的耗时。 diff --git a/python/examples/criteo_ctr_with_cube/README.md b/python/examples/criteo_ctr_with_cube/README.md index 493b3d72c1fff9275c2a99cfee45efd4bef1af4c..de5c3269228a8d7ef619a8c46f2252208e53b982 100755 --- a/python/examples/criteo_ctr_with_cube/README.md +++ b/python/examples/criteo_ctr_with_cube/README.md @@ -32,13 +32,13 @@ Here, the sparse parameter is loaded by cube sparse parameter indexing service C ### Start RPC Predictor, the number of serving thread is 4(configurable in test_server.py) ``` -python test_server.py ctr_serving_model_kv +python3 test_server.py ctr_serving_model_kv ``` ### Run Prediction ``` -python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data +python3 test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data ``` ### Benchmark diff --git a/python/examples/criteo_ctr_with_cube/README_CN.md b/python/examples/criteo_ctr_with_cube/README_CN.md index 7a0eb43c203aafeb38b64d249954cdabf7bf7a38..15d61160317f866aae25a4d777d76e14725424d3 100644 --- a/python/examples/criteo_ctr_with_cube/README_CN.md +++ b/python/examples/criteo_ctr_with_cube/README_CN.md @@ -30,13 +30,13 @@ sh cube_prepare.sh & ### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置) ``` -python test_server.py ctr_serving_model_kv +python3 test_server.py ctr_serving_model_kv ``` ### 执行预测 ``` -python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data +python3 test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data ``` ### Benchmark diff --git a/python/examples/deeplabv3/README.md b/python/examples/deeplabv3/README.md index 28bec77bb500e42919734433617ea2df1b9e95c0..08022618fcec5220667ca19bfb803cba36519c7b 100644 --- a/python/examples/deeplabv3/README.md +++ b/python/examples/deeplabv3/README.md @@ -3,7 +3,7 @@ ## Get Model ``` -python -m paddle_serving_app.package --get_model deeplabv3 +python3 -m paddle_serving_app.package --get_model deeplabv3 tar -xzvf deeplabv3.tar.gz ``` @@ -12,11 +12,11 @@ tar -xzvf deeplabv3.tar.gz ### Start Service ``` -python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494 +python3 -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494 ``` ### Client Prediction ``` -python deeplabv3_client.py +python3 deeplabv3_client.py ``` diff --git a/python/examples/deeplabv3/README_CN.md b/python/examples/deeplabv3/README_CN.md index 6de3c420833d31f871ad79122e1d77aee4208e35..16f11daba354349f1b73f8bba00cac8ff5c88864 100644 --- a/python/examples/deeplabv3/README_CN.md +++ b/python/examples/deeplabv3/README_CN.md @@ -3,7 +3,7 @@ ## 获取模型 ``` -python -m paddle_serving_app.package --get_model deeplabv3 +python3 -m paddle_serving_app.package --get_model deeplabv3 tar -xzvf deeplabv3.tar.gz ``` @@ -12,10 +12,10 @@ tar -xzvf deeplabv3.tar.gz ### 启动服务端 ``` -python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494 +python3 -m paddle_serving_server.serve --model 
deeplabv3_server --gpu_ids 0 --port 9494 ``` ### 客户端预测 ``` -python deeplabv3_client.py +python3 deeplabv3_client.py diff --git a/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md b/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md index ff4eb10139b4843c81fa2a256f3e6ff116e32472..ebb8b9d87307c82543f3a5de977bb997ddeb79e1 100644 --- a/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md +++ b/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md @@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf faster_rcnn_hrnetv2p_w18_1x.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. But you need to do some extra work. @@ -19,5 +19,5 @@ Please reference to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/m ### Prediction ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` diff --git a/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md b/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md index 4bd51128f4a5cb8c09b624c5a1f3dc82b5556b23..5be20d68ae3ecbcb45440659eb917a7b5b0d0ee1 100644 --- a/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md +++ b/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md @@ -11,12 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf faster_rcnn_hrnetv2p_w18_1x.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项,但此时需要额外设置子图的TRT变长最大最小最优shape. 请参考https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40 ### 执行预测 ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` diff --git a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md index 1fb0dfeecc6e82045bcaa026412f561e8a43908e..d56aa416b9e54114646f9271c27f6afde7d41259 100644 --- a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md +++ b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md @@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf faster_rcnn_r50_fpn_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. But you need to do some extra work. @@ -19,7 +19,7 @@ Please reference to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/m ### Perform prediction ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` ## 3. 
Result analysis diff --git a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md index 7617df7aa0c732d047e7cbd056f93e6a16f403d6..f8475daf029ae2230432871237281970052fe3e3 100644 --- a/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md +++ b/python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md @@ -11,14 +11,14 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf faster_rcnn_r50_fpn_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项,但此时需要额外设置子图的TRT变长最大最小最优shape. 请参考https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40 ### 执行预测 ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` ## 3. 结果分析 diff --git a/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README.md b/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README.md index d0cdb1582584cb7e0e95d00231c2c8a5fb33d464..5612b754ae9610ed351a4becfec6b47bdcb57c8d 100644 --- a/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README.md +++ b/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README.md @@ -10,11 +10,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf fcos_dcn_r50_fpn_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. 
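For example, based on the start command above, TensorRT could be enabled by appending the flag (sketch):

```
python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 --use_trt
```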
### Perform prediction ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` diff --git a/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README_CN.md b/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README_CN.md index 56c2505c8c7ee2be7627a2f6fd9e108868428805..d9737261632c64172684fea0d60c566f242e95e6 100644 --- a/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README_CN.md +++ b/python/examples/detection/fcos_dcn_r50_fpn_1x_coco/README_CN.md @@ -11,12 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf fcos_dcn_r50_fpn_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 ### 执行预测 ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` diff --git a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md index 8c3d5142ad2a88dc151478965e41def5075e4b2f..8060e087107e54bc401849fd576497e9fc9cd421 100644 --- a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md +++ b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README.md @@ -10,13 +10,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf ppyolo_r50vd_dcn_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. ### Perform prediction ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` - diff --git a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md index 1aebb8db9a0a3b3523d233a70ff42afe4f40a610..3071db7b124fd998d15901be7a78a67018d0de0f 100644 --- a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md +++ b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/README_CN.md @@ -11,13 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf ppyolo_r50vd_dcn_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 ### 执行预测 ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` - diff --git a/python/examples/detection/ssd_vgg16_300_240e_voc/README.md b/python/examples/detection/ssd_vgg16_300_240e_voc/README.md index 062941bfb8deff3a09c938e9c43cd2b710cbb0e5..60a22fdb5d3c1486827376d935c4f39de1b2c387 100644 --- a/python/examples/detection/ssd_vgg16_300_240e_voc/README.md +++ b/python/examples/detection/ssd_vgg16_300_240e_voc/README.md @@ -10,11 +10,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf ssd_vgg16_300_240e_voc.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. 
### Perform prediction ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` diff --git a/python/examples/detection/ssd_vgg16_300_240e_voc/README_CN.md b/python/examples/detection/ssd_vgg16_300_240e_voc/README_CN.md index 32c19b5159a497e52df1c5fd01a87fd43f7d67e4..a2e0d187a5e896f796dec4ed0dbdcb3af4ed5334 100644 --- a/python/examples/detection/ssd_vgg16_300_240e_voc/README_CN.md +++ b/python/examples/detection/ssd_vgg16_300_240e_voc/README_CN.md @@ -11,12 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf ssd_vgg16_300_240e_voc.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 ### 执行预测 ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` diff --git a/python/examples/detection/ttfnet_darknet53_1x_coco/README.md b/python/examples/detection/ttfnet_darknet53_1x_coco/README.md index 58c538b7cdc5ff7975b57d292b1d8b0c7d5dd2b7..d6ffb912c45d94a85cc6a546f3bce6c690e1f2fe 100644 --- a/python/examples/detection/ttfnet_darknet53_1x_coco/README.md +++ b/python/examples/detection/ttfnet_darknet53_1x_coco/README.md @@ -10,12 +10,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf ttfnet_darknet53_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. ### Perform prediction ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` - diff --git a/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md b/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md index 641086cd2eba4b274325bca47791a60c6a5ec97f..7a1d3d1abb81e389542a6e5dfd65befa59c402a5 100644 --- a/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md +++ b/python/examples/detection/ttfnet_darknet53_1x_coco/README_CN.md @@ -11,13 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf ttfnet_darknet53_1x_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 ### 执行预测 ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` - diff --git a/python/examples/detection/yolov3_darknet53_270e_coco/README.md b/python/examples/detection/yolov3_darknet53_270e_coco/README.md index 6357c3030a5936b4ec9105860dd63144bfd8098e..32670748db42336053d01e61bf087d00c03c7e06 100644 --- a/python/examples/detection/yolov3_darknet53_270e_coco/README.md +++ b/python/examples/detection/yolov3_darknet53_270e_coco/README.md @@ -10,13 +10,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf yolov3_darknet53_270e_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` This model support TensorRT, if you want a faster inference, please use `--use_trt`. 
### Perform prediction ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` - diff --git a/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md b/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md index 166d562e79a91bbc59cd7dc15b7e5667f4e0cb27..4185e0fe4963113ed0f9c0ea865705fd33226d1b 100644 --- a/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md +++ b/python/examples/detection/yolov3_darknet53_270e_coco/README_CN.md @@ -11,13 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### 启动服务 ``` tar xf yolov3_darknet53_270e_coco.tar -python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 ``` 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 ### 执行预测 ``` -python test_client.py 000000570688.jpg +python3 test_client.py 000000570688.jpg ``` - diff --git a/python/examples/encryption/README.md b/python/examples/encryption/README.md index 0d92604d15070df35e0125d084e7c68e1b36ae1b..3120422ebfaa2a88851eda18c42e7740fe29e884 100644 --- a/python/examples/encryption/README.md +++ b/python/examples/encryption/README.md @@ -12,9 +12,9 @@ sh get_data.sh ## Encrypt Model -The `paddlepaddle` package is used in this example, you may need to download the corresponding package(`pip install paddlepaddle`). +The `paddlepaddle` package is used in this example, you may need to download the corresponding package(`pip3 install paddlepaddle`). -[python encrypt.py](./encrypt.py) +[python3 encrypt.py](./encrypt.py) [//file]:#encrypt.py ``` python @@ -35,14 +35,14 @@ client-side configuration file are stored in the `encrypt_client` directory. ## Start Encryption Service CPU Service ``` -python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model +python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model ``` GPU Service ``` -python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0 +python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0 ``` ## Prediction ``` -python test_client.py encrypt_client/serving_client_conf.prototxt +python3 test_client.py encrypt_client/serving_client_conf.prototxt ``` diff --git a/python/examples/encryption/README_CN.md b/python/examples/encryption/README_CN.md index b6f8fb8411a8d93097a9e1dc28393096f3ebccc2..ad82d49b61cb70093a9423ad83dbc30663b6d4f1 100644 --- a/python/examples/encryption/README_CN.md +++ b/python/examples/encryption/README_CN.md @@ -11,9 +11,9 @@ sh get_data.sh ``` ## 模型加密 -本示例中使用了`paddlepaddle`包中的模块,需要进行下载(`pip install paddlepaddle`)。 +本示例中使用了`paddlepaddle`包中的模块,需要进行下载(`pip3 install paddlepaddle`)。 -运行[python encrypt.py](./encrypt.py)进行模型加密 +运行[python3 encrypt.py](./encrypt.py)进行模型加密 [//file]:#encrypt.py ``` python @@ -36,14 +36,14 @@ def serving_encryption(): ## 启动加密预测服务 CPU预测服务 ``` -python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model +python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model ``` GPU预测服务 ``` -python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0 +python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0 ``` ## 预测 ``` -python test_client.py 
encrypt_client/serving_client_conf.prototxt +python3 test_client.py encrypt_client/serving_client_conf.prototxt ``` diff --git a/python/examples/fit_a_line/README.md b/python/examples/fit_a_line/README.md index 3a16316ea8b0bafdaa43736e11662d8c6b5165f5..9586cd670240eb43e4a706ff89ea435b7a8c6d1c 100644 --- a/python/examples/fit_a_line/README.md +++ b/python/examples/fit_a_line/README.md @@ -15,22 +15,22 @@ sh get_data.sh ### Start server ```shell -python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 +python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 ``` ## Client prediction ### RPC Client -The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`). +The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip3 install paddlepaddle`). ``` shell -python test_client.py uci_housing_client/serving_client_conf.prototxt +python3 test_client.py uci_housing_client/serving_client_conf.prototxt ``` ### Http Client ``` shell -python test_httpclient.py uci_housing_client/serving_client_conf.prototxt +python3 test_httpclient.py uci_housing_client/serving_client_conf.prototxt ``` diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md index 1176448dd28e78b56dbbd052b5284022e0a39f89..d1cace5e2c5b5cee2195deaa1667af68e5f1f067 100755 --- a/python/examples/fit_a_line/README_CN.md +++ b/python/examples/fit_a_line/README_CN.md @@ -12,23 +12,23 @@ sh get_data.sh ## 开启服务端(支持BRPC-Client/GRPC Client/Http-Client) ```shell -python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 +python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 ``` ## 客户端预测 ### BRPC-Client -`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。 +`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip3 install paddlepaddle`)。 ``` shell -python test_client.py uci_housing_client/serving_client_conf.prototxt +python3 test_client.py uci_housing_client/serving_client_conf.prototxt ``` ### GRPC-Client/Http-Client ``` shell -python test_httpclient.py uci_housing_client/serving_client_conf.prototxt +python3 test_httpclient.py uci_housing_client/serving_client_conf.prototxt ``` diff --git a/python/examples/imagenet/README.md b/python/examples/imagenet/README.md index 661d894b6933d9c63921f9540b73306fb25c3fed..eaff522a5ae31eab08786489cbce0fa83f85e91d 100755 --- a/python/examples/imagenet/README.md +++ b/python/examples/imagenet/README.md @@ -12,7 +12,7 @@ sh get_model.sh ### Install preprocess module ``` -pip install paddle_serving_app +pip3 install paddle_serving_app ``` @@ -20,22 +20,22 @@ pip install paddle_serving_app launch server side ``` -python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu inference service +python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu inference service ``` ``` -python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service +python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service ``` ### BRPC-Client client send inference request ``` -python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt +python3 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt ``` *the port of server side in this example is 
9696 ### GRPC-Client/Http-Client client send inference request ``` -python resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt +python3 resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt ``` diff --git a/python/examples/imagenet/README_CN.md b/python/examples/imagenet/README_CN.md index 737b1513d68a65e1dcbdb04da7e72114722965b4..642bee3d0cbab98a48f2f09284ea887751752667 100755 --- a/python/examples/imagenet/README_CN.md +++ b/python/examples/imagenet/README_CN.md @@ -12,24 +12,24 @@ sh get_model.sh ### 安装数据预处理模块 ``` -pip install paddle_serving_app +pip3 install paddle_serving_app ``` ### 启动服务端(支持BRPC-Client、GRPC-Client、Http-Client) 启动server端 ``` -python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu预测服务 +python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu预测服务 ``` ``` -python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务 +python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务 ``` ### BRPC-Client预测 client端进行预测 ``` -python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt +python3 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt ``` *server端示例中服务端口为9696端口 @@ -37,5 +37,5 @@ python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prot ### GRPC-Client/Http-Client预测 client端进行预测 ``` -python resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt +python3 resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt ``` diff --git a/python/examples/imdb/README.md b/python/examples/imdb/README.md index b7c1132108f6ba7038eb55f7b160c7bc22a048b3..573ac47db37d23406e66fb1605ac60ea58189ffa 100755 --- a/python/examples/imdb/README.md +++ b/python/examples/imdb/README.md @@ -12,11 +12,11 @@ the package downloaded contains cnn, lstm and bow model config along with their ### Start inference service(Support BRPC-Client/GRPC-Client/Http-Client) ``` -python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 +python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 ``` ### BRPC-Client Infer ``` -head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab +head test_data/part-0 | python3 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab ``` it will get predict results of the first 10 test cases. @@ -24,5 +24,5 @@ it will get predict results of the first 10 test cases. 
### GRPC-Client/Http-Client Infer ``` -head test_data/part-0 | python test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab +head test_data/part-0 | python3 test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab ``` diff --git a/python/examples/imdb/README_CN.md b/python/examples/imdb/README_CN.md index 2c71ce8763bc42724c3c4b779768d1b480f1a9ac..a1fecc8af35dcd2f5a38f47480b9b80b3cf96054 100755 --- a/python/examples/imdb/README_CN.md +++ b/python/examples/imdb/README_CN.md @@ -12,15 +12,15 @@ sh get_data.sh ### 启动预测服务(支持BRPC-Client/GRPC-Client/Http-Client) ``` -python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 +python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 ``` ### BRPC-Client预测 ``` -head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab +head test_data/part-0 | python3 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab ``` 预测test_data/part-0的前十个样例。 ### GRPC-Client/Http-Client预测 ``` -head test_data/part-0 | python test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab +head test_data/part-0 | python3 test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab ``` diff --git a/python/examples/lac/README.md b/python/examples/lac/README.md index bbc681ba388e312ec518a0a5ac3adbf724620194..108d5051b50b2b639e28c023364d36ec9a0a0a44 100755 --- a/python/examples/lac/README.md +++ b/python/examples/lac/README.md @@ -4,23 +4,23 @@ ### Get Model ``` -python -m paddle_serving_app.package --get_model lac +python3 -m paddle_serving_app.package --get_model lac tar -xzvf lac.tar.gz ``` #### Start inference service(Support BRPC-Client/GRPC-Client/Http-Client) ``` -python -m paddle_serving_server.serve --model lac_model/ --port 9292 +python3 -m paddle_serving_server.serve --model lac_model/ --port 9292 ``` ### BRPC Infer ``` -echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt +echo "我爱北京天安门" | python3 lac_client.py lac_client/serving_client_conf.prototxt ``` It will return the word segmentation result. 
### GRPC/Http Infer ``` -echo "我爱北京天安门" | python lac_http_client.py lac_client/serving_client_conf.prototxt +echo "我爱北京天安门" | python3 lac_http_client.py lac_client/serving_client_conf.prototxt ``` diff --git a/python/examples/lac/README_CN.md b/python/examples/lac/README_CN.md index 568edf5c52b57aeb24b404b0841e25321d824253..5634128c80c23126836677f4cb434df68dde9056 100755 --- a/python/examples/lac/README_CN.md +++ b/python/examples/lac/README_CN.md @@ -4,23 +4,23 @@ ### 获取模型 ``` -python -m paddle_serving_app.package --get_model lac +python3 -m paddle_serving_app.package --get_model lac tar -xzvf lac.tar.gz ``` #### 开启预测服务(支持BRPC-Client/GRPC-Client/Http-Client) ``` -python -m paddle_serving_server.serve --model lac_model/ --port 9292 +python3 -m paddle_serving_server.serve --model lac_model/ --port 9292 ``` ### 执行BRPC预测 ``` -echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt +echo "我爱北京天安门" | python3 lac_client.py lac_client/serving_client_conf.prototxt ``` 我们就能得到分词结果 ### 执行GRPC/Http预测 ``` -echo "我爱北京天安门" | python lac_http_client.py lac_client/serving_client_conf.prototxt +echo "我爱北京天安门" | python3 lac_http_client.py lac_client/serving_client_conf.prototxt ``` diff --git a/python/examples/low_precision/resnet50/README.md b/python/examples/low_precision/resnet50/README.md index 9e1ff16c676b067437183e6e19446e8a526feed5..b4ae2552c3dcd1c30c67b5731d81095e05ca9a86 100644 --- a/python/examples/low_precision/resnet50/README.md +++ b/python/examples/low_precision/resnet50/README.md @@ -11,15 +11,15 @@ Firstly, download the [Resnet50 int8 model](https://paddle-inference-dist.bj.bce wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz tar zxvf ResNet50_quant.tar.gz -python -m paddle_serving_client.convert --dirname ResNet50_quant +python3 -m paddle_serving_client.convert --dirname ResNet50_quant ``` Start RPC service, specify the GPU id and precision mode ``` -python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 +python3 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 ``` Request the serving service with Client ``` -python resnet50_client.py +python3 resnet50_client.py ``` ## Reference diff --git a/python/examples/low_precision/resnet50/README_CN.md b/python/examples/low_precision/resnet50/README_CN.md index 1c1a3be1de1690e9736d994016ac05cfba12bcab..648b64dd2b0a5089ce8539c42c0222862e89d8f3 100644 --- a/python/examples/low_precision/resnet50/README_CN.md +++ b/python/examples/low_precision/resnet50/README_CN.md @@ -10,15 +10,15 @@ wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz tar zxvf ResNet50_quant.tar.gz -python -m paddle_serving_client.convert --dirname ResNet50_quant +python3 -m paddle_serving_client.convert --dirname ResNet50_quant ``` 启动rpc服务, 设定所选GPU id、部署模型精度 ``` -python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 +python3 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 ``` 使用client进行请求 ``` -python resnet50_client.py +python3 resnet50_client.py ``` ## 参考文档 diff --git a/python/examples/mobilenet/README.md b/python/examples/mobilenet/README.md index 4a808026af0ca5cc1920a292c3f85c82962a3f41..1a16b749220bdf8e6db0dd8950fc505620cbc8fc 100644 --- a/python/examples/mobilenet/README.md +++ b/python/examples/mobilenet/README.md @@ -3,7 +3,7 @@ ## 
Get Model ``` -python -m paddle_serving_app.package --get_model mobilenet_v2_imagenet +python3 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet tar -xzvf mobilenet_v2_imagenet.tar.gz ``` @@ -12,11 +12,11 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ### Start Service ``` -python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 +python3 -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 ``` ### Client Prediction ``` -python mobilenet_tutorial.py +python3 mobilenet_tutorial.py ``` diff --git a/python/examples/mobilenet/README_CN.md b/python/examples/mobilenet/README_CN.md index d4f91837ec5e03c4ef32041580e5d6b30039480e..68474e5d80afdec183cb5bac0e9ebfc13a7f9ac6 100644 --- a/python/examples/mobilenet/README_CN.md +++ b/python/examples/mobilenet/README_CN.md @@ -3,7 +3,7 @@ ## 获取模型 ``` -python -m paddle_serving_app.package --get_model mobilenet_v2_imagenet +python3 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet tar -xzvf mobilenet_v2_imagenet.tar.gz ``` @@ -12,11 +12,11 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ### 启动服务端 ``` -python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 +python3 -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 ``` ### 客户端预测 ``` -python mobilenet_tutorial.py +python3 mobilenet_tutorial.py ``` diff --git a/python/examples/ocr/README.md b/python/examples/ocr/README.md index dfa836fdc7ae29747ad400d21e585a775e0593e4..630f01d999943b9948e153430b30d80fbabd0549 100644 --- a/python/examples/ocr/README.md +++ b/python/examples/ocr/README.md @@ -4,9 +4,9 @@ ## Get Model ``` -python -m paddle_serving_app.package --get_model ocr_rec +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -python -m paddle_serving_app.package --get_model ocr_det +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` @@ -23,16 +23,16 @@ tar xf test_imgs.tar ``` #choose one of cpu/gpu commands as following #for cpu user -python -m paddle_serving_server.serve --model ocr_det_model --port 9293 -python ocr_web_server.py cpu +python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293 +python3 ocr_web_server.py cpu #for gpu user -python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0 -python ocr_web_server.py gpu +python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0 +python3 ocr_web_server.py gpu ``` ### Client Prediction ``` -python ocr_web_client.py +python3 ocr_web_client.py ``` If you want a faster web service, please try Web LocalPredictor Service @@ -40,14 +40,14 @@ If you want a faster web service, please try Web LocalPredictor Service ``` #choose one of cpu/gpu commands as following #for cpu user -python ocr_debugger_server.py cpu +python3 ocr_debugger_server.py cpu #for gpu user -python ocr_debugger_server.py gpu +python3 ocr_debugger_server.py gpu ``` ## Web LocalPredictor Client Prediction ``` -python ocr_web_client.py +python3 ocr_web_client.py ``` ## Benchmark @@ -69,34 +69,34 @@ if you are going to detect images not recognize it or directly recognize the wor ### Det Server ``` -python det_web_server.py cpu #for cpu user -python det_web_server.py gpu #for gpu user +python3 det_web_server.py cpu #for cpu user +python3 det_web_server.py gpu #for gpu user #or -python det_debugger_server.py cpu #for cpu user -python det_debugger_server.py gpu #for gpu user +python3 
det_debugger_server.py cpu #for cpu user +python3 det_debugger_server.py gpu #for gpu user ``` ### Det Client ``` # also use ocr_web_client.py -python ocr_web_client.py +python3 ocr_web_client.py ``` ### Rec Server ``` -python rec_web_server.py cpu #for cpu user -python rec_web_server.py gpu #for gpu user +python3 rec_web_server.py cpu #for cpu user +python3 rec_web_server.py gpu #for gpu user #or -python rec_debugger_server.py cpu #for cpu user -python rec_debugger_server.py gpu #for gpu user +python3 rec_debugger_server.py cpu #for cpu user +python3 rec_debugger_server.py gpu #for gpu user ``` ### Rec Client ``` -python rec_web_client.py +python3 rec_web_client.py ``` ## C++ OCR Service @@ -109,9 +109,9 @@ Select a startup mode according to CPU / GPU device After the -- model parameter, the folder path of multiple model files is passed in to start the prediction service of multiple model concatenation. ``` #for cpu user -python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 #for gpu user -python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0 ``` ### Client Prediction @@ -123,5 +123,5 @@ for this case, `feed_type` should be 3(which means the data type is string),`sha By passing in multiple client folder paths, the client can be started for multi model prediction. ``` -python ocr_cpp_client.py ocr_det_client ocr_rec_client +python3 ocr_cpp_client.py ocr_det_client ocr_rec_client ``` diff --git a/python/examples/ocr/README_CN.md b/python/examples/ocr/README_CN.md index 7e02109252c37bb972b36214713a65dd334524dd..421a4b930507abd3d36ef6db737f85a060647ced 100644 --- a/python/examples/ocr/README_CN.md +++ b/python/examples/ocr/README_CN.md @@ -4,9 +4,9 @@ ## 获取模型 ``` -python -m paddle_serving_app.package --get_model ocr_rec +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -python -m paddle_serving_app.package --get_model ocr_det +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` ## 获取数据集(可选) @@ -22,16 +22,16 @@ tar xf test_imgs.tar ``` #根据CPU/GPU设备选择一种启动方式 #for cpu user -python -m paddle_serving_server.serve --model ocr_det_model --port 9293 -python ocr_web_server.py cpu +python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293 +python3 ocr_web_server.py cpu #for gpu user -python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0 -python ocr_web_server.py gpu +python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0 +python3 ocr_web_server.py gpu ``` ### 启动客户端 ``` -python ocr_web_client.py +python3 ocr_web_client.py ``` 如果用户需要更快的执行速度,请尝试LocalPredictor版Web服务 @@ -39,14 +39,14 @@ python ocr_web_client.py ``` #根据CPU/GPU设备选择一种启动方式 #for cpu user -python ocr_debugger_server.py cpu +python3 ocr_debugger_server.py cpu #for gpu user -python ocr_debugger_server.py gpu +python3 ocr_debugger_server.py gpu ``` ## 启动客户端 ``` -python ocr_web_client.py +python3 ocr_web_client.py ``` ## 性能指标 @@ -69,34 +69,34 @@ GPU: Nvidia Tesla V100单卡 ### 启动检测服务 ``` -python det_web_server.py cpu #for cpu user -python det_web_server.py gpu #for gpu user +python3 det_web_server.py cpu #for cpu user +python3 det_web_server.py gpu #for gpu user #or -python det_debugger_server.py cpu #for cpu user -python det_debugger_server.py gpu #for 
gpu user +python3 det_debugger_server.py cpu #for cpu user +python3 det_debugger_server.py gpu #for gpu user ``` ### 检测服务客户端 ``` # also use ocr_web_client.py -python ocr_web_client.py +python3 ocr_web_client.py ``` ### 启动识别服务 ``` -python rec_web_server.py cpu #for cpu user -python rec_web_server.py gpu #for gpu user +python3 rec_web_server.py cpu #for cpu user +python3 rec_web_server.py gpu #for gpu user #or -python rec_debugger_server.py cpu #for cpu user -python rec_debugger_server.py gpu #for gpu user +python3 rec_debugger_server.py cpu #for cpu user +python3 rec_debugger_server.py gpu #for gpu user ``` ### 识别服务客户端 ``` -python rec_web_client.py +python3 rec_web_client.py ``` ## C++ OCR Service服务 @@ -108,9 +108,9 @@ python rec_web_client.py 通过--model后,指定多个模型文件的文件夹路径来启动多模型串联的预测服务。 ``` #for cpu user -python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 #for gpu user -python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0 ``` ### 启动客户端 @@ -122,5 +122,5 @@ python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 通过在客户端启动后加入多个client模型的client配置文件夹路径,启动client进行预测。 ``` -python ocr_cpp_client.py ocr_det_client ocr_rec_client +python3 ocr_cpp_client.py ocr_det_client ocr_rec_client ``` diff --git a/python/examples/pipeline/PaddleClas/DarkNet53/README.md b/python/examples/pipeline/PaddleClas/DarkNet53/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/DarkNet53/README.md +++ b/python/examples/pipeline/PaddleClas/DarkNet53/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/DarkNet53/README_CN.md b/python/examples/pipeline/PaddleClas/DarkNet53/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/DarkNet53/README_CN.md +++ b/python/examples/pipeline/PaddleClas/DarkNet53/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.py b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.py index d643b90f5b7ac6ef6892bb83e7dfb20b650df49b..71b5219441a536789e02e4549c84a5cd550bc70f 100644 --- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.py +++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/DarkNet53/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/DarkNet53/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/DarkNet53/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/DarkNet53/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/HRNet_W18_C/README.md b/python/examples/pipeline/PaddleClas/HRNet_W18_C/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/README.md +++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/HRNet_W18_C/README_CN.md b/python/examples/pipeline/PaddleClas/HRNet_W18_C/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/README_CN.md +++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark.py b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark.py +++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/HRNet_W18_C/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/HRNet_W18_C/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/HRNet_W18_C/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/HRNet_W18_C/resnet50_web_service.py index 3e43ce8608e5e0edac1802910856be2ed6e6b635..c246e45db331925e47b8d026f4801c5acf5f2ae7 100644 --- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/MobileNetV1/README.md b/python/examples/pipeline/PaddleClas/MobileNetV1/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV1/README.md +++ b/python/examples/pipeline/PaddleClas/MobileNetV1/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/MobileNetV1/README_CN.md b/python/examples/pipeline/PaddleClas/MobileNetV1/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV1/README_CN.md +++ b/python/examples/pipeline/PaddleClas/MobileNetV1/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark.py b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/MobileNetV1/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/MobileNetV1/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV1/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV1/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/MobileNetV1/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/MobileNetV1/resnet50_web_service.py index 3e43ce8608e5e0edac1802910856be2ed6e6b635..c246e45db331925e47b8d026f4801c5acf5f2ae7 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV1/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV1/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/MobileNetV2/README.md b/python/examples/pipeline/PaddleClas/MobileNetV2/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV2/README.md +++ b/python/examples/pipeline/PaddleClas/MobileNetV2/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/MobileNetV2/README_CN.md b/python/examples/pipeline/PaddleClas/MobileNetV2/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV2/README_CN.md +++ b/python/examples/pipeline/PaddleClas/MobileNetV2/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark.py b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/MobileNetV2/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/MobileNetV2/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV2/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV2/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/MobileNetV2/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/MobileNetV2/resnet50_web_service.py index 3e43ce8608e5e0edac1802910856be2ed6e6b635..c246e45db331925e47b8d026f4801c5acf5f2ae7 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV2/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV2/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README.md b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README.md +++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README_CN.md b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README_CN.md +++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark.py b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/resnet50_web_service.py index 3e43ce8608e5e0edac1802910856be2ed6e6b635..c246e45db331925e47b8d026f4801c5acf5f2ae7 100644 --- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README.md b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README.md +++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README_CN.md b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README_CN.md +++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark.py b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark.py +++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/resnet50_web_service.py index 3e43ce8608e5e0edac1802910856be2ed6e6b635..c246e45db331925e47b8d026f4801c5acf5f2ae7 100644 --- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd/README.md b/python/examples/pipeline/PaddleClas/ResNet50_vd/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd/README.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd/README_CN.md b/python/examples/pipeline/PaddleClas/ResNet50_vd/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd/README_CN.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark.py b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/ResNet50_vd/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/ResNet50_vd/resnet50_web_service.py index 3e43ce8608e5e0edac1802910856be2ed6e6b635..c246e45db331925e47b8d026f4801c5acf5f2ae7 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README.md b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README_CN.md b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README_CN.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/resnet50_web_service.py index b89c2cc74f4c57906ff871e1dde244d5b37098c4..43dac2a27c64d79f85f73011755c418cc6a59f1e 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README.md b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README_CN.md b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README_CN.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/resnet50_web_service.py index 7aade27ea198afe1cbac7b775cfe3a6cbcb3b1df..569b15bcfa61a1a1732de303e2980e9b4387c9a0 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README.md b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README.md index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README.md @@ -10,10 +10,10 @@ sh get_model.sh ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README_CN.md b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README_CN.md index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README_CN.md +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/README_CN.md @@ -10,11 +10,10 @@ sh get_model.sh ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark.py index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np import requests import json diff --git a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/resnet50_web_service.py index 2734521dda15fe1c491fc66c5536203888d00d23..debc1753cc9174dd79bf3a0072681b352c8be17b 100644 --- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/resnet50_web_service.py +++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/resnet50_web_service.py @@ -13,10 +13,8 @@ # limitations under the License. import sys from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage -try: - from paddle_serving_server_gpu.web_service import WebService, Op -except ImportError: - from paddle_serving_server.web_service import WebService, Op + +from paddle_serving_server.web_service import WebService, Op import logging import numpy as np import base64, cv2 diff --git a/python/examples/pipeline/PaddleClas/ResNet_V2_50/README.md b/python/examples/pipeline/PaddleClas/ResNet_V2_50/README.md index 5b909301d9e114019ae8c6ac2bbfcec3cb188b33..1297abfb7a649e3eced26ea4c08848e0a51fbdbf 100644 --- a/python/examples/pipeline/PaddleClas/ResNet_V2_50/README.md +++ b/python/examples/pipeline/PaddleClas/ResNet_V2_50/README.md @@ -4,17 +4,17 @@ This document will takes Imagenet service as an example to introduce how to use ## Get model ``` -python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet +python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet tar -xzvf resnet_v2_50_imagenet.tar.gz ``` ## Start server ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## RPC test ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` diff --git a/python/examples/pipeline/PaddleClas/ResNet_V2_50/README_CN.md b/python/examples/pipeline/PaddleClas/ResNet_V2_50/README_CN.md index cc2fcdd7514fc197ec892826ec56b76906150578..d547b289281cb13a3abb49343b6b77230a2f3d2c 100644 --- a/python/examples/pipeline/PaddleClas/ResNet_V2_50/README_CN.md +++ b/python/examples/pipeline/PaddleClas/ResNet_V2_50/README_CN.md @@ -4,18 +4,17 @@ ## 获取模型 ``` -python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet +python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet tar -xzvf resnet_v2_50_imagenet.tar.gz ``` ## 启动服务 ``` -python resnet50_web_service.py &>log.txt & +python3 resnet50_web_service.py &>log.txt & ``` ## 测试 ``` -python pipeline_rpc_client.py +python3 pipeline_rpc_client.py ``` - diff --git a/python/examples/pipeline/PaddleClas/ResNet_V2_50/benchmark.py b/python/examples/pipeline/PaddleClas/ResNet_V2_50/benchmark.py index 98babc4acddb9a548afeafed1dfee16a88244714..562d159da3ce96233f7f9d2019fbb3061022dc06 100644 --- a/python/examples/pipeline/PaddleClas/ResNet_V2_50/benchmark.py +++ b/python/examples/pipeline/PaddleClas/ResNet_V2_50/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18000/imagenet/prediction" + url = "http://127.0.0.1:18000/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..82a570244cecc51061a38b64c25602f8dfbe931d 100644 --- a/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_rpc_client.py @@ -11,10 +11,8 @@ # 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-try:
-    from paddle_serving_server_gpu.pipeline import PipelineClient
-except ImportError:
-    from paddle_serving_server.pipeline import PipelineClient
+
+from paddle_serving_server.pipeline import PipelineClient
 import numpy as np
 import requests
 import json
diff --git a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README.md b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README.md
index d0fa99e6d72f10d3d2b5907285528b68685128e0..6fbe0c4cf3a635670341d5aee4cee8bcbdc59a88 100644
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README.md
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README.md
@@ -10,10 +10,10 @@ sh get_model.sh
 ## Start server
 ```
-python resnet50_web_service.py &>log.txt &
+python3 resnet50_web_service.py &>log.txt &
 ```
 ## RPC test
 ```
-python pipeline_rpc_client.py
+python3 pipeline_rpc_client.py
 ```
diff --git a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README_CN.md b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README_CN.md
index 335c96b2144b17e20d6007f376dec4416fb10aa5..c204c3c662825ed26001cf6d444d94f0bab508f7 100644
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README_CN.md
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/README_CN.md
@@ -10,11 +10,10 @@ sh get_model.sh
 ## 启动服务
 ```
-python resnet50_web_service.py &>log.txt &
+python3 resnet50_web_service.py &>log.txt &
 ```
 ## 测试
 ```
-python pipeline_rpc_client.py
+python3 pipeline_rpc_client.py
 ```
-
diff --git a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark.py b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark.py
index 2433b0132728dc96627254f9231949a74a551c28..90a3ff9bdda545a01427a26146edcbdf8332da30 100644
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark.py
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ import sys import os import base64 @@ -5,13 +19,13 @@ import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -24,6 +38,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id else: config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18080/imagenet/prediction" + url = "http://127.0.0.1:18080/imagenet/prediction" start = time.time() with open(os.path.join(".", "daisy.jpg"), 'rb') as file: @@ -68,6 +85,7 @@ def run_http(idx, batch_size): end = time.time() return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): client = PipelineClient() client.connect(['127.0.0.1:18080']) @@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] if device == "gpu": @@ -120,7 +140,7 @@ if __name__ == "__main__": gpu_id = None gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -131,4 +151,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/pipeline_rpc_client.py b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/pipeline_rpc_client.py index 34a08f4b5d1ec2861c3101685b434453d61156de..a816eb8eed49d922d5caf729dfd089fc28936853 100644 --- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/pipeline_rpc_client.py +++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/pipeline_rpc_client.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-try:
-    from paddle_serving_server_gpu.pipeline import PipelineClient
-except ImportError:
-    from paddle_serving_server.pipeline import PipelineClient
+from paddle_serving_server.pipeline import PipelineClient
 import numpy as np
 import requests
 import json
diff --git a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/resnet50_web_service.py b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/resnet50_web_service.py
index 3e43ce8608e5e0edac1802910856be2ed6e6b635..c246e45db331925e47b8d026f4801c5acf5f2ae7 100644
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/resnet50_web_service.py
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/resnet50_web_service.py
@@ -13,10 +13,8 @@
 # limitations under the License.
 import sys
 from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
-try:
-    from paddle_serving_server_gpu.web_service import WebService, Op
-except ImportError:
-    from paddle_serving_server.web_service import WebService, Op
+
+from paddle_serving_server.web_service import WebService, Op
 import logging
 import numpy as np
 import base64, cv2
diff --git a/python/examples/pipeline/PaddleDetection/faster_rcnn/README.md b/python/examples/pipeline/PaddleDetection/faster_rcnn/README.md
index 4d242be2f3f7550c3bb64053a5689894a6b2c76c..a56ecbef06d82eef59510a1242de7f19c0915d55 100644
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/README.md
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/README.md
@@ -8,11 +8,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
 ### Start the service
 ```
 tar xf faster_rcnn_r50_fpn_1x_coco.tar
-python web_service.py
+python3 web_service.py
 ```
 ### Perform prediction
 ```
-python pipeline_http_client.py
+python3 pipeline_http_client.py
 ```
diff --git a/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.py b/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.py
index f0a55614c1390b1d4f73bd015b1ce21b85e4ba55..8a25952cdda2e09f0f74794cf8a2226880f29040 100644
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.py
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ import sys import os import yaml @@ -6,17 +20,17 @@ import time import json import cv2 import base64 -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -29,6 +43,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -36,10 +51,11 @@ def gen_yml(device, gpu_id): config["dag"]["tracer"] = {"interval_s": 30} if device == "gpu": config["op"]["faster_rcnn"]["local_service_conf"]["device_type"] = 1 - config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id - with open("config2.yml", "w") as fout: + config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def run_http(idx, batch_size): print("start thread ({})".format(idx)) url = "http://127.0.0.1:18082/faster_rcnn/prediction" @@ -65,6 +81,7 @@ def run_http(idx, batch_size): break return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -84,22 +101,25 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): pass + def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] gpu_id = sys.argv[5] gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -110,4 +130,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/README.md b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/README.md index a37ca74056fb9dcd4a609f87f914e1ac71df070d..73087efca7abc75d9ed7d6178d962911b9a2b1cb 100644 --- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/README.md +++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/README.md @@ -10,11 +10,10 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf ppyolo_mbv3_large_coco.tar -python web_service.py +python3 web_service.py ``` ### Perform prediction ``` -python pipeline_http_client.py +python3 pipeline_http_client.py ``` - diff --git a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.py b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.py index a23f64314ef448f2617f92ab40f94f75cc6e707f..45853c065b013754d0d591686a9a03ad0aeb6a3d 100644 --- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.py +++ 
b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import yaml @@ -6,17 +20,17 @@ import time import json import cv2 import base64 -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -29,6 +43,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -37,9 +52,10 @@ def gen_yml(device, gpu_id): if device == "gpu": config["op"]["ppyolo_mbv3"]["local_service_conf"]["device_type"] = 1 config["op"]["ppyolo_mbv3"]["local_service_conf"]["devices"] = gpu_id - with open("config2.yml", "w") as fout: + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def run_http(idx, batch_size): print("start thread ({})".format(idx)) url = "http://127.0.0.1:18082/ppyolo_mbv3/prediction" @@ -65,6 +81,7 @@ def run_http(idx, batch_size): break return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -84,22 +101,25 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): pass + def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] gpu_id = sys.argv[5] gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -110,4 +130,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/PaddleDetection/yolov3/README.md b/python/examples/pipeline/PaddleDetection/yolov3/README.md index 1a1431a2a90d404c23728e5515c00aebce0fa4a7..8340f1060d0be6b100575ecbcb0270db0a6227f4 100644 --- a/python/examples/pipeline/PaddleDetection/yolov3/README.md +++ b/python/examples/pipeline/PaddleDetection/yolov3/README.md @@ -10,11 +10,10 @@ wget --no-check-certificate 
https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ### Start the service ``` tar xf yolov3_darknet53_270e_coco.tar -python web_service.py +python3 web_service.py ``` ### Perform prediction ``` -python pipeline_http_client.py +python3 pipeline_http_client.py ``` - diff --git a/python/examples/pipeline/PaddleDetection/yolov3/benchmark.py b/python/examples/pipeline/PaddleDetection/yolov3/benchmark.py index ae9c5a8fb25f56eebe3c3893a4a4d251f21e5b61..62732613dbfc6ab0b119609a547ea36c18b11ede 100644 --- a/python/examples/pipeline/PaddleDetection/yolov3/benchmark.py +++ b/python/examples/pipeline/PaddleDetection/yolov3/benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import yaml @@ -6,17 +20,17 @@ import time import json import cv2 import base64 -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency + def cv2_to_base64(image): return base64.b64encode(image).decode('utf8') + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -29,6 +43,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device, gpu_id): fin = open("config.yml", "r") config = yaml.load(fin) @@ -36,10 +51,11 @@ def gen_yml(device, gpu_id): config["dag"]["tracer"] = {"interval_s": 30} if device == "gpu": config["op"]["yolov3"]["local_service_conf"]["device_type"] = 1 - config["op"]["yolov3"]["local_service_conf"]["devices"] = gpu_id - with open("config2.yml", "w") as fout: + config["op"]["yolov3"]["local_service_conf"]["devices"] = gpu_id + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def run_http(idx, batch_size): print("start thread ({})".format(idx)) url = "http://127.0.0.1:18082/yolov3/prediction" @@ -65,6 +81,7 @@ def run_http(idx, batch_size): break return [[end - start], latency_list, [total_num]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() start = time.time() @@ -84,22 +101,25 @@ def multithread_http(thread, batch_size): total_cost)) show_latency(result[1]) + def run_rpc(thread, batch_size): pass + def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode = sys.argv[2] # brpc/ local predictor thread = int(sys.argv[3]) device = sys.argv[4] gpu_id = sys.argv[5] gen_yml(device, gpu_id) elif sys.argv[1] == "run": - mode = sys.argv[2] # http/ rpc + mode = 
sys.argv[2] # http/ rpc thread = int(sys.argv[3]) batch_size = int(sys.argv[4]) if mode == "http": @@ -110,4 +130,3 @@ if __name__ == "__main__": filein = sys.argv[2] fileout = sys.argv[3] parse_benchmark(filein, fileout) - diff --git a/python/examples/pipeline/bert/benchmark.py b/python/examples/pipeline/bert/benchmark.py index 3dece4914d6a606753c2b91db2a6d759e0ec6897..5abc646bffffff118ab24414e3a50f06668729d9 100644 --- a/python/examples/pipeline/bert/benchmark.py +++ b/python/examples/pipeline/bert/benchmark.py @@ -1,13 +1,25 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import os import yaml import requests import time import json -try: - from paddle_serving_server_gpu.pipeline import PipelineClient -except ImportError: - from paddle_serving_server.pipeline import PipelineClient + +from paddle_serving_server.pipeline import PipelineClient import numpy as np from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args, show_latency @@ -38,6 +50,8 @@ from paddle_serving_client.utils import benchmark_args, show_latency 2021-03-16 10:26:01,840 chl0(In: ['@DAGExecutor'], Out: ['bert']) size[0/0] 2021-03-16 10:26:01,841 chl1(In: ['bert'], Out: ['@DAGExecutor']) size[0/0] ''' + + def parse_benchmark(filein, fileout): with open(filein, "r") as fin: res = yaml.load(fin) @@ -50,6 +64,7 @@ def parse_benchmark(filein, fileout): with open(fileout, "w") as fout: yaml.dump(res, fout, default_flow_style=False) + def gen_yml(device): fin = open("config.yml", "r") config = yaml.load(fin) @@ -57,13 +72,14 @@ def gen_yml(device): config["dag"]["tracer"] = {"interval_s": 10} if device == "gpu": config["op"]["bert"]["local_service_conf"]["device_type"] = 1 - config["op"]["bert"]["local_service_conf"]["devices"] = "2" - with open("config2.yml", "w") as fout: + config["op"]["bert"]["local_service_conf"]["devices"] = "2" + with open("config2.yml", "w") as fout: yaml.dump(config, fout, default_flow_style=False) + def run_http(idx, batch_size): print("start thread ({})".format(idx)) - url = "http://127.0.0.1:18082/bert/prediction" + url = "http://127.0.0.1:18082/bert/prediction" start = time.time() with open("data-c.txt", 'r') as fin: start = time.time() @@ -84,9 +100,11 @@ def run_http(idx, batch_size): end = time.time() return [[end - start]] + def multithread_http(thread, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_http , thread, batch_size) + result = multi_thread_runner.run(run_http, thread, batch_size) + def run_rpc(thread, batch_size): client = PipelineClient() @@ -110,16 +128,17 @@ def run_rpc(thread, batch_size): def multithread_rpc(thraed, batch_size): multi_thread_runner = MultiThreadRunner() - result = multi_thread_runner.run(run_rpc , thread, batch_size) + result = multi_thread_runner.run(run_rpc, thread, batch_size) + if __name__ == "__main__": if sys.argv[1] == "yaml": - mode = sys.argv[2] # brpc/ local predictor + mode 
= sys.argv[2]  # brpc/ local predictor
         thread = int(sys.argv[3])
         device = sys.argv[4]
         gen_yml(device)
     elif sys.argv[1] == "run":
-        mode = sys.argv[2] # http/ rpc
+        mode = sys.argv[2]  # http/ rpc
         thread = int(sys.argv[3])
         batch_size = int(sys.argv[4])
         if mode == "http":
@@ -130,4 +149,3 @@ if __name__ == "__main__":
         filein = sys.argv[2]
         fileout = sys.argv[3]
         parse_benchmark(filein, fileout)
-
diff --git a/python/examples/pipeline/ocr/benchmark.py b/python/examples/pipeline/ocr/benchmark.py
index 79ecead3801cc48714812a7a8732e8b7a2367989..1e39176436b0be11093031ddfc4727ee68671c62 100644
--- a/python/examples/pipeline/ocr/benchmark.py
+++ b/python/examples/pipeline/ocr/benchmark.py
@@ -19,10 +19,8 @@ import yaml
 import requests
 import time
 import json
-try:
-    from paddle_serving_server_gpu.pipeline import PipelineClient
-except ImportError:
-    from paddle_serving_server.pipeline import PipelineClient
+
+from paddle_serving_server.pipeline import PipelineClient
 import numpy as np
 from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args, show_latency
diff --git a/python/examples/pipeline/simple_web_service/README.md b/python/examples/pipeline/simple_web_service/README.md
index f52f7a85d1c9da98572def013e8d83c5aca2419c..ce2fc841b92b27e1b310353d2b8ef31ae48a2aeb 100644
--- a/python/examples/pipeline/simple_web_service/README.md
+++ b/python/examples/pipeline/simple_web_service/README.md
@@ -10,7 +10,7 @@ sh get_data.sh
 ## Start server
 ```
-python web_service.py &>log.txt &
+python3 web_service.py &>log.txt &
 ```
 ## Http test
diff --git a/python/examples/pipeline/simple_web_service/README_CN.md b/python/examples/pipeline/simple_web_service/README_CN.md
index 8b07942c19c566f5638e4497eb7c4d5a9fc1f2b2..b7007d366e058af40e0383fb05f8cfcbca6e19d2 100644
--- a/python/examples/pipeline/simple_web_service/README_CN.md
+++ b/python/examples/pipeline/simple_web_service/README_CN.md
@@ -10,7 +10,7 @@ sh get_data.sh
 ## 启动服务
 ```
-python web_service.py &>log.txt &
+python3 web_service.py &>log.txt &
 ```
 ## 测试
diff --git a/python/examples/pipeline/simple_web_service/benchmark.py b/python/examples/pipeline/simple_web_service/benchmark.py
index f5041fab1c3d7f91f0b4b61a9a63fad168753dc6..c2c612dd2740d7c97da4289a0913270b03611e7a 100644
--- a/python/examples/pipeline/simple_web_service/benchmark.py
+++ b/python/examples/pipeline/simple_web_service/benchmark.py
@@ -1,28 +1,42 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import sys
 import os
 import yaml
 import requests
 import time
 import json
-try:
-    from paddle_serving_server_gpu.pipeline import PipelineClient
-except ImportError:
-    from paddle_serving_server.pipeline import PipelineClient
+
+from paddle_serving_server.pipeline import PipelineClient
 import numpy as np
 from paddle_serving_client.utils import MultiThreadRunner
 from paddle_serving_client.utils import benchmark_args, show_latency
+
 def gen_yml():
     fin = open("config.yml", "r")
     config = yaml.load(fin)
     fin.close()
     config["dag"]["tracer"] = {"interval_s": 5}
-    with open("config2.yml", "w") as fout: 
+    with open("config2.yml", "w") as fout:
         yaml.dump(config, fout, default_flow_style=False)
+
 def run_http(idx, batch_size):
     print("start thread ({})".format(idx))
-    url = "http://127.0.0.1:18082/uci/prediction" 
+    url = "http://127.0.0.1:18082/uci/prediction"
     start = time.time()
     value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"
     all_value = ";".join([value for i in range(batch_size)])
@@ -33,9 +47,11 @@ def run_http(idx, batch_size):
     end = time.time()
     return [[end - start]]
+
 def multithread_http(thread, batch_size):
     multi_thread_runner = MultiThreadRunner()
-    result = multi_thread_runner.run(run_http , thread, batch_size)
+    result = multi_thread_runner.run(run_http, thread, batch_size)
+
 def run_rpc(thread, batch_size):
     client = PipelineClient()
@@ -44,25 +60,26 @@ def run_rpc(thread, batch_size):
     all_value = ";".join([value for i in range(batch_size)])
     data = {"key": "x", "value": all_value}
     for i in range(1000):
-        ret = client.predict(feed_dict={data["key"]: data["value"]}, fetch=["res"])
+        ret = client.predict(
+            feed_dict={data["key"]: data["value"]}, fetch=["res"])
     print(ret)
+
 def multithread_rpc(thraed, batch_size):
     multi_thread_runner = MultiThreadRunner()
-    result = multi_thread_runner.run(run_rpc , thread, batch_size)
+    result = multi_thread_runner.run(run_rpc, thread, batch_size)
+
 if __name__ == "__main__":
     if sys.argv[1] == "yaml":
-        mode = sys.argv[2] # brpc/ local predictor
+        mode = sys.argv[2]  # brpc/ local predictor
         thread = int(sys.argv[3])
         gen_yml()
     elif sys.argv[1] == "run":
-        mode = sys.argv[2] # http/ rpc
+        mode = sys.argv[2]  # http/ rpc
         thread = int(sys.argv[3])
         batch_size = int(sys.argv[4])
         if mode == "http":
             multithread_http(thread, batch_size)
         elif mode == "rpc":
             multithread_rpc(thread, batch_size)
-
-
diff --git a/python/examples/resnet_v2_50/README.md b/python/examples/resnet_v2_50/README.md
index 0279918b664dfc5d5d922e8d7ba6bc6aaa15106a..12144b0ea9836c9eb647fa6482db244f1030354b 100644
--- a/python/examples/resnet_v2_50/README.md
+++ b/python/examples/resnet_v2_50/README.md
@@ -3,7 +3,7 @@
 ## Get Model
 ```
-python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
+python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
 tar -xzvf resnet_v2_50_imagenet.tar.gz
 ```
@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
 ### Start Service
 ```
-python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
+python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
 ```
 ### Client Prediction
 ```
-python resnet50_v2_tutorial.py
+python3 resnet50_v2_tutorial.py
 ```
diff --git a/python/examples/resnet_v2_50/README_CN.md b/python/examples/resnet_v2_50/README_CN.md
index c67e4f7c3e06c8fe0f3266ed51fc7d6db813ae7b..fee0e01f3cbac29052e4ae931027574ab6f778a0 100644
--- a/python/examples/resnet_v2_50/README_CN.md
+++ b/python/examples/resnet_v2_50/README_CN.md
@@ -3,7 +3,7 @@
 ## 获取模型
 ```
-python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
+python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
 tar -xzvf resnet_v2_50_imagenet.tar.gz
 ```
@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
 ### 启动服务端
 ```
-python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
+python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
 ```
 ### 客户端预测
 ```
-python resnet50_v2_tutorial.py
+python3 resnet50_v2_tutorial.py
 ```
diff --git a/python/examples/senta/README.md b/python/examples/senta/README.md
index 8929a9312c17264800f299f77afb583221006068..9a159133eeb20832c1870bb949136a59ae461901 100644
--- a/python/examples/senta/README.md
+++ b/python/examples/senta/README.md
@@ -3,16 +3,16 @@
 ## Get Model
 ```
-python -m paddle_serving_app.package --get_model senta_bilstm
-python -m paddle_serving_app.package --get_model lac
+python3 -m paddle_serving_app.package --get_model senta_bilstm
+python3 -m paddle_serving_app.package --get_model lac
 tar -xzvf senta_bilstm.tar.gz
 tar -xzvf lac.tar.gz
 ```
 ## Start HTTP Service
 ```
-python -m paddle_serving_server.serve --model lac_model --port 9300
-python senta_web_service.py
+python3 -m paddle_serving_server.serve --model lac_model --port 9300
+python3 senta_web_service.py
 ```
 In the Chinese sentiment classification task, the Chinese word segmentation needs to be done through [LAC task] (../lac). In this demo, the LAC task is placed in the preprocessing part of the HTTP prediction service of the sentiment classification task.
diff --git a/python/examples/senta/README_CN.md b/python/examples/senta/README_CN.md
index e5624dc975e6bc00de219f68cbf74dea7cac8360..a09fd117767cbdd01847d6cdef06992caf4a9715 100644
--- a/python/examples/senta/README_CN.md
+++ b/python/examples/senta/README_CN.md
@@ -3,16 +3,16 @@
 ## 获取模型文件
 ```
-python -m paddle_serving_app.package --get_model senta_bilstm
-python -m paddle_serving_app.package --get_model lac
+python3 -m paddle_serving_app.package --get_model senta_bilstm
+python3 -m paddle_serving_app.package --get_model lac
 tar -xzvf lac.tar.gz
 tar -xzvf senta_bilstm.tar.gz
 ```
 ## 启动HTTP服务
 ```
-python -m paddle_serving_server.serve --model lac_model --port 9300
-python senta_web_service.py
+python3 -m paddle_serving_server.serve --model lac_model --port 9300
+python3 senta_web_service.py
 ```
 中文情感分类任务中需要先通过[LAC任务](../lac)进行中文分词。
 示例中将LAC任务放在情感分类任务的HTTP预测服务的预处理部分。
diff --git a/python/examples/unet_for_image_seg/README.md b/python/examples/unet_for_image_seg/README.md
index 170dc133aea41a6f31696c2161d8e60ccfb4a621..59004712bd76f5388d6e57947f70ce22562f8dbe 100644
--- a/python/examples/unet_for_image_seg/README.md
+++ b/python/examples/unet_for_image_seg/README.md
@@ -3,7 +3,7 @@
 ## Get Model
 ```
-python -m paddle_serving_app.package --get_model unet
+python3 -m paddle_serving_app.package --get_model unet
 tar -xzvf unet.tar.gz
 ```
@@ -12,11 +12,11 @@ tar -xzvf unet.tar.gz
 ### Start Service
 ```
-python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
+python3 -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
 ```
 ### Client Prediction
 ```
-python seg_client.py
+python3 seg_client.py
 ```
diff --git a/python/examples/unet_for_image_seg/README_CN.md b/python/examples/unet_for_image_seg/README_CN.md
index eed1313eb938be67b80331e498b01a9749cb5dc6..53c2f1893a879d5585cea0b77103fc1461086784 100644
--- a/python/examples/unet_for_image_seg/README_CN.md
+++ b/python/examples/unet_for_image_seg/README_CN.md
@@ -3,7 +3,7 @@
 ## 获取模型
 ```
-python -m paddle_serving_app.package --get_model unet
+python3 -m paddle_serving_app.package --get_model unet
 tar -xzvf unet.tar.gz
 ```
@@ -12,11 +12,11 @@ tar -xzvf unet.tar.gz
 ### 启动服务端
 ```
-python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
+python3 -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
 ```
 ### 客户端预测
 ```
-python seg_client.py
+python3 seg_client.py
 ```
diff --git a/python/examples/util/README.md b/python/examples/util/README.md
index 64cb44a0a84d243810be409e2efd3870c8a4f75c..678ca388df5106e57f146a9758e3ef8da485e270 100644
--- a/python/examples/util/README.md
+++ b/python/examples/util/README.md
@@ -13,14 +13,14 @@ In order to show the time consuming of each stage more intuitively, a script is
 When using, first save the output of the client to a file, taking `profile` as an example.
 ```
-python show_profile.py profile ${thread_num}
+python3 show_profile.py profile ${thread_num}
 ```
 Here the `thread_num` parameter is the number of processes when the client is running, and the script will calculate the average time spent in each phase according to this parameter.
 The script calculates the time spent in each stage, divides by the number of threads to average, and prints to standard output.
 ```
-python timeline_trace.py profile trace
+python3 timeline_trace.py profile trace
 ```
 The script converts the time-dot information in the log into a json format and saves it to a trace file. The trace file can be visualized through the tracing function of the Chrome browser.
diff --git a/python/examples/util/README_CN.md b/python/examples/util/README_CN.md
index 43acef8073148b7a4978ed5c02fa5fa05258f6a0..aaca0ae21dd1af33a3fb708efd0b2113525e5141 100644
--- a/python/examples/util/README_CN.md
+++ b/python/examples/util/README_CN.md
@@ -13,14 +13,14 @@ export FLAGS_profile_server=1 #开启server端各阶段时间打点
 使用时先将client的输出保存到文件,以profile为例。
 ```
-python show_profile.py profile ${thread_num}
+python3 show_profile.py profile ${thread_num}
 ```
 这里thread_num参数为client运行时的进程数,脚本将按照这个参数来计算各阶段的平均耗时。
 脚本将计算各阶段的耗时,并除以线程数做平均,打印到标准输出。
 ```
-python timeline_trace.py profile trace
+python3 timeline_trace.py profile trace
 ```
 脚本将日志中的时间打点信息转换成json格式保存到trace文件,trace文件可以通过chrome浏览器的tracing功能进行可视化。
diff --git a/python/examples/xpu/fit_a_line_xpu/README.md b/python/examples/xpu/fit_a_line_xpu/README.md
index e54dc69f1042a6031e9f5a1570d67c5696817191..b74ddd38613ba30444fb97a34cbab1c154882574 100644
--- a/python/examples/xpu/fit_a_line_xpu/README.md
+++ b/python/examples/xpu/fit_a_line_xpu/README.md
@@ -13,15 +13,15 @@ sh get_data.sh
 ### Start server
 You can use the following code to start the RPC service
 ```shell
-python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
+python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
 ```
 ### Client prediction
-The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`).
+The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip3 install paddlepaddle`).
 ``` shell
-python test_client.py uci_housing_client/serving_client_conf.prototxt
+python3 test_client.py uci_housing_client/serving_client_conf.prototxt
 ```
 ## HTTP service
@@ -30,7 +30,7 @@ python test_client.py uci_housing_client/serving_client_conf.prototxt
 Start a web service with default web service hosting modules:
 ``` shell
-python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
+python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
 ```
 ### Client prediction
diff --git a/python/examples/xpu/fit_a_line_xpu/README_CN.md b/python/examples/xpu/fit_a_line_xpu/README_CN.md
index e19a17afb643db84129d20979b5822931ee335d7..60adac1c17a0a232a37a0235999a687b48dcbc7a 100644
--- a/python/examples/xpu/fit_a_line_xpu/README_CN.md
+++ b/python/examples/xpu/fit_a_line_xpu/README_CN.md
@@ -15,21 +15,21 @@ sh get_data.sh
 ### 开启服务端
 ``` shell
-python test_server.py uci_housing_model/
+python3 test_server.py uci_housing_model/
 ```
 也可以通过下面的一行代码开启默认RPC服务:
 ```shell
-python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
+python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
 ```
 ### 客户端预测
-`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。
+`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip3 install paddlepaddle`)。
 ``` shell
-python test_client.py uci_housing_client/serving_client_conf.prototxt
+python3 test_client.py uci_housing_client/serving_client_conf.prototxt
 ```
 ## HTTP服务
@@ -39,7 +39,7 @@ python test_client.py uci_housing_client/serving_client_conf.prototxt
 通过下面的一行代码开启默认web服务:
 ``` shell
-python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
+python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
 ```
 ### 客户端预测
diff --git a/python/examples/xpu/resnet_v2_50_xpu/README.md b/python/examples/xpu/resnet_v2_50_xpu/README.md
index ba19b6d7e442346fbc4ee890c34f6fa6c5b55bf7..76b04d614bd4513e806d9a139c38d66b8bce6569 100644
--- a/python/examples/xpu/resnet_v2_50_xpu/README.md
+++ b/python/examples/xpu/resnet_v2_50_xpu/README.md
@@ -3,7 +3,7 @@
 ## Get Model
 ```
-python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
+python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
 tar -xzvf resnet_v2_50_imagenet.tar.gz
 ```
@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
 ### Start Service
 ```
-python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
+python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
 ```
 ### Client Prediction
 ```
-python resnet50_client.py
+python3 resnet50_client.py
 ```
diff --git a/python/examples/xpu/resnet_v2_50_xpu/README_CN.md b/python/examples/xpu/resnet_v2_50_xpu/README_CN.md
index 007c90e4a498dc576982fc26a2814918ec1a7b91..652c4f672fd82b494a2240f327463e50dca8829c 100644
--- a/python/examples/xpu/resnet_v2_50_xpu/README_CN.md
+++ b/python/examples/xpu/resnet_v2_50_xpu/README_CN.md
@@ -3,7 +3,7 @@
 ## 获取模型
 ```
-python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
+python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
 tar -xzvf resnet_v2_50_imagenet.tar.gz
 ```
@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
 ### 启动服务端
 ```
-python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
+python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
 ```
 ### 客户端预测
 ```
-python resnet50_client.py
+python3 resnet50_client.py
 ```
diff --git a/python/examples/xpu/vgg19/README.md b/python/examples/xpu/vgg19/README.md
index 338a80562df3a74033c839cf42ab66e87982595c..d8520684f55a9caf88818905f4cc309f55304fe0 100644
--- a/python/examples/xpu/vgg19/README.md
+++ b/python/examples/xpu/vgg19/README.md
@@ -26,5 +26,5 @@ python3 -m paddle_serving_server.serve --model serving_server --port 7702 --use_
 ### Client Prediction
 ```
-python vgg19_client.py
+python3 vgg19_client.py
 ```
diff --git a/python/examples/yolov4/README.md b/python/examples/yolov4/README.md
index fb1bc7622da88cc827b64cfc37336a4de3331831..0c7cfa7c0ffb4938456aa908015aff2daf367727 100644
--- a/python/examples/yolov4/README.md
+++ b/python/examples/yolov4/README.md
@@ -5,19 +5,19 @@
 ## Get Model
 ```
-python -m paddle_serving_app.package --get_model yolov4
+python3 -m paddle_serving_app.package --get_model yolov4
 tar -xzvf yolov4.tar.gz
 ```
 ## Start RPC Service
 ```
-python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
+python3 -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
 ```
 ## Prediction
 ```
-python test_client.py 000000570688.jpg
+python3 test_client.py 000000570688.jpg
 ```
 After the prediction is completed, a json file to save the prediction result and a picture with the detection result box will be generated in the `./outpu folder.
diff --git a/python/examples/yolov4/README_CN.md b/python/examples/yolov4/README_CN.md
index 72923c5af51d2584ae151cbc15ba62efb48adced..1c773033418b9d072a7096a91d47b665b465c322 100644
--- a/python/examples/yolov4/README_CN.md
+++ b/python/examples/yolov4/README_CN.md
@@ -5,20 +5,20 @@
 ## 获取模型
 ```
-python -m paddle_serving_app.package --get_model yolov4
+python3 -m paddle_serving_app.package --get_model yolov4
 tar -xzvf yolov4.tar.gz
 ```
 ## 启动RPC服务
 ```
-python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
+python3 -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
 ```
 ## 预测
 ```
-python test_client.py 000000570688.jpg
+python3 test_client.py 000000570688.jpg
 ```
 预测完成会在`./output`文件夹下生成保存预测结果的json文件以及标出检测结果框的图片。
diff --git a/python/pipeline/util.py b/python/pipeline/util.py
index d7847f179de7557b5446958536008adc3c981f95..8bc15446b81c24162bbe2e236f204ffd1d0c23d1 100755
--- a/python/pipeline/util.py
+++ b/python/pipeline/util.py
@@ -39,7 +39,7 @@ class AvailablePortGenerator(object):
     def port_is_available(port):
         with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
             sock.settimeout(2)
-            result = sock.connect_ex(('0.0.0.0', port))
+            result = sock.connect_ex(('127.0.0.1', port))
         if result != 0:
             return True
         else: