diff --git a/python/examples/pipeline/PaddleDetection/faster_rcnn/config.yml b/python/examples/pipeline/PaddleDetection/faster_rcnn/config.yml
index 0bcb6c288914acc852c82974eb7eacf560784255..891b4b997c2ebb98d6694464b5dbe0532c01145c 100644
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/config.yml
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/config.yml
@@ -1,18 +1,29 @@
 dag:
+  # Op resource type: True = thread model; False = process model
   is_thread_op: false
+  # Profiling: True generates Timeline performance data (some overhead); False disables it. NOTE(review): reads like a profiling-switch note, yet sits above `tracer` - confirm placement
   tracer:
     interval_s: 30
+# HTTP port. rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, no http_port is generated automatically
 http_port: 18082
 op:
   faster_rcnn:
+    # Concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
     concurrency: 2
-
     local_service_conf:
+      # Client type: brpc, grpc or local_predictor. local_predictor starts no Serving service and predicts in-process
       client_type: local_predictor
+      # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
       device_type: 1
+      # Compute device IDs: "" or omitted means CPU prediction; "0" or "0,1,2" means GPU prediction on the listed GPU cards
       devices: '2'
+      # Fetch list, named by fetch_var alias_name (the original note cites bert_seq128_model, presumably a copy-paste - verify); all outputs are returned if unset
       fetch_list:
       - save_infer_model/scale_0.tmp_1
+      # Model path
       model_config: serving_server/
+# RPC port. rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is set, rpc_port is automatically set to http_port+1
 rpc_port: 9998
+# worker_num: maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG
+# When build_dag_each_worker=False, the framework sets max_workers=worker_num on the main thread's gRPC thread pool
 worker_num: 20
diff --git a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/config.yml b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/config.yml
index 0476d4ce6554af31e21d0c2ea0473e23de18523f..71e93f39c7979522e73058af7fa2969575b5129c 100644
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/config.yml
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/config.yml
@@ -1,18 +1,30 @@
 dag:
+  # Op resource type: True = thread model; False = process model
   is_thread_op: false
+  # Profiling: True generates Timeline performance data (some overhead); False disables it. NOTE(review): reads like a profiling-switch note, yet sits above `tracer` - confirm placement
   tracer:
     interval_s: 30
+# HTTP port. rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, no http_port is generated automatically
 http_port: 18082
 op:
   ppyolo_mbv3:
+    # Concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
     concurrency: 10
 
     local_service_conf:
+      # Client type: brpc, grpc or local_predictor. local_predictor starts no Serving service and predicts in-process
       client_type: local_predictor
+      # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
       device_type: 1
+      # Compute device IDs: "" or omitted means CPU prediction; "0" or "0,1,2" means GPU prediction on the listed GPU cards
       devices: '2'
+      # Fetch list, named by fetch_var alias_name (the original note cites bert_seq128_model, presumably a copy-paste - verify); all outputs are returned if unset
       fetch_list:
       - save_infer_model/scale_0.tmp_1
+      # Model path
       model_config: serving_server/
+# RPC port. rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is set, rpc_port is automatically set to http_port+1
 rpc_port: 9998
+# worker_num: maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG
+# When build_dag_each_worker=False, the framework sets max_workers=worker_num on the main thread's gRPC thread pool
 worker_num: 20
diff --git a/python/examples/pipeline/PaddleDetection/yolov3/config.yml b/python/examples/pipeline/PaddleDetection/yolov3/config.yml
index 20653280736316c87d50786e76db5ba842040525..0f6d839edd3467e4dca203b9a21db850db3f4d5e 100644
--- a/python/examples/pipeline/PaddleDetection/yolov3/config.yml
+++ b/python/examples/pipeline/PaddleDetection/yolov3/config.yml
@@ -1,18 +1,29 @@
 dag:
+  # Op resource type: True = thread model; False = process model
   is_thread_op: false
+  # Profiling: True generates Timeline performance data (some overhead); False disables it. NOTE(review): reads like a profiling-switch note, yet sits above `tracer` - confirm placement
   tracer:
     interval_s: 30
+# HTTP port. rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, no http_port is generated automatically
 http_port: 18082
 op:
   yolov3:
+    # Concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
     concurrency: 10
-
     local_service_conf:
+      # Client type: brpc, grpc or local_predictor. local_predictor starts no Serving service and predicts in-process
       client_type: local_predictor
+      # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
       device_type: 1
+      # Compute device IDs: "" or omitted means CPU prediction; "0" or "0,1,2" means GPU prediction on the listed GPU cards
       devices: '2'
+      # Fetch list, named by fetch_var alias_name (the original note cites bert_seq128_model, presumably a copy-paste - verify); all outputs are returned if unset
       fetch_list:
       - save_infer_model/scale_0.tmp_1
+      # Model path
       model_config: serving_server/
+# RPC port. rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is set, rpc_port is automatically set to http_port+1
 rpc_port: 9998
+# worker_num: maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG
+# When build_dag_each_worker=False, the framework sets max_workers=worker_num on the main thread's gRPC thread pool
 worker_num: 20
diff --git a/python/examples/pipeline/bert/config.yml b/python/examples/pipeline/bert/config.yml
index a2b39264dd78ccb8f2936c7bd603d1c3d57b2574..5f1226646bb1a14fee3460bc98e25321b6aaa27a 100644
--- a/python/examples/pipeline/bert/config.yml
+++ b/python/examples/pipeline/bert/config.yml
@@ -1,17 +1,32 @@
+# worker_num: maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG
+# When build_dag_each_worker=False, the framework sets max_workers=worker_num on the main thread's gRPC thread pool
 worker_num: 20
+# build_dag_each_worker: False = the framework creates a single DAG inside the process; True = the framework creates multiple independent DAGs inside each process
+build_dag_each_worker: false
+
 dag:
+  # Op resource type: True = thread model; False = process model
   is_thread_op: false
+  # Profiling: True generates Timeline performance data (some overhead); False disables it. NOTE(review): reads like a profiling-switch note, yet sits above `tracer` - confirm placement
   tracer:
     interval_s: 10
+# HTTP port. rpc_port and http_port must not both be empty. When rpc_port is set and http_port is empty, no http_port is generated automatically
 http_port: 18082
+# RPC port. rpc_port and http_port must not both be empty. When rpc_port is empty and http_port is set, rpc_port is automatically set to http_port+1
 rpc_port: 9998
 op:
   bert:
+    # Concurrency: thread concurrency when is_thread_op=True, otherwise process concurrency
     concurrency: 2
-
+    # When the op config has no server_endpoints, the local service configuration is read from local_service_conf
     local_service_conf:
+      # Client type: brpc, grpc or local_predictor. local_predictor starts no Serving service and predicts in-process
       client_type: local_predictor
+      # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
       device_type: 1
+      # Compute device IDs: "" or omitted means CPU prediction; "0" or "0,1,2" means GPU prediction on the listed GPU cards
       devices: '2'
+      # Fetch list, named by the fetch_var alias_name in bert_seq128_model; all outputs are returned if unset
       fetch_list:
+      # BERT model path
       model_config: bert_seq128_model/
diff --git a/python/examples/pipeline/ocr/config.yml b/python/examples/pipeline/ocr/config.yml
index 58e3ed54d5d286290ff4846364c2393af427bd9d..2767fa77ceaa975c4e20bedaaf13ffa0e2b35de3 100644
--- a/python/examples/pipeline/ocr/config.yml
+++ b/python/examples/pipeline/ocr/config.yml
@@ -38,6 +38,9 @@ op:
 
             #Fetch结果列表，以client_config中fetch_var的alias_name为准
             fetch_list: ["concat_1.tmp_0"]
+            
+            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            device_type: 0
 
             #计算硬件ID，当devices为""或不写时为CPU预测；当devices为"0", "0,1,2"时为GPU预测，表示使用的GPU卡
             devices: ""
@@ -71,6 +74,8 @@ op:
 
             #Fetch结果列表，以client_config中fetch_var的alias_name为准
             fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] 
+            # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
+            device_type: 0
 
             #计算硬件ID，当devices为""或不写时为CPU预测；当devices为"0", "0,1,2"时为GPU预测，表示使用的GPU卡
             devices: ""
diff --git a/tools/Dockerfile.cuda10.1-cudnn7.devel b/tools/Dockerfile.cuda10.1-cudnn7.devel
index 24087af9490b8b5f4b7f57d70cb927c580da6066..1ed462ec4c1df845bc461577d97c3fee7d5852d6 100644
--- a/tools/Dockerfile.cuda10.1-cudnn7.devel
+++ b/tools/Dockerfile.cuda10.1-cudnn7.devel
@@ -83,7 +83,7 @@ RUN ln -sf /usr/local/bin/python3.6 /usr/local/bin/python3 && ln -sf /usr/local/
 RUN rm -r /root/python_build
 
 # Install Go and glide
-RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \
+RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \
     tar -xz -C /usr/local && \
     mkdir /root/go && \
     mkdir /root/go/bin && \
diff --git a/tools/Dockerfile.cuda10.2-cudnn7.devel b/tools/Dockerfile.cuda10.2-cudnn7.devel
index 6425a7a39ec1ca84a3f4d5ab305bcb6b413862bc..eee59b6e43ac18fc645dfb9c8399b33dff9f0e6d 100644
--- a/tools/Dockerfile.cuda10.2-cudnn7.devel
+++ b/tools/Dockerfile.cuda10.2-cudnn7.devel
@@ -83,7 +83,7 @@ RUN ln -sf /usr/local/bin/python3.6 /usr/local/bin/python3 && ln -sf /usr/local/
 RUN rm -r /root/python_build
 
 # Install Go and glide
-RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \
+RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \
     tar -xz -C /usr/local && \
     mkdir /root/go && \
     mkdir /root/go/bin && \
diff --git a/tools/Dockerfile.cuda10.2-cudnn8.devel b/tools/Dockerfile.cuda10.2-cudnn8.devel
index d07731343bb9bfd28f59dd4dcf240bcb26d302f5..5ba14c77c3ed3f479db5e05e9c9fbc8e6468dab6 100644
--- a/tools/Dockerfile.cuda10.2-cudnn8.devel
+++ b/tools/Dockerfile.cuda10.2-cudnn8.devel
@@ -83,7 +83,7 @@ RUN ln -sf /usr/local/bin/python3.6 /usr/local/bin/python3 && ln -sf /usr/local/
 RUN rm -r /root/python_build
 
 # Install Go and glide
-RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \
+RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \
     tar -xz -C /usr/local && \
     mkdir /root/go && \
     mkdir /root/go/bin && \
diff --git a/tools/Dockerfile.cuda11.2-cudnn8.devel b/tools/Dockerfile.cuda11.2-cudnn8.devel
new file mode 100644
index 0000000000000000000000000000000000000000..363096b1ddd48268275992941c740c9d8d34e868
--- /dev/null
+++ b/tools/Dockerfile.cuda11.2-cudnn8.devel
@@ -0,0 +1,147 @@
+# An image for building Paddle binaries
+# Uses the CUDA devel base image for both CPU and GPU environments
+# When you modify it, please be aware of the cudnn-runtime version
+FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu16.04
+MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
+
+# Build arguments, re-exported as ENV variables that default to ON when not supplied
+ARG WITH_GPU
+ARG WITH_AVX
+
+ENV WITH_GPU=${WITH_GPU:-ON}
+ENV WITH_AVX=${WITH_AVX:-ON}
+
+ENV HOME /root
+# Add bash enhancements (copies tools/dockerfiles/root/ into /root/)
+COPY tools/dockerfiles/root/ /root/
+
+# Prepare the packages needed to build Python (Python is compiled from source further down)
+RUN apt-get update && \
+    apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev \
+    libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
+    xz-utils tk-dev libffi-dev liblzma-dev
+
+RUN apt-get update && \
+    apt-get install -y --allow-downgrades --allow-change-held-packages \
+    patchelf git python-pip python-dev python-opencv openssh-server bison \
+    wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
+    curl sed grep graphviz libjpeg-dev zlib1g-dev  \
+    python-matplotlib unzip \
+    automake locales clang-format swig  \
+    liblapack-dev liblapacke-dev libcurl4-openssl-dev \
+    net-tools libtool module-init-tools vim && \
+    apt-get clean -y
+
+RUN ln -s /usr/lib/x86_64-linux-gnu/libssl.so /usr/lib/libssl.so.10 && \
+    ln -s /usr/lib/x86_64-linux-gnu/libcrypto.so /usr/lib/libcrypto.so.10
+
+RUN wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz -O shellcheck-v0.7.1.linux.x86_64.tar.xz && \
+    tar -xf shellcheck-v0.7.1.linux.x86_64.tar.xz && cp  shellcheck-v0.7.1/shellcheck /usr/bin/shellcheck && \
+    rm -rf shellcheck-v0.7.1.linux.x86_64.tar.xz shellcheck-v0.7.1
+
+# Replace the system gcc/g++ in /usr/bin with the gcc-8.2 toolchain under /usr/local/gcc-8.2
+WORKDIR /usr/bin
+COPY tools/dockerfiles/build_scripts /build_scripts
+RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts
+RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++
+RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc && \
+    ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ && \
+    ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc && \
+    ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++
+ENV PATH=/usr/local/gcc-8.2/bin:$PATH
+
+# Install CMake 3.16.0 from the prebuilt Linux tarball (unpacked under /home and put on PATH)
+WORKDIR /home
+RUN wget -q https://cmake.org/files/v3.16/cmake-3.16.0-Linux-x86_64.tar.gz && tar -zxvf cmake-3.16.0-Linux-x86_64.tar.gz && rm cmake-3.16.0-Linux-x86_64.tar.gz
+ENV PATH=/home/cmake-3.16.0-Linux-x86_64/bin:$PATH
+
+# Build SQLite (sqlite-autoconf-3250300) into /usr/local, then Python 3.6.0 from source (altinstall)
+RUN mkdir -p /root/python_build/ && wget -q https://www.sqlite.org/2018/sqlite-autoconf-3250300.tar.gz && \
+    tar -zxf sqlite-autoconf-3250300.tar.gz && cd sqlite-autoconf-3250300 && \
+    ./configure -prefix=/usr/local && make -j8 && make install && cd ../ && rm sqlite-autoconf-3250300.tar.gz
+
+RUN wget -q https://www.python.org/ftp/python/3.6.0/Python-3.6.0.tgz && \
+    tar -xzf Python-3.6.0.tgz && cd Python-3.6.0 && \
+    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
+    make -j8 > /dev/null && make altinstall > /dev/null && ldconfig && cd .. && rm -rf Python-3.6.0*
+
+# Build Python 3.7.0 from source (altinstall)
+RUN wget -q https://www.python.org/ftp/python/3.7.0/Python-3.7.0.tgz && \
+    tar -xzf Python-3.7.0.tgz && cd Python-3.7.0 && \
+    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
+    make -j8 > /dev/null && make altinstall > /dev/null && ldconfig && cd .. && rm -rf Python-3.7.0*
+
+# Build Python 3.8.0 from source (altinstall)
+RUN wget -q https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tgz && \
+    tar -xzf Python-3.8.0.tgz && cd Python-3.8.0 && \
+    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
+    make -j8 > /dev/null && make altinstall > /dev/null && ldconfig && cd .. && rm -rf Python-3.8.0*
+
+ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}
+RUN ln -sf /usr/local/bin/python3.6 /usr/local/bin/python3 && ln -sf /usr/local/bin/python3.6 /usr/bin/python3 && ln -sf /usr/local/bin/pip3.6 /usr/local/bin/pip3 && ln -sf /usr/local/bin/pip3.6 /usr/bin/pip3
+
+RUN rm -r /root/python_build
+
+# Install Go 1.17.2: toolchain unpacked to /usr/local/go, workspace (GOPATH) created at /root/go
+RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \
+    tar -xz -C /usr/local && \
+    mkdir /root/go && \
+    mkdir /root/go/bin && \
+    mkdir /root/go/src && \
+    echo "GOROOT=/usr/local/go" >> /root/.bashrc && \
+    echo "GOPATH=/root/go" >> /root/.bashrc && \
+    echo "PATH=/usr/local/go/bin:/root/go/bin:$PATH" >> /root/.bashrc
+ENV GOROOT=/usr/local/go GOPATH=/root/go
+# Keep PATH in its own ENV instruction, separate from the GOROOT definition above. The Go bin directory must be an absolute path (leading '/').
+ENV PATH=/usr/local/go/bin:/root/go/bin:${PATH}
+
+# Install TensorRT
+# The following TensorRT.tar.gz is not the default official one; we make two minor changes:
+# 1. Remove the unnecessary files to make the library small. TensorRT.tar.gz only contains include and lib now,
+#    and its size is only one-third of the official one.
+# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
+#    See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
+
+# Run install_trt.sh with the cuda11.2 argument, then remove the copied build scripts
+COPY tools/dockerfiles/build_scripts /build_scripts
+RUN bash /build_scripts/install_trt.sh cuda11.2 
+RUN rm -rf /build_scripts
+
+# Store git credentials to skip password typing
+RUN git config --global credential.helper store
+
+# Fix locales to en_US.UTF-8
+RUN localedef -i en_US -f UTF-8 en_US.UTF-8
+
+RUN apt-get install libprotobuf-dev -y
+
+# Older versions of patchelf limited the size of the files they could process; this was fixed in the commit below.
+# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa
+# So install a newer version here.
+RUN wget -q https://paddle-ci.cdn.bcebos.com/patchelf_0.10-2_amd64.deb && \
+    dpkg -i patchelf_0.10-2_amd64.deb
+
+# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
+RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
+CMD source ~/.bashrc
+
+# Build ccache 3.7.9 from source into /usr/local/ccache-3.7.9 and link it into /usr/local/bin
+RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
+    tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \
+    ./configure -prefix=/usr/local/ccache-3.7.9 && \
+    make -j8 && make install && \
+    ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache
+
+RUN python3.8 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.7 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.6 -m pip install --upgrade pip==21.1.1 requests 
+
+RUN wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
+    tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
+    mv libcrypto.so.1.0.2k /usr/lib/libcrypto.so.1.0.2k && mv libssl.so.1.0.2k /usr/lib/libssl.so.1.0.2k && \
+    ln -sf /usr/lib/libcrypto.so.1.0.2k /usr/lib/libcrypto.so.10 && \
+    ln -sf /usr/lib/libssl.so.1.0.2k /usr/lib/libssl.so.10 && \
+    ln -sf /usr/lib/libcrypto.so.10 /usr/lib/libcrypto.so && \
+    ln -sf /usr/lib/libssl.so.10 /usr/lib/libssl.so
+
+EXPOSE 22
diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel
index be31b2e9abd90f644eb0f94a6d672639e4b7f6c5..287759e8f82f3fc37200bb791a1bd6530ab6516e 100644
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -83,7 +83,7 @@ RUN ln -sf /usr/local/bin/python3.6 /usr/local/bin/python3 && ln -sf /usr/local/
 RUN rm -r /root/python_build
 
 # Install Go and glide
-RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \
+RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \
     tar -xz -C /usr/local && \
     mkdir /root/go && \
     mkdir /root/go/bin && \