diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 0ab248f8c8a0bca9fa6f97f4520a5a9781c9b239..5eca6c5ba88ca50909e78a3d3b67c60c228c8207 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -25,7 +25,7 @@
 set(BOOST_PROJECT "extern_boost")
 set(BOOST_VER "1.74.0")
 set(BOOST_TAR "boost_1_74_0" CACHE STRING "" FORCE)
-set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
+set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)

 MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")

diff --git a/cmake/external/cudnn.cmake b/cmake/external/cudnn.cmake
index 98466d44fc0dd91ef0cc8e8eac2660c42a19267c..f560d8723523264881d692a88a2d276035a7ac77 100644
--- a/cmake/external/cudnn.cmake
+++ b/cmake/external/cudnn.cmake
@@ -61,8 +61,11 @@
 else()
 endif()

 if(CUDNN_FOUND)
-  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
-
+  if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
+    file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
+  elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h")
+    file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+  endif()
   get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)

   string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 240c85762e41dc15fa9c9e559c0857352d1ba39c..c923486948f3d08af9a672b0ebad1c26708a6eff 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -27,12 +27,12 @@ set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/

 message( "WITH_GPU = ${WITH_GPU}")

-
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
 SET(PADDLE_VERSION "2.2.0-rc0")
 if (WITH_GPU)
+  message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}")
   # cuda 11.0 is not supported, 11.2 would be added.
   if(CUDA_VERSION EQUAL 10.1)
     set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5")
@@ -52,14 +52,19 @@
 else()
   set(WITH_TRT OFF)
 endif()
-
 if (WITH_GPU)
   SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/GPU/${CUDA_SUFFIX}")
 elseif (WITH_LITE)
+  message("cpu arch: ${CMAKE_SYSTEM_PROCESSOR}")
   if (WITH_XPU)
-    SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
+    if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86-64")
+      SET(PADDLE_LIB_VERSION "x86-64_gcc8.2_avx_mkl")
+    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+      SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
+    endif()
   else()
-    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
+    MESSAGE("paddle lite lib is unknown.")
+    SET(PADDLE_LIB_VERSION "paddle-lite-unknown")
   endif()
 else()
   if (WITH_AVX)
diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp
index 4d3b99f2d8c00fd8dace85b219ce60b2b7444ff5..cc55dd30a5649afac98810fb83f98a837932a523 100644
--- a/core/general-client/src/client.cpp
+++ b/core/general-client/src/client.cpp
@@ -23,8 +23,7 @@
 using configure::GeneralModelConfig;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -431,7 +430,8 @@ int PredictorOutputs::ParseProto(const Response& res,
                                  output.tensor(idx).int_data().begin(),
                                  output.tensor(idx).int_data().begin() + size);
     } else if (fetch_name_to_type[name] == P_UINT8
-              || fetch_name_to_type[name] == P_INT8) {
+              || fetch_name_to_type[name] == P_INT8
+              || fetch_name_to_type[name] == P_FP16) {
       VLOG(2) << "fetch var [" << name << "]type="
               << fetch_name_to_type[name];
       string_data_map[name] = output.tensor(idx).tensor_content();
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index fb71c0c9fc6e3680b8b51bad9ca891e41ef3a849..403119594c759a35d5dfd6251174627f367d9c65 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -25,8 +25,7 @@
 using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 482097d3e1fa1c7f7369573b1b1a0a5fde57ae58..2ad3e4cab6b77b305494c3833f0e3781ed0fd0b7 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -31,8 +31,7 @@
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -130,11 +129,11 @@ int GeneralReaderOp::inference() {
       data_len = tensor.tensor_content().size();
       src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_FP16) {
-      // paddle inference will support FLOAT16
-      // elem_size = 1;
-      // paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
-      // data_len = tensor.tensor_content().size();
-      // src_ptr = tensor.tensor_content().data();
+      // copy bytes from tensor content to TensorVector
+      elem_size = 1;
+      paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
+      data_len = tensor.tensor_content().size();
+      src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_STRING) {
       // use paddle::PaddleDType::UINT8 as for String.
       elem_size = sizeof(char);
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index e944c8d82d8aa2ad540455200cf835ce26eb366e..07d3473ec6ce12373114bfc50a67890ac2757634 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -178,14 +178,12 @@ int GeneralResponseOp::inference() {
         VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
                 << model_config->_fetch_name[idx] << "].";
         tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      }
-      // inference will support fp16
-      // else if (dtype == paddle::PaddleDType::FLOAT16) {
-      //   tensor->set_elem_type(5);
-      //   VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
-      //           << model_config->_fetch_name[idx] << "].";
-      //   tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      // }
+      } else if (dtype == paddle::PaddleDType::FLOAT16) {
+        tensor->set_elem_type(5);
+        VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
+                << model_config->_fetch_name[idx] << "].";
+        tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
+      }

       VLOG(2) << "(logid=" << log_id << ") fetch var ["
               << model_config->_fetch_name[idx] << "] ready";
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index a824acaff2417dcb5e885c0ae9e1acd6c17e7def..a6815d4939edfb2a0d6dcebaa602b545b770d52f 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -31,6 +31,7 @@
 #include "core/predictor/framework/infer_data.h"
 #include "core/predictor/framework/memory.h"
 #include "paddle_inference_api.h" // NOLINT
+#include "experimental/float16.h"
 namespace baidu {
 namespace paddle_serving {
 namespace predictor {
@@ -541,19 +542,17 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
                      paddle::PaddleDType::INT8) {
         int8_t* data = static_cast<int8_t*>(origin_data);
         lod_tensor_in->CopyFromCpu(data);
+      } else if ((*tensorVector_in_pointer)[i].dtype ==
+                 paddle::PaddleDType::FLOAT16) {
+        paddle::platform::float16* data =
+            static_cast<paddle::platform::float16*>(origin_data);
+        lod_tensor_in->CopyFromCpu(data);
       } else {
         LOG(ERROR) << "Inference not support type["
                    << (*tensorVector_in_pointer)[i].dtype << "],name["
                    << (*tensorVector_in_pointer)[i].name << "]"
                    << " copy into core failed!";
       }
-      // Paddle inference will support FP16 in next version.
-      // else if ((*tensorVector_in_pointer)[i].dtype ==
-      //   paddle::PaddleDType::FLOAT16) {
-      //   paddle::platform::float16* data =
-      //     static_cast<paddle::platform::float16*>(origin_data);
-      //   lod_tensor_in->CopyFromCpu(data);
-      // }
       VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
               << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
              << ";tensor_dtype=" << lod_tensor_in->type();
@@ -641,20 +640,18 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
         int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
         lod_tensor_out->CopyToCpu(data_out);
         databuf_char = reinterpret_cast<char*>(data_out);
+      } else if (dataType == paddle::PaddleDType::FLOAT16) {
+        databuf_size = out_num * sizeof(paddle::platform::float16);
+        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+        if (!databuf_data) {
+          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+          return -1;
+        }
+        paddle::platform::float16* data_out =
+            reinterpret_cast<paddle::platform::float16*>(databuf_data);
+        lod_tensor_out->CopyToCpu(data_out);
+        databuf_char = reinterpret_cast<char*>(data_out);
       }
-      // Inference will support FP16 in next version
-      // else if (dataType == paddle::PaddleDType::FLOAT16) {
-      //   using float16 = paddle::platform::float16;
-      //   databuf_size = out_num * sizeof(float16);
-      //   databuf_data = MempoolWrapper::instance().malloc(databuf_size);
-      //   if (!databuf_data) {
-      //     LOG(ERROR) << "Malloc failed, size: " << databuf_size;
-      //     return -1;
-      //   }
-      //   float16* data_out = reinterpret_cast<float16*>(databuf_data);
-      //   lod_tensor_out->CopyToCpu(data_out);
-      //   databuf_char = reinterpret_cast<char*>(data_out);
-      // }

       // Because task scheduling requires OPs to use 'Channel'
       // (which is a data structure) to transfer data between OPs.
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index 7cc8120f4eb818905c303b22a0b00d6b205bddb4..c76147b6842b9f01b3b4f65785102766d3940aef 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -266,6 +266,7 @@ class PaddleInferenceEngine : public EngineCore {
     if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
       // 2 MB l3 cache
       config.EnableXpu(2 * 1024 * 1024);
+      config.SetXpuDeviceId(gpu_id);
     }

     if (engine_conf.has_enable_memory_optimization() &&
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 589420ad45ae7f347c8e7b9b25c5cc0034830263..86740ce6033f9dbab490e9b9df21ffd17ebe0cd0 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -72,9 +72,13 @@ if (SERVER)
     if(CUDA_VERSION EQUAL 10.1)
       set(VERSION_SUFFIX 101)
     elseif(CUDA_VERSION EQUAL 10.2)
-      set(VERSION_SUFFIX 102)
-    elseif(CUDA_VERSION EQUAL 11.0)
-      set(VERSION_SUFFIX 11)
+      if(CUDNN_MAJOR_VERSION EQUAL 7)
+        set(VERSION_SUFFIX 1027)
+      elseif(CUDNN_MAJOR_VERSION EQUAL 8)
+        set(VERSION_SUFFIX 1028)
+      endif()
+    elseif(CUDA_VERSION EQUAL 11.2)
+      set(VERSION_SUFFIX 112)
     endif()
   endif()

diff --git a/python/examples/cascade_rcnn/label_list.txt b/python/examples/cascade_rcnn/label_list.txt
index d7d43a94adf73208f997f0efd6581bef11ca734e..941cb4e1392266f6a6c09b1fdc5f79503b2e5df6 100644
--- a/python/examples/cascade_rcnn/label_list.txt
+++ b/python/examples/cascade_rcnn/label_list.txt
@@ -1,4 +1,3 @@
-background
 person
 bicycle
 car
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index afe4ba62d69850482e82ba97d43ac747e0f69aaf..7de419530462b59f733f6ecc81e8b2fd9ce61b80 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -219,6 +219,7 @@ class LocalPredictor(object):
         if use_xpu:
             # 2MB l3 cache
             config.enable_xpu(8 * 1024 * 1024)
+            config.set_xpu_device_id(gpu_id)
         # set cpu low precision
         if not use_gpu and not use_lite:
             if precision_type == paddle_infer.PrecisionType.Int8:
diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py
index 9a8bb3db0180b8ce4617aeee6c7462da490884d8..648678f3afd9ffdc0af4c505779fc5eca0c42a37 100755
--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -551,6 +551,22 @@ class Client(object):
                         tmp_lod = result_batch_handle.get_lod(mi, name)
                         if np.size(tmp_lod) > 0:
                             result_map["{}.lod".format(name)] = tmp_lod
+                elif self.fetch_names_to_type_[name] == float16_type:
+                    # result_map[name] will be py::array(numpy array)
+                    tmp_str = result_batch_handle.get_string_by_name(
+                        mi, name)
+                    result_map[name] = np.fromstring(tmp_str, dtype = np.float16)
+                    if result_map[name].size == 0:
+                        raise ValueError(
+                            "Failed to fetch, maybe the type of [{}]"
+                            " is wrong, please check the model file".format(
+                                name))
+                    shape = result_batch_handle.get_shape(mi, name)
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
             multi_result_map.append(result_map)
         ret = None
         if len(model_engine_names) == 1:
diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py
index f21e13aaf40dd7720cab87da046c6754845a9bd4..0510579d7c4225d9bef81d880e01642ea93efd90 100755
--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -428,7 +428,7 @@ class Server(object):
         if device_type == "0":
             device_version = self.get_device_version()
         elif device_type == "1":
-            if version_suffix == "101" or version_suffix == "102":
+            if version_suffix == "101" or version_suffix == "1027" or version_suffix == "1028" or version_suffix == "112":
                 device_version = "gpu-" + version_suffix
             else:
                 device_version = "gpu-cuda" + version_suffix
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
index d04b96547e9fb2f7fa35d0983b6cb046f505e698..d9df5e3091053a62c98fd108a5985a1e518a7767 100644
--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -280,6 +280,10 @@
             server.set_gpuid(gpuid)
         # TODO: support arm or arm + xpu later
         server.set_device(self._device_name)
+        if self._use_xpu:
+            server.set_xpu()
+        if self._use_lite:
+            server.set_lite()
         server.set_op_sequence(op_seq_maker.get_op_sequence())
         server.set_num_threads(thread_num)

diff --git a/tools/Dockerfile.cuda11-cudnn8.devel b/tools/Dockerfile.cuda10.2-cudnn7.devel
similarity index 98%
rename from tools/Dockerfile.cuda11-cudnn8.devel
rename to tools/Dockerfile.cuda10.2-cudnn7.devel
index 2b4e75c7b112e7616ec91d03e12774386c974448..6425a7a39ec1ca84a3f4d5ab305bcb6b413862bc 100644
--- a/tools/Dockerfile.cuda11-cudnn8.devel
+++ b/tools/Dockerfile.cuda10.2-cudnn7.devel
@@ -1,7 +1,7 @@
 # A image for building paddle binaries
 # Use cuda devel base image for both cpu and gpu environment
 # When you modify it, please be aware of cudnn-runtime version
-FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu16.04
+FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04
 MAINTAINER PaddlePaddle Authors

 # ENV variables
@@ -104,7 +104,7 @@ ENV PATH=usr/local/go/bin:/root/go/bin:${PATH}

 # Downgrade TensorRT
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh cuda11
+RUN bash /build_scripts/install_trt.sh cuda10.2 cudnn7
 RUN rm -rf /build_scripts

 # git credential to skip password typing
diff --git a/tools/Dockerfile.cuda10.2-cudnn8.devel b/tools/Dockerfile.cuda10.2-cudnn8.devel
index 0b1f4f1cb519792ba9f917a1e7313906e9eea3b8..d07731343bb9bfd28f59dd4dcf240bcb26d302f5 100644
--- a/tools/Dockerfile.cuda10.2-cudnn8.devel
+++ b/tools/Dockerfile.cuda10.2-cudnn8.devel
@@ -104,7 +104,7 @@ ENV PATH=usr/local/go/bin:/root/go/bin:${PATH}

 # Downgrade TensorRT
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh cuda10.2
+RUN bash /build_scripts/install_trt.sh cuda10.2 cudnn8
 RUN rm -rf /build_scripts

 # git credential to skip password typing
diff --git a/tools/dockerfiles/build_scripts/install_trt.sh b/tools/dockerfiles/build_scripts/install_trt.sh
index 559a5552464a4ffb8891446548aa16537b06e059..b882bd216cb0746bb907ad5314befb2a939ccb68 100644
--- a/tools/dockerfiles/build_scripts/install_trt.sh
+++ b/tools/dockerfiles/build_scripts/install_trt.sh
@@ -15,20 +15,28 @@
 # limitations under the License.

 VERSION=$1
+CUDNN=$2
 if [[ "$VERSION" == "cuda10.1" ]];then
     wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda10.1-cudnn7.tar.gz --no-check-certificate
     tar -zxf TensorRT6-cuda10.1-cudnn7.tar.gz -C /usr/local
     cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/include/* /usr/include/ && cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/lib/* /usr/lib/
     echo "cuda10.1 trt install ==============>>>>>>>>>>>>"
     rm TensorRT6-cuda10.1-cudnn7.tar.gz
-elif [[ "$VERSION" == "cuda11" ]];then
-    wget -q https://paddle-ci.cdn.bcebos.com/TRT/TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz --no-check-certificate
-    tar -zxf TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz -C /usr/local
-    cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
-    rm TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz
+elif [[ "$VERSION" == "cuda11.2" ]];then
+    wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz --no-check-certificate
+    tar -zxf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
+    cp -rf /usr/local/TensorRT-8.0.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.0.3.4/lib/* /usr/lib/
+    rm -rf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
 elif [[ "$VERSION" == "cuda10.2" ]];then
-    wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda10.2-cudnn8.tar.gz --no-check-certificate
-    tar -zxf TensorRT7-cuda10.2-cudnn8.tar.gz -C /usr/local
-    cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
-    rm TensorRT7-cuda10.2-cudnn8.tar.gz
+    if [[ "$CUDNN" == "cudnn8" ]];then
+        wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda10.2-cudnn8.tar.gz --no-check-certificate
+        tar -zxf TensorRT7-cuda10.2-cudnn8.tar.gz -C /usr/local
+        cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
+        rm TensorRT7-cuda10.2-cudnn8.tar.gz
+    elif [[ "$CUDNN" == "cudnn7" ]];then
+        wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda10.2-cudnn7.tar.gz --no-check-certificate
+        tar -zxf TensorRT6-cuda10.2-cudnn7.tar.gz -C /usr/local
+        cp -rf /usr/local/TensorRT-6.0.1.8/include/* /usr/include/ && cp -rf /usr/local/TensorRT-6.0.1.8/lib/* /usr/lib/
+        rm -rf TensorRT6-cuda10.2-cudnn7.tar.gz
+    fi
 fi
diff --git a/tools/dockerfiles/build_scripts/install_whl.sh b/tools/dockerfiles/build_scripts/install_whl.sh
index 2eb95db8830cf8b0a7b0fd7717f8663092d1556e..f937cf2a62bbf8b8912817a9766550bb43193a93 100644
--- a/tools/dockerfiles/build_scripts/install_whl.sh
+++ b/tools/dockerfiles/build_scripts/install_whl.sh
@@ -53,7 +53,7 @@ if [[ $SERVING_VERSION == "0.5.0" ]]; then
     fi
     client_release="paddle-serving-client==$SERVING_VERSION"
     app_release="paddle-serving-app==0.3.1"
-elif [[ $SERVING_VERSION == "0.6.0" ]]; then
+else
    if [[ "$RUN_ENV" == "cpu" ]];then
        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl"
        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-$SERVING_VERSION.tar.gz"
@@ -80,10 +80,10 @@ if [[ "$RUN_ENV" == "cpu" ]];then
     python$PYTHON_VERSION -m pip install $paddle_whl
     cd /usr/local/
     wget $serving_bin
-    tar xf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz
-    mv $PWD/serving-cpu-noavx-openblas-${SERVING_VERSION} $PWD/serving_bin
+    tar xf serving-cpu-avx-mkl-${SERVING_VERSION}.tar.gz
+    mv $PWD/serving-cpu-avx-mkl-${SERVING_VERSION} $PWD/serving_bin
     echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
-    rm -rf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz
+    rm -rf serving-cpu-avx-mkl-${SERVING_VERSION}.tar.gz
     cd -
 elif [[ "$RUN_ENV" == "cuda10.1" ]];then
     python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
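
Note on the new FLOAT16 fetch path introduced above: `general_response_op.cpp` ships fp16 results as raw bytes in `tensor_content` (elem_type 5), and the client-side change in `client.py` decodes those bytes with numpy and then applies the fetched shape. Below is a minimal, standalone sketch of that decode step using plain numpy only (no Serving dependency); the byte string and shape are fabricated here purely for illustration.

```python
import numpy as np

# Fabricated stand-in for the fp16 tensor_content bytes a fetch would return:
# two bytes per element, IEEE half precision, row-major.
raw = np.array([[1.0, 2.5], [0.125, -3.0]], dtype=np.float16).tobytes()
shape = [2, 2]

# The same decode the client performs for float16_type fetch vars
# (client.py uses np.fromstring; np.frombuffer is the non-deprecated equivalent).
arr = np.frombuffer(raw, dtype=np.float16).reshape(shape)

print(arr)        # [[ 1.     2.5  ]
                  #  [ 0.125 -3.   ]]
print(arr.dtype)  # float16
```

On the server side the same bytes are copied straight into a FLOAT16 tensor via `CopyFromCpu` with `paddle::platform::float16*`, so no precision conversion happens in Serving itself.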