diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 0ab248f8c8a0bca9fa6f97f4520a5a9781c9b239..5eca6c5ba88ca50909e78a3d3b67c60c228c8207 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -25,7 +25,7 @@
 set(BOOST_PROJECT "extern_boost")
 set(BOOST_VER "1.74.0")
 set(BOOST_TAR "boost_1_74_0" CACHE STRING "" FORCE)
-set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
+set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)

 MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")

diff --git a/cmake/external/cudnn.cmake b/cmake/external/cudnn.cmake
index 98466d44fc0dd91ef0cc8e8eac2660c42a19267c..f560d8723523264881d692a88a2d276035a7ac77 100644
--- a/cmake/external/cudnn.cmake
+++ b/cmake/external/cudnn.cmake
@@ -61,8 +61,11 @@
 else()
 endif()

 if(CUDNN_FOUND)
-  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
-
+  if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
+    file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
+  elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h")
+    file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+  endif()
   get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)

   string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 240c85762e41dc15fa9c9e559c0857352d1ba39c..c923486948f3d08af9a672b0ebad1c26708a6eff 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -27,12 +27,12 @@ set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/

 message( "WITH_GPU = ${WITH_GPU}")

-
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
 SET(PADDLE_VERSION "2.2.0-rc0")
 if (WITH_GPU)
+  message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}")
   # cuda 11.0 is not supported, 11.2 would be added.
   if(CUDA_VERSION EQUAL 10.1)
     set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5")
@@ -52,14 +52,19 @@
 else()
   set(WITH_TRT OFF)
 endif()
-
 if (WITH_GPU)
   SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/GPU/${CUDA_SUFFIX}")
 elseif (WITH_LITE)
+  message("cpu arch: ${CMAKE_SYSTEM_PROCESSOR}")
   if (WITH_XPU)
-    SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
+    if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86-64")
+      SET(PADDLE_LIB_VERSION "x86-64_gcc8.2_avx_mkl")
+    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+      SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
+    endif()
   else()
-    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
+    MESSAGE("paddle lite lib is unknown.")
+    SET(PADDLE_LIB_VERSION "paddle-lite-unknown")
   endif()
 else()
   if (WITH_AVX)
diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp
index 4d3b99f2d8c00fd8dace85b219ce60b2b7444ff5..cc55dd30a5649afac98810fb83f98a837932a523 100644
--- a/core/general-client/src/client.cpp
+++ b/core/general-client/src/client.cpp
@@ -23,8 +23,7 @@
 using configure::GeneralModelConfig;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -431,7 +430,8 @@ int PredictorOutputs::ParseProto(const Response& res,
                                  output.tensor(idx).int_data().begin(),
                                  output.tensor(idx).int_data().begin() + size);
     } else if (fetch_name_to_type[name] == P_UINT8
-              || fetch_name_to_type[name] == P_INT8) {
+              || fetch_name_to_type[name] == P_INT8
+              || fetch_name_to_type[name] == P_FP16) {
       VLOG(2) << "fetch var [" << name << "]type="
               << fetch_name_to_type[name];
       string_data_map[name] = output.tensor(idx).tensor_content();
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index fb71c0c9fc6e3680b8b51bad9ca891e41ef3a849..403119594c759a35d5dfd6251174627f367d9c65 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -25,8 +25,7 @@
 using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 482097d3e1fa1c7f7369573b1b1a0a5fde57ae58..2ad3e4cab6b77b305494c3833f0e3781ed0fd0b7 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -31,8 +31,7 @@
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -130,11 +129,11 @@ int GeneralReaderOp::inference() {
       data_len = tensor.tensor_content().size();
       src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_FP16) {
-      // paddle inference will support FLOAT16
-      // elem_size = 1;
-      // paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
-      // data_len = tensor.tensor_content().size();
-      // src_ptr = tensor.tensor_content().data();
+      // copy bytes from tensor content to TensorVector
+      elem_size = 1;
+      paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
+      data_len = tensor.tensor_content().size();
+      src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_STRING) {
       // use paddle::PaddleDType::UINT8 as for String.
       elem_size = sizeof(char);
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index e944c8d82d8aa2ad540455200cf835ce26eb366e..07d3473ec6ce12373114bfc50a67890ac2757634 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -178,14 +178,12 @@ int GeneralResponseOp::inference() {
         VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
                 << model_config->_fetch_name[idx] << "].";
         tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      }
-      // inference will support fp16
-      // else if (dtype == paddle::PaddleDType::FLOAT16) {
-      //   tensor->set_elem_type(5);
-      //   VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
-      //           << model_config->_fetch_name[idx] << "].";
-      //   tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      // }
+      } else if (dtype == paddle::PaddleDType::FLOAT16) {
+        tensor->set_elem_type(5);
+        VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
+                << model_config->_fetch_name[idx] << "].";
+        tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
+      }

       VLOG(2) << "(logid=" << log_id << ") fetch var ["
               << model_config->_fetch_name[idx] << "] ready";
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index a824acaff2417dcb5e885c0ae9e1acd6c17e7def..a6815d4939edfb2a0d6dcebaa602b545b770d52f 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -31,6 +31,7 @@
 #include "core/predictor/framework/infer_data.h"
 #include "core/predictor/framework/memory.h"
 #include "paddle_inference_api.h" // NOLINT
+#include "experimental/float16.h"
 namespace baidu {
 namespace paddle_serving {
 namespace predictor {
@@ -541,19 +542,17 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
                      paddle::PaddleDType::INT8) {
         int8_t* data = static_cast<int8_t*>(origin_data);
         lod_tensor_in->CopyFromCpu(data);
+      } else if ((*tensorVector_in_pointer)[i].dtype ==
+                 paddle::PaddleDType::FLOAT16) {
+        paddle::platform::float16* data =
+            static_cast<paddle::platform::float16*>(origin_data);
+        lod_tensor_in->CopyFromCpu(data);
       } else {
         LOG(ERROR) << "Inference not support type["
                    << (*tensorVector_in_pointer)[i].dtype << "],name["
                    << (*tensorVector_in_pointer)[i].name << "]"
                    << " copy into core failed!";
       }
-      // Paddle inference will support FP16 in next version.
-      // else if ((*tensorVector_in_pointer)[i].dtype ==
-      //   paddle::PaddleDType::FLOAT16) {
-      //   paddle::platform::float16* data =
-      //     static_cast<paddle::platform::float16*>(origin_data);
-      //   lod_tensor_in->CopyFromCpu(data);
-      // }
       VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
               << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
              << ";tensor_dtype=" << lod_tensor_in->type();
@@ -641,20 +640,18 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
         int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
         lod_tensor_out->CopyToCpu(data_out);
         databuf_char = reinterpret_cast<char*>(data_out);
+      } else if (dataType == paddle::PaddleDType::FLOAT16) {
+        databuf_size = out_num * sizeof(paddle::platform::float16);
+        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+        if (!databuf_data) {
+          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+          return -1;
+        }
+        paddle::platform::float16* data_out =
+            reinterpret_cast<paddle::platform::float16*>(databuf_data);
+        lod_tensor_out->CopyToCpu(data_out);
+        databuf_char = reinterpret_cast<char*>(data_out);
       }
-      // Inference will support FP16 in next version
-      // else if (dataType == paddle::PaddleDType::FLOAT16) {
-      //   using float16 = paddle::platform::float16;
-      //   databuf_size = out_num * sizeof(float16);
-      //   databuf_data = MempoolWrapper::instance().malloc(databuf_size);
-      //   if (!databuf_data) {
-      //     LOG(ERROR) << "Malloc failed, size: " << databuf_size;
-      //     return -1;
-      //   }
-      //   float16* data_out = reinterpret_cast<float16*>(databuf_data);
-      //   lod_tensor_out->CopyToCpu(data_out);
-      //   databuf_char = reinterpret_cast<char*>(data_out);
-      // }

       // Because task scheduling requires OPs to use 'Channel'
       // (which is a data structure) to transfer data between OPs.
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index 7cc8120f4eb818905c303b22a0b00d6b205bddb4..c76147b6842b9f01b3b4f65785102766d3940aef 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -266,6 +266,7 @@ class PaddleInferenceEngine : public EngineCore {
     if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
       // 2 MB l3 cache
       config.EnableXpu(2 * 1024 * 1024);
+      config.SetXpuDeviceId(gpu_id);
     }

     if (engine_conf.has_enable_memory_optimization() &&
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 589420ad45ae7f347c8e7b9b25c5cc0034830263..86740ce6033f9dbab490e9b9df21ffd17ebe0cd0 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -72,9 +72,13 @@ if (SERVER)
     if(CUDA_VERSION EQUAL 10.1)
       set(VERSION_SUFFIX 101)
     elseif(CUDA_VERSION EQUAL 10.2)
-      set(VERSION_SUFFIX 102)
-    elseif(CUDA_VERSION EQUAL 11.0)
-      set(VERSION_SUFFIX 11)
+      if(CUDNN_MAJOR_VERSION EQUAL 7)
+        set(VERSION_SUFFIX 1027)
+      elseif(CUDNN_MAJOR_VERSION EQUAL 8)
+        set(VERSION_SUFFIX 1028)
+      endif()
+    elseif(CUDA_VERSION EQUAL 11.2)
+      set(VERSION_SUFFIX 112)
     endif()
   endif()

diff --git a/python/examples/cascade_rcnn/label_list.txt b/python/examples/cascade_rcnn/label_list.txt
index d7d43a94adf73208f997f0efd6581bef11ca734e..941cb4e1392266f6a6c09b1fdc5f79503b2e5df6 100644
--- a/python/examples/cascade_rcnn/label_list.txt
+++ b/python/examples/cascade_rcnn/label_list.txt
@@ -1,4 +1,3 @@
-background
 person
 bicycle
 car
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index afe4ba62d69850482e82ba97d43ac747e0f69aaf..7de419530462b59f733f6ecc81e8b2fd9ce61b80 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -219,6 +219,7 @@ class LocalPredictor(object):
         if use_xpu:
             # 2MB l3 cache
             config.enable_xpu(8 * 1024 * 1024)
+            config.set_xpu_device_id(gpu_id)
         # set cpu low precision
         if not use_gpu and not use_lite:
             if precision_type == paddle_infer.PrecisionType.Int8:
diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py
index 9a8bb3db0180b8ce4617aeee6c7462da490884d8..648678f3afd9ffdc0af4c505779fc5eca0c42a37 100755
--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -551,6 +551,22 @@ class Client(object):
                         tmp_lod = result_batch_handle.get_lod(mi, name)
                         if np.size(tmp_lod) > 0:
                             result_map["{}.lod".format(name)] = tmp_lod
+                elif self.fetch_names_to_type_[name] == float16_type:
+                    # result_map[name] will be py::array(numpy array)
+                    tmp_str = result_batch_handle.get_string_by_name(
+                        mi, name)
+                    result_map[name] = np.fromstring(tmp_str, dtype = np.float16)
+                    if result_map[name].size == 0:
+                        raise ValueError(
+                            "Failed to fetch, maybe the type of [{}]"
+                            " is wrong, please check the model file".format(
+                                name))
+                    shape = result_batch_handle.get_shape(mi, name)
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
             multi_result_map.append(result_map)
         ret = None
         if len(model_engine_names) == 1:
diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py
index f21e13aaf40dd7720cab87da046c6754845a9bd4..0510579d7c4225d9bef81d880e01642ea93efd90 100755
--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -428,7 +428,7 @@ class Server(object):
         if device_type == "0":
             device_version = self.get_device_version()
         elif device_type == "1":
-            if version_suffix == "101" or version_suffix == "102":
+            if version_suffix == "101" or version_suffix == "1027" or version_suffix == "1028" or version_suffix == "112":
                 device_version = "gpu-" + version_suffix
             else:
                 device_version = "gpu-cuda" + version_suffix
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
index d04b96547e9fb2f7fa35d0983b6cb046f505e698..d9df5e3091053a62c98fd108a5985a1e518a7767 100644
--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -280,6 +280,10 @@
             server.set_gpuid(gpuid)
         # TODO: support arm or arm + xpu later
         server.set_device(self._device_name)
+        if self._use_xpu:
+            server.set_xpu()
+        if self._use_lite:
+            server.set_lite()
         server.set_op_sequence(op_seq_maker.get_op_sequence())
         server.set_num_threads(thread_num)

diff --git a/tools/Dockerfile.cuda11-cudnn8.devel b/tools/Dockerfile.cuda10.2-cudnn7.devel
similarity index 98%
rename from tools/Dockerfile.cuda11-cudnn8.devel
rename to tools/Dockerfile.cuda10.2-cudnn7.devel
index 2b4e75c7b112e7616ec91d03e12774386c974448..6425a7a39ec1ca84a3f4d5ab305bcb6b413862bc 100644
--- a/tools/Dockerfile.cuda11-cudnn8.devel
+++ b/tools/Dockerfile.cuda10.2-cudnn7.devel
@@ -1,7 +1,7 @@
 # A image for building paddle binaries
 # Use cuda devel base image for both cpu and gpu environment
 # When you modify it, please be aware of cudnn-runtime version
-FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu16.04
+FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu16.04
 MAINTAINER PaddlePaddle Authors

 # ENV variables
@@ -104,7 +104,7 @@ ENV PATH=usr/local/go/bin:/root/go/bin:${PATH}

 # Downgrade TensorRT
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh cuda11
+RUN bash /build_scripts/install_trt.sh cuda10.2 cudnn7
 RUN rm -rf /build_scripts

 # git credential to skip password typing
diff --git a/tools/Dockerfile.cuda10.2-cudnn8.devel b/tools/Dockerfile.cuda10.2-cudnn8.devel
index 0b1f4f1cb519792ba9f917a1e7313906e9eea3b8..d07731343bb9bfd28f59dd4dcf240bcb26d302f5 100644
--- a/tools/Dockerfile.cuda10.2-cudnn8.devel
+++ b/tools/Dockerfile.cuda10.2-cudnn8.devel
@@ -104,7 +104,7 @@ ENV PATH=usr/local/go/bin:/root/go/bin:${PATH}

 # Downgrade TensorRT
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh cuda10.2
+RUN bash /build_scripts/install_trt.sh cuda10.2 cudnn8
 RUN rm -rf /build_scripts

 # git credential to skip password typing
diff --git a/tools/dockerfiles/build_scripts/install_trt.sh b/tools/dockerfiles/build_scripts/install_trt.sh
index 559a5552464a4ffb8891446548aa16537b06e059..b882bd216cb0746bb907ad5314befb2a939ccb68 100644
--- a/tools/dockerfiles/build_scripts/install_trt.sh
+++ b/tools/dockerfiles/build_scripts/install_trt.sh
@@ -15,20 +15,28 @@
 # limitations under the License.

 VERSION=$1
+CUDNN=$2
 if [[ "$VERSION" == "cuda10.1" ]];then
     wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda10.1-cudnn7.tar.gz --no-check-certificate
     tar -zxf TensorRT6-cuda10.1-cudnn7.tar.gz -C /usr/local
     cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/include/* /usr/include/ && cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/lib/* /usr/lib/
     echo "cuda10.1 trt install ==============>>>>>>>>>>>>"
     rm TensorRT6-cuda10.1-cudnn7.tar.gz
-elif [[ "$VERSION" == "cuda11" ]];then
-    wget -q https://paddle-ci.cdn.bcebos.com/TRT/TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz --no-check-certificate
-    tar -zxf TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz -C /usr/local
-    cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
-    rm TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz
+elif [[ "$VERSION" == "cuda11.2" ]];then
+    wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz --no-check-certificate
+    tar -zxf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
+    cp -rf /usr/local/TensorRT-8.0.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.0.3.4/lib/* /usr/lib/
+    rm -rf TensorRT-8.0.3.4.Linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz
 elif [[ "$VERSION" == "cuda10.2" ]];then
-    wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda10.2-cudnn8.tar.gz --no-check-certificate
-    tar -zxf TensorRT7-cuda10.2-cudnn8.tar.gz -C /usr/local
-    cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
-    rm TensorRT7-cuda10.2-cudnn8.tar.gz
+    if [[ "$CUDNN" == "cudnn8" ]];then
+        wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda10.2-cudnn8.tar.gz --no-check-certificate
+        tar -zxf TensorRT7-cuda10.2-cudnn8.tar.gz -C /usr/local
+        cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
+        rm TensorRT7-cuda10.2-cudnn8.tar.gz
+    elif [[ "$CUDNN" == "cudnn7" ]];then
+        wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda10.2-cudnn7.tar.gz --no-check-certificate
+        tar -zxf TensorRT6-cuda10.2-cudnn7.tar.gz -C /usr/local
+        cp -rf /usr/local/TensorRT-6.0.1.8/include/* /usr/include/ && cp -rf /usr/local/TensorRT-6.0.1.8/lib/* /usr/lib/
+        rm -rf TensorRT6-cuda10.2-cudnn7.tar.gz
+    fi
 fi
diff --git a/tools/dockerfiles/build_scripts/install_whl.sh b/tools/dockerfiles/build_scripts/install_whl.sh
index 2eb95db8830cf8b0a7b0fd7717f8663092d1556e..f937cf2a62bbf8b8912817a9766550bb43193a93 100644
--- a/tools/dockerfiles/build_scripts/install_whl.sh
+++ b/tools/dockerfiles/build_scripts/install_whl.sh
@@ -53,7 +53,7 @@ if [[ $SERVING_VERSION == "0.5.0" ]]; then
     fi
     client_release="paddle-serving-client==$SERVING_VERSION"
     app_release="paddle-serving-app==0.3.1"
-elif [[ $SERVING_VERSION == "0.6.0" ]]; then
+else
    if [[ "$RUN_ENV" == "cpu" ]];then
        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl"
        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-$SERVING_VERSION.tar.gz"
@@ -80,10 +80,10 @@ if [[ "$RUN_ENV" == "cpu" ]];then
     python$PYTHON_VERSION -m pip install $paddle_whl
     cd /usr/local/
     wget $serving_bin
-    tar xf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz
-    mv $PWD/serving-cpu-noavx-openblas-${SERVING_VERSION} $PWD/serving_bin
+    tar xf serving-cpu-avx-mkl-${SERVING_VERSION}.tar.gz
+    mv $PWD/serving-cpu-avx-mkl-${SERVING_VERSION} $PWD/serving_bin
     echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
-    rm -rf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz
+    rm -rf serving-cpu-avx-mkl-${SERVING_VERSION}.tar.gz
     cd -
 elif [[ "$RUN_ENV" == "cuda10.1" ]];then
     python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
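
Note on the new FLOAT16 fetch path introduced above: `general_response_op.cpp` ships fp16 results as raw bytes in `tensor_content` (elem_type 5), and the client-side change in `client.py` decodes those bytes with numpy and then applies the fetched shape. Below is a minimal, standalone sketch of that decode step using plain numpy only (no Serving dependency); the byte string and shape are fabricated here purely for illustration.

```python
import numpy as np

# Fabricated stand-in for the fp16 tensor_content bytes a fetch would return:
# two bytes per element, IEEE half precision, row-major.
raw = np.array([[1.0, 2.5], [0.125, -3.0]], dtype=np.float16).tobytes()
shape = [2, 2]

# The same decode the client performs for float16_type fetch vars
# (client.py uses np.fromstring; np.frombuffer is the non-deprecated equivalent).
arr = np.frombuffer(raw, dtype=np.float16).reshape(shape)

print(arr)        # [[ 1.     2.5  ]
                  #  [ 0.125 -3.   ]]
print(arr.dtype)  # float16
```

On the server side the same bytes are copied straight into a FLOAT16 tensor via `CopyFromCpu` with `paddle::platform::float16*`, so no precision conversion happens in Serving itself.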