Commit 9cf29f60 authored by H HexToString

Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into fix_dead_link

......@@ -59,6 +59,8 @@ option(PACK "Compile for whl"
option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
option(PADDLE_ON_INFERENCE "Compile for encryption" ON)
option(WITH_OPENCV "Compile Paddle Serving with OPENCV" OFF)
option(WITH_ROCM "Compile Paddle Serving with ROCM" OFF)
option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" OFF)
if(NOT DEFINED VERSION_TAG)
set(VERSION_TAG "0.0.0")
......@@ -163,6 +165,14 @@ if(SERVER)
endif()
list(APPEND EXTERNAL_LIBS paddlepaddle)
if(WITH_ROCM)
include(hip)
endif()
if(WITH_ASCEND_CL)
include(external/ascend)
list(APPEND EXTERNAL_LIBS ascend ascend_cl)
endif()
endif()
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#NOTE: Logic is from
# https://github.com/mindspore-ai/graphengine/blob/master/CMakeLists.txt
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH})
else()
set(ASCEND_DIR /usr/local/Ascend)
endif()
if(EXISTS ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include/graph/ascend_string.h)
# It means CANN 20.2 +
add_definitions(-DPADDLE_WITH_ASCEND_STRING)
endif()
if(WITH_ASCEND OR WITH_ASCEND_CL)
set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
set(ASCEND_DRIVER_SHARE_DIR ${ASCEND_DIR}/driver/lib64/share)
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64)
set(ASCEND_ATC_DIR ${ASCEND_DIR}/atc/lib64)
set(ASCEND_ACL_DIR ${ASCEND_DIR}/acllib/lib64)
set(STATIC_ACL_LIB ${ASCEND_ACL_DIR})
set(ASCEND_MS_RUNTIME_PATH ${ASCEND_RUNTIME_DIR} ${ASCEND_ACL_DIR} ${ASCEND_ATC_DIR})
set(ASCEND_MS_DRIVER_PATH ${ASCEND_DRIVER_DIR} ${ASCEND_DRIVER_COMMON_DIR})
set(ATLAS_RUNTIME_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64)
set(ATLAS_RUNTIME_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include)
set(ATLAS_ACL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/acllib/lib64)
set(ATLAS_ATC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/atc/lib64)
set(ATLAS_MS_RUNTIME_PATH ${ATLAS_RUNTIME_DIR} ${ATLAS_ACL_DIR} ${ATLAS_ATC_DIR})
set(atlas_graph_lib ${ATLAS_RUNTIME_DIR}/libgraph.so)
set(atlas_ge_runner_lib ${ATLAS_RUNTIME_DIR}/libge_runner.so)
set(atlas_acl_lib ${ATLAS_RUNTIME_DIR}/libascendcl.so)
INCLUDE_DIRECTORIES(${ATLAS_RUNTIME_INC_DIR})
ADD_LIBRARY(ascend_ge SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET ascend_ge PROPERTY IMPORTED_LOCATION ${atlas_ge_runner_lib})
ADD_LIBRARY(ascend_graph SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET ascend_graph PROPERTY IMPORTED_LOCATION ${atlas_graph_lib})
ADD_LIBRARY(atlas_acl SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET atlas_acl PROPERTY IMPORTED_LOCATION ${atlas_acl_lib})
add_custom_target(extern_ascend DEPENDS ascend_ge ascend_graph atlas_acl)
endif()
if(WITH_ASCEND_CL)
set(ASCEND_CL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64)
set(ascend_hccl_lib ${ASCEND_CL_DIR}/libhccl.so)
set(ascendcl_lib ${ASCEND_CL_DIR}/libascendcl.so)
set(acl_op_compiler_lib ${ASCEND_CL_DIR}/libacl_op_compiler.so)
set(FWKACLLIB_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include)
set(ACLLIB_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/acllib/include)
message(STATUS "FWKACLLIB_INC_DIR ${FWKACLLIB_INC_DIR}")
message(STATUS "ASCEND_CL_DIR ${ASCEND_CL_DIR}")
INCLUDE_DIRECTORIES(${FWKACLLIB_INC_DIR})
INCLUDE_DIRECTORIES(${ACLLIB_INC_DIR})
ADD_LIBRARY(ascendcl SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET ascendcl PROPERTY IMPORTED_LOCATION ${ascendcl_lib})
ADD_LIBRARY(ascend_hccl SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET ascend_hccl PROPERTY IMPORTED_LOCATION ${ascend_hccl_lib})
ADD_LIBRARY(acl_op_compiler SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION ${acl_op_compiler_lib})
add_custom_target(extern_ascend_cl DEPENDS ascendcl acl_op_compiler)
endif()
if(NOT WITH_ROCM)
return()
endif()
if(NOT DEFINED ENV{ROCM_PATH})
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed")
else()
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed")
endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
find_package(HIP REQUIRED)
include_directories(${ROCM_PATH}/include)
message(STATUS "HIP version: ${HIP_VERSION}")
message(STATUS "HIP_CLANG_PATH: ${HIP_CLANG_PATH}")
macro(find_package_and_include PACKAGE_NAME)
find_package("${PACKAGE_NAME}" REQUIRED)
include_directories("${ROCM_PATH}/${PACKAGE_NAME}/include")
message(STATUS "${PACKAGE_NAME} version: ${${PACKAGE_NAME}_VERSION}")
endmacro()
find_package_and_include(miopen)
find_package_and_include(rocblas)
find_package_and_include(hiprand)
find_package_and_include(rocrand)
find_package_and_include(rccl)
find_package_and_include(rocthrust)
find_package_and_include(hipcub)
find_package_and_include(rocprim)
find_package_and_include(hipsparse)
find_package_and_include(rocsparse)
find_package_and_include(rocfft)
# set CXX flags for HIP
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)
# define HIP_CXX_FLAGS
list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
# Note(qili93): HIP has compile conflicts in float16.h, as platform::float16 overloads std::is_floating_point and std::is_integral
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined)
list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override)
list(APPEND HIP_CXX_FLAGS -Wno-exceptions)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-negative)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-overflow)
list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
list(APPEND HIP_CXX_FLAGS -Wno-pass-failed)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -std=c++14)
if(CMAKE_BUILD_TYPE MATCHES Debug)
list(APPEND HIP_CXX_FLAGS -g2)
list(APPEND HIP_CXX_FLAGS -O0)
list(APPEND HIP_HIPCC_FLAGS -fdebug-info-for-profiling)
endif(CMAKE_BUILD_TYPE MATCHES Debug)
set(HIP_HCC_FLAGS ${HIP_CXX_FLAGS})
set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
# Ask hcc to generate device code during compilation so we can use
# host linker to link.
list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc)
list(APPEND HIP_HCC_FLAGS --amdgpu-target=gfx906)
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906)
if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
else()
set(hip_library_name hip_hcc)
endif()
message(STATUS "HIP library name: ${hip_library_name}")
# set HIP link libs
find_library(ROCM_HIPRTC_LIB ${hip_library_name} HINTS ${HIP_PATH}/lib)
message(STATUS "ROCM_HIPRTC_LIB: ${ROCM_HIPRTC_LIB}")
#include(thrust)
......@@ -62,10 +62,27 @@ elseif (WITH_LITE)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
endif()
elseif (WITH_ASCEND_CL)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
MESSAGE("paddle lite lib is unknown.")
SET(PADDLE_LIB_VERSION "paddle-lite-unknown")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/ASCEND/arm64_gcc7.5_openblas_lite2.10")
endif()
else()
MESSAGE("paddle lite lib is unknown.")
SET(PADDLE_LIB_VERSION "paddle-lite-unknown")
endif()
elseif (WITH_ROCM)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/rocm")
elseif (WITH_ASCEND_CL)
message("cpu arch: ${CMAKE_SYSTEM_PROCESSOR}")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
MESSAGE("paddle lib is unknown.")
SET(PADDLE_LIB_VERSION "paddle-unknown")
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/ASCEND/arm64_gcc8.2_openblas")
endif()
else()
if (WITH_AVX)
if (WITH_MKLML)
......@@ -79,7 +96,13 @@ else()
endif()
if(WITH_LITE)
if (WITH_XPU)
SET(PADDLE_LIB_PATH "https://paddle-inference-lib.bj.bcebos.com/2.2.0-rc0/cxx_c/Linux/XPU/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz ")
elseif (WITH_ASCEND_CL)
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tgz ")
endif()
elseif(WITH_ASCEND_CL)
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference.tgz ")
else()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
endif()
......@@ -150,6 +173,9 @@ endif()
ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
if (WITH_ASCEND_CL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
endif()
if (WITH_TRT)
ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
......
......@@ -47,6 +47,7 @@ message EngineDesc {
optional bool combined_model = 18;
optional bool encrypted_model = 19;
optional bool gpu_multi_stream = 20;
optional bool use_ascend_cl = 21;
/*
* "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling
......
......@@ -18,7 +18,7 @@ add_executable(simple_client example/simple_client.cpp)
add_dependencies(simple_client utils sdk-cpp client)
target_link_libraries(simple_client -Wl,--whole-archive
-Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
-Wl,--no-whole-archive -Wl,--start-group -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
target_link_libraries(simple_client utils)
target_link_libraries(simple_client sdk-cpp)
......
......@@ -31,6 +31,10 @@ target_link_libraries(serving pdserving)
target_link_libraries(serving cube-api)
target_link_libraries(serving utils)
target_link_libraries(serving utf8proc)
if(WITH_ASCEND_CL)
target_link_libraries(serving ascendcl acl_op_compiler)
endif()
if(WITH_GPU)
target_link_libraries(serving ${CUDA_LIBRARIES})
......@@ -63,3 +67,7 @@ install(FILES
DESTINATION
${PADDLE_SERVING_INSTALL_DIR}/demo/serving/bin)
endif()
if (WITH_ROCM)
target_link_libraries(serving ${ROCM_HIPRTC_LIB})
endif()
......@@ -84,8 +84,8 @@ workdir_9393
| Argument | Type | Default | Description |
| ---------------------------------------------- | ---- | ------- | ----------------------------------------------------- |
| `thread` | int | `2` | Number of brpc service threads |
| `op_num` | int[]| `0` | Thread Number for each model in asynchronous mode |
| `op_max_batch` | int[]| `32` | Batch Number for each model in asynchronous mode |
| `runtime_thread_num` | int[]| `0` | Thread Number for each model in asynchronous mode |
| `batch_infer_size` | int[]| `32` | Batch Number for each model in asynchronous mode |
| `gpu_ids` | str[]| `"-1"` | Gpu card id for each model |
| `port` | int | `9292` | Exposed port of current service to users |
| `model` | str[]| `""` | Path of paddle model directory to be served |
......@@ -98,6 +98,7 @@ workdir_9393
| `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
| `use_ascend_cl` | bool | False | Enable for Ascend 910; use with use_lite for Ascend 310 |
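To make the new switch concrete, the sketch below drives the same options through the Python server API. This is a minimal, hedged example rather than code from this PR; the model directory `uci_housing_model` is hypothetical, and the call sequence assumes the usual OpMaker/OpSeqMaker flow.

```PYTHON
# Minimal sketch (assumed API usage, not part of this PR); model path hypothetical.
from paddle_serving_server import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(op_maker.create('general_reader'))
op_seq_maker.add_op(op_maker.create('general_infer'))
op_seq_maker.add_op(op_maker.create('general_response'))

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(2)        # the `thread` flag above
server.set_device("arm")         # Ascend boards sit in Arm hosts
server.set_lite()                # together with use_ascend_cl => Ascend 310
server.set_ascend_cl()           # the switch added in this PR
server.load_model_config("uci_housing_model")   # hypothetical model dir
server.prepare_server(workdir="workdir", port=9292, device="arm")
server.run_server()
```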
#### When you want to deploy a model on multiple GPU cards.
```BASH
......@@ -249,6 +250,7 @@ engines {
use_gpu: false
combined_model: false
gpu_multi_stream: false
use_ascend_cl: false
runtime_thread_num: 0
batch_infer_size: 32
enable_overrun: false
......@@ -286,6 +288,7 @@ gpu_ids: 2
- use_gpu: whether to use GPU
- combined_model: whether to use a combined model file
- gpu_multi_stream: whether to enable GPU multi-stream mode
- use_ascend_cl: whether to use Ascend; enabled alone it targets Ascend 910, enabled together with lite it targets Ascend 310
- runtime_thread_num: if greater than 0, Async mode is enabled and that many predictor instances are created
- batch_infer_size: the maximum batch size in Async mode
- enable_overrun: in Async mode, always put the whole task into the task queue
......@@ -357,7 +360,7 @@ op:
#Fetch result list, based on the alias_name of fetch_var in client_config
fetch_list: ["concat_1.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Compute hardware ID. When devices is "" or unset, inference runs on CPU; when devices is "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
......@@ -395,7 +398,7 @@ op:
#Fetch result list, based on the alias_name of fetch_var in client_config
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Compute hardware ID. When devices is "" or unset, inference runs on CPU; when devices is "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
......@@ -434,10 +437,12 @@ Besides CPU and GPU, Python Pipeline also supports deployment on many kinds of heterogeneous hardware
- TensorRT : 2
- CPU(Arm) : 3
- XPU : 4
- Ascend310(Arm) : 5
- Ascend910(Arm) : 6
Hardware configuration in config.yml:
```YAML
#Compute hardware type: when left empty, determined by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
#Compute hardware type: when left empty, determined by devices (CPU/GPU); 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Compute hardware ID; the hardware type is determined first by device_type. When devices is "" or unset, inference runs on CPU; when it is "0" or "0,1,2", inference runs on GPU, listing the GPU cards to use
devices: "" # "0,1"
......
......@@ -84,8 +84,8 @@ More flags:
| Argument | Type | Default | Description |
| ---------------------------------------------- | ---- | ------- | ----------------------------------------------------- |
| `thread` | int | `2` | Number of brpc service threads |
| `op_num` | int[]| `0` | Thread Number for each model in asynchronous mode |
| `op_max_batch` | int[]| `32` | Batch Number for each model in asynchronous mode |
| `runtime_thread_num` | int[]| `0` | Thread Number for each model in asynchronous mode |
| `batch_infer_size` | int[]| `32` | Batch Number for each model in asynchronous mode |
| `gpu_ids` | str[]| `"-1"` | Gpu card id for each model |
| `port` | int | `9292` | Exposed port of current service to users |
| `model` | str[]| `""` | Path of paddle model directory to be served |
......@@ -98,6 +98,7 @@ More flags:
| `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
| `use_ascend_cl` | bool | False | Enable for Ascend 910; use with use_lite for Ascend 310 |
#### Serving model with multiple gpus.
```BASH
......@@ -258,6 +259,7 @@ engines {
use_gpu: false
combined_model: false
gpu_multi_stream: false
use_ascend_cl: false
runtime_thread_num: 0
batch_infer_size: 32
enable_overrun: false
......@@ -293,6 +295,7 @@ gpu_ids: 2
- use_gpu: Enable GPU.
- combined_model: Enable combined model.
- gpu_multi_stream: Enable gpu multiple stream mode.
- use_ascend_cl: Enable Ascend; used alone it targets Ascend 910, combined with use_lite it targets Ascend 310.
- runtime_thread_num: When greater than 0, Async mode is enabled and that many predictor instances are created.
- batch_infer_size: The max batch size of Async mode.
- enable_overrun: Allow overrun in Async mode, i.e. always put the whole task into the task queue.
......@@ -380,7 +383,7 @@ op:
#Fetch data list
fetch_list: ["concat_1.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Device ID
......@@ -418,7 +421,7 @@ op:
#Fetch data list
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
#Device ID
......@@ -459,10 +462,12 @@ In addition to supporting CPU and GPU, Pipeline also supports the deployment of
- TensorRT : 2
- CPU(Arm) : 3
- XPU : 4
- Ascend310(Arm) : 5
- Ascend910(Arm) : 6
Reference config.yaml:
```YAML
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu, 5=arm ascend310, 6=arm ascend910
device_type: 0
devices: "" # "0,1"
```
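As a hedged illustration of how these device_type values are consumed, the sketch below constructs the pipeline's LocalServiceHandler directly; the import path, constructor keywords, and model directory are assumptions for illustration, not taken from this PR.

```PYTHON
# Hedged sketch: device_type 5/6 routing in LocalServiceHandler.
# Import path and model directory are assumptions.
from paddle_serving_server.pipeline.local_service_handler import \
    LocalServiceHandler

handler = LocalServiceHandler(
    model_config="ocr_det_model",   # hypothetical model directory
    device_type=5,                  # 5 = Ascend 310 (Arm): Lite + ascend_cl
    devices="0")                    # Ascend card id, analogous to gpu_ids
print(handler.get_fetch_list())     # fetch names parsed from the model config
```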
......
......@@ -41,6 +41,9 @@ using paddle_infer::CreatePredictor;
DECLARE_int32(gpuid);
DECLARE_string(precision);
DECLARE_bool(use_calib);
DECLARE_string(nnadapter_device_names);
DECLARE_string(nnadapter_context_properties);
DECLARE_string(nnadapter_model_cache_dir);
static const int max_batch = 32;
static const int min_subgraph_size = 3;
......@@ -237,6 +240,7 @@ class PaddleInferenceEngine : public EngineCore {
if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
config.EnableLiteEngine(precision_type, true);
config.SwitchIrOptim(true);
}
if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
......@@ -269,6 +273,33 @@ class PaddleInferenceEngine : public EngineCore {
config.SetXpuDeviceId(gpu_id);
}
if (engine_conf.has_use_ascend_cl() &&
engine_conf.use_ascend_cl()) {
if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
// for ascend 310
FLAGS_nnadapter_device_names = "huawei_ascend_npu";
FLAGS_nnadapter_context_properties =
"HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" +
std::to_string(gpu_id);
FLAGS_nnadapter_model_cache_dir = "";
config.NNAdapter()
.Enable()
.SetDeviceNames({FLAGS_nnadapter_device_names})
.SetContextProperties(FLAGS_nnadapter_context_properties)
.SetModelCacheDir(FLAGS_nnadapter_model_cache_dir);
LOG(INFO) << "Enable Lite NNAdapter for Ascend,"
<< "nnadapter_device_names="
<< FLAGS_nnadapter_device_names
<< ",nnadapter_context_properties="
<< FLAGS_nnadapter_context_properties
<< ",nnadapter_model_cache_dir="
<< FLAGS_nnadapter_model_cache_dir;
} else {
// for ascend 910
config.EnableNpu(gpu_id);
}
}
if (engine_conf.has_enable_memory_optimization() &&
engine_conf.enable_memory_optimization()) {
config.EnableMemoryOptim();
......
......@@ -22,6 +22,11 @@ namespace inference {
DEFINE_int32(gpuid, 0, "GPU device id to use");
DEFINE_string(precision, "fp32", "precision to deploy, default is fp32");
DEFINE_bool(use_calib, false, "calibration mode, default is false");
DEFINE_string(nnadapter_device_names, "", "Names of nnadapter device");
DEFINE_string(nnadapter_context_properties,
"",
"Properties of nnadapter context");
DEFINE_string(nnadapter_model_cache_dir, "", "Cache dir of nnadapter model");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
......
......@@ -12,6 +12,10 @@ if (SERVER)
set(SERVER_PACKAGE_NAME "paddle-serving-server-gpu")
elseif(WITH_XPU)
set(SERVER_PACKAGE_NAME "paddle-serving-server-xpu")
elseif(WITH_ROCM)
set(SERVER_PACKAGE_NAME "paddle-serving-server-rocm")
elseif(WITH_ASCEND_CL)
set(SERVER_PACKAGE_NAME "paddle-serving-server-npu")
endif()
file(INSTALL pipeline DESTINATION paddle_serving_server)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
......
......@@ -43,6 +43,10 @@ if package_name.endswith('gpu'):
update_info("paddle_serving_server/version.py", "device_type", "1")
elif package_name.endswith('xpu'):
update_info("paddle_serving_server/version.py", "device_type", "2")
elif package_name.endswith('rocm'):
update_info("paddle_serving_server/version.py", "device_type", "3")
elif package_name.endswith('npu'):
update_info("paddle_serving_server/version.py", "device_type", "4")
path = "paddle_serving_" + sys.argv[1]
commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
......
......@@ -86,7 +86,8 @@ class LocalPredictor(object):
mkldnn_cache_capacity=0,
mkldnn_op_list=None,
mkldnn_bf16_op_list=None,
use_feed_fetch_ops=False):
use_feed_fetch_ops=False,
use_ascend_cl=False):
"""
Load model configs and create the paddle predictor by Paddle Inference API.
......@@ -108,6 +109,7 @@ class LocalPredictor(object):
mkldnn_op_list: op list accelerated using MKLDNN, None default.
mkldnn_bf16_op_list: op list accelerated using MKLDNN bf16, None default.
use_feed_fetch_ops: use feed/fetch ops, False default.
use_ascend_cl: run prediction on Huawei Ascend, False default.
"""
gpu_id = int(gpu_id)
client_config = "{}/serving_server_conf.prototxt".format(model_path)
......@@ -146,11 +148,12 @@ class LocalPredictor(object):
"gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{}, "
"use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, "
"use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
"mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, ".format(
"mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, "
"use_ascend_cl:{} ".format(
model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
ir_optim, use_trt, use_lite, use_xpu, precision, use_calib,
use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
mkldnn_bf16_op_list, use_feed_fetch_ops))
mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl))
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
......@@ -215,11 +218,28 @@ class LocalPredictor(object):
zero_copy=True,
passes_filter=[],
ops_filter=[])
config.switch_ir_optim(True)
# set xpu
if use_xpu:
# 8MB L3 cache
config.enable_xpu(8 * 1024 * 1024)
config.set_xpu_device_id(gpu_id)
# set ascend cl
if use_ascend_cl:
if use_lite:
# for ascend 310
nnadapter_device_names = "huawei_ascend_npu"
nnadapter_context_properties = \
"HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
nnadapter_model_cache_dir = ""
config.nnadapter() \
.enable() \
.set_device_names([nnadapter_device_names]) \
.set_context_properties(nnadapter_context_properties) \
.set_model_cache_dir(nnadapter_model_cache_dir)
else:
# for ascend 910
config.enable_npu(gpu_id)
# set cpu low precision
if not use_gpu and not use_lite:
if precision_type == paddle_infer.PrecisionType.Int8:
......
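The same switch surfaces on LocalPredictor. Below is a hedged usage sketch, following the parameter list shown above; the model directory name is hypothetical.

```PYTHON
# Hedged usage sketch of the new use_ascend_cl parameter; the model
# directory is hypothetical. With use_lite=True the predictor takes the
# NNAdapter path (Ascend 310); without it, it calls enable_npu (Ascend 910).
from paddle_serving_app.local_predict import LocalPredictor

predictor = LocalPredictor()
predictor.load_model_config(
    "serving_server_dir",   # hypothetical model config directory
    use_lite=True,
    use_ascend_cl=True,
    gpu_id=0)               # reused as the Ascend device id
```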
......@@ -183,6 +183,11 @@ def serve_args():
"--use_lite", default=False, action="store_true", help="Use PaddleLite")
parser.add_argument(
"--use_xpu", default=False, action="store_true", help="Use XPU")
parser.add_argument(
"--use_ascend_cl",
default=False,
action="store_true",
help="Use Ascend CL")
parser.add_argument(
"--product_name",
type=str,
......@@ -299,13 +304,15 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
server.set_device(device)
if args.use_xpu:
server.set_xpu()
if args.use_ascend_cl:
server.set_ascend_cl()
if args.product_name != None:
server.set_product_name(args.product_name)
if args.container_id != None:
server.set_container_id(args.container_id)
if gpu_mode == True:
if gpu_mode == True or args.use_xpu or args.use_ascend_cl:
server.set_gpuid(args.gpu_ids)
server.load_model_config(model)
server.prepare_server(
......
......@@ -88,6 +88,7 @@ class Server(object):
self.gpu_multi_stream = False
self.use_lite = False
self.use_xpu = False
self.use_ascend_cl = False
self.model_config_paths = collections.OrderedDict()
self.product_name = None
self.container_id = None
......@@ -195,6 +196,9 @@ class Server(object):
def set_xpu(self):
self.use_xpu = True
def set_ascend_cl(self):
self.use_ascend_cl = True
def _prepare_engine(self, model_config_paths, device, use_encryption_model):
self.device = device
if self.model_toolkit_conf == None:
......@@ -208,6 +212,8 @@ class Server(object):
if self.device == "gpu" or self.use_trt or self.gpu_multi_stream:
self.gpuid = ["0"]
self.device = "gpu"
elif self.use_xpu or self.use_ascend_cl:
self.gpuid = ["0"]
else:
self.gpuid = ["-1"]
......@@ -244,6 +250,7 @@ class Server(object):
engine.gpu_multi_stream = self.gpu_multi_stream
engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu
engine.use_ascend_cl = self.use_ascend_cl
engine.use_gpu = False
if len(self.gpuid) == 0:
......@@ -436,6 +443,13 @@ class Server(object):
device_version = "gpu-cuda" + version_suffix
elif device_type == "2":
device_version = "xpu-" + platform.machine()
elif device_type == "3":
device_version = "rocm-" + platform.machine()
elif device_type == "4":
if self.use_lite:
device_version = "ascendcl-lite-" + platform.machine()
else:
device_version = "ascendcl-" + platform.machine()
return device_version
def download_bin(self):
......
......@@ -86,6 +86,7 @@ class LocalServiceHandler(object):
self._use_trt = False
self._use_lite = False
self._use_xpu = False
self._use_ascend_cl = False
self._use_mkldnn = False
self._mkldnn_cache_capacity = 0
self._mkldnn_op_list = None
......@@ -129,6 +130,17 @@ class LocalServiceHandler(object):
devices = [int(x) for x in devices.split(",")]
self._use_lite = True
self._use_xpu = True
elif device_type == 5:
# Ascend 310 ARM CPU
self._device_name = "arm"
devices = [int(x) for x in devices.split(",")]
self._use_lite = True
self._use_ascend_cl = True
elif device_type == 6:
# Ascend 910 ARM CPU
self._device_name = "arm"
devices = [int(x) for x in devices.split(",")]
self._use_ascend_cl = True
else:
_LOGGER.error(
"LocalServiceHandler initialization fail. device_type={}"
......@@ -163,13 +175,14 @@ class LocalServiceHandler(object):
"mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
"client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, "
"mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
"mkldnn_bf16_op_list:{}".format(
"mkldnn_bf16_op_list:{}, use_ascend_cl:{}".format(
model_config, self._device_name, self._use_gpu, self._use_trt,
self._use_lite, self._use_xpu, device_type, self._devices,
self._mem_optim, self._ir_optim, self._use_profile,
self._thread_num, self._client_type, self._fetch_names,
self._precision, self._use_mkldnn, self._mkldnn_cache_capacity,
self._mkldnn_op_list, self._mkldnn_bf16_op_list))
self._mkldnn_op_list, self._mkldnn_bf16_op_list,
self._use_ascend_cl))
def get_fetch_list(self):
return self._fetch_names
......@@ -225,7 +238,8 @@ class LocalServiceHandler(object):
use_mkldnn=self._use_mkldnn,
mkldnn_cache_capacity=self._mkldnn_cache_capacity,
mkldnn_op_list=self._mkldnn_op_list,
mkldnn_bf16_op_list=self._mkldnn_bf16_op_list)
mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
use_ascend_cl=self._use_ascend_cl)
return self._local_predictor_client
def get_client_config(self):
......@@ -284,6 +298,8 @@ class LocalServiceHandler(object):
server.set_xpu()
if self._use_lite:
server.set_lite()
if self._use_ascend_cl:
server.set_ascend_cl()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
......