From b0953a3f14b9806be62dd155a1cfcab34a2f09fc Mon Sep 17 00:00:00 2001
From: ShiningZhang
Date: Fri, 5 Nov 2021 14:43:57 +0800
Subject: [PATCH] support ascend_cl arm64 with lite for ascend310

---
 CMakeLists.txt                                |  1 +
 cmake/paddlepaddle.cmake                      | 16 ++++++++++-
 core/configure/proto/server_configure.proto   |  1 +
 core/general-client/CMakeLists.txt            |  2 +-
 .../paddle/include/paddle_engine.h            | 27 +++++++++++++++++++
 paddle_inference/paddle/src/paddle_engine.cpp |  5 ++++
 python/CMakeLists.txt                         |  2 ++
 python/gen_version.py                         |  2 ++
 python/paddle_serving_app/local_predict.py    | 22 ++++++++++++---
 python/paddle_serving_server/serve.py         |  6 ++++-
 python/paddle_serving_server/server.py        | 12 +++++++++
 python/pipeline/local_service_handler.py      | 17 +++++++++---
 12 files changed, 104 insertions(+), 9 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e41cfe8c..0561322d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,6 +60,7 @@ option(WITH_TRT "Compile Paddle Serving with TRT"
 option(PADDLE_ON_INFERENCE "Compile for encryption" ON)
 option(WITH_OPENCV "Compile Paddle Serving with OPENCV" OFF)
 option(WITH_ROCM "Compile Paddle Serving with ROCM" OFF)
+option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" OFF)

 if(NOT DEFINED VERSION_TAG)
   set(VERSION_TAG "0.0.0")
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 7c9d9309..6d9ca3eb 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -62,6 +62,13 @@ elseif (WITH_LITE)
     elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
       SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
     endif()
+  elseif (WITH_ASCEND_CL)
+    if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+      MESSAGE("paddle lite lib is unknown.")
+      SET(PADDLE_LIB_VERSION "paddle-lite-unknown")
+    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
+      SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/ASCEND/arm64_gcc7.5_openblas_lite2.10")
+    endif()
   else()
     MESSAGE("paddle lite lib is unknown.")
     SET(PADDLE_LIB_VERSION "paddle-lite-unknown")
@@ -81,7 +88,11 @@ else()
 endif()

 if(WITH_LITE)
-  SET(PADDLE_LIB_PATH "https://paddle-inference-lib.bj.bcebos.com/2.2.0-rc0/cxx_c/Linux/XPU/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz ")
+  if (WITH_XPU)
+    SET(PADDLE_LIB_PATH "https://paddle-inference-lib.bj.bcebos.com/2.2.0-rc0/cxx_c/Linux/XPU/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz ")
+  elseif (WITH_ASCEND_CL)
+    SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tgz ")
+  endif()
 else()
   SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
 endif()
@@ -152,6 +163,9 @@ endif()

 ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
+if (WITH_ASCEND_CL AND WITH_LITE)
+  SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
+endif()

 if (WITH_TRT)
   ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index 13b9d395..c974f010 100755
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -47,6 +47,7 @@ message EngineDesc {
   optional bool combined_model = 18;
   optional bool encrypted_model = 19;
   optional bool gpu_multi_stream = 20;
+  optional bool use_ascend_cl = 21;

   /*
    * "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling
diff --git a/core/general-client/CMakeLists.txt b/core/general-client/CMakeLists.txt
index 0a7f2ee4..21355e47 100644
--- a/core/general-client/CMakeLists.txt
+++ b/core/general-client/CMakeLists.txt
@@ -18,7 +18,7 @@ add_executable(simple_client example/simple_client.cpp)
 add_dependencies(simple_client utils sdk-cpp client)

 target_link_libraries(simple_client -Wl,--whole-archive
-        -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
+        -Wl,--no-whole-archive -Wl,--start-group -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)

 target_link_libraries(simple_client utils)
 target_link_libraries(simple_client sdk-cpp)
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index c76147b6..4c3481fe 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -41,6 +41,9 @@ using paddle_infer::CreatePredictor;
 DECLARE_int32(gpuid);
 DECLARE_string(precision);
 DECLARE_bool(use_calib);
+DECLARE_string(nnadapter_device_names);
+DECLARE_string(nnadapter_context_properties);
+DECLARE_string(nnadapter_model_cache_dir);

 static const int max_batch = 32;
 static const int min_subgraph_size = 3;
@@ -237,6 +240,7 @@ class PaddleInferenceEngine : public EngineCore {

     if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
       config.EnableLiteEngine(precision_type, true);
+      config.SwitchIrOptim(true);
     }

     if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
@@ -269,6 +273,29 @@ class PaddleInferenceEngine : public EngineCore {
       config.SetXpuDeviceId(gpu_id);
     }

+    if (engine_conf.has_use_ascend_cl() &&
+        engine_conf.use_ascend_cl()) {
+      if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
+        FLAGS_nnadapter_device_names = "huawei_ascend_npu";
+        FLAGS_nnadapter_context_properties =
+            "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" +
+            std::to_string(gpu_id);
+        FLAGS_nnadapter_model_cache_dir = "";
+        config.NNAdapter()
+            .Enable()
+            .SetDeviceNames({FLAGS_nnadapter_device_names})
+            .SetContextProperties(FLAGS_nnadapter_context_properties)
+            .SetModelCacheDir(FLAGS_nnadapter_model_cache_dir);
+        LOG(INFO) << "Enable Lite NNAdapter for Ascend,"
+                  << "nnadapter_device_names="
+                  << FLAGS_nnadapter_device_names
+                  << ",nnadapter_context_properties="
+                  << FLAGS_nnadapter_context_properties
+                  << ",nnadapter_model_cache_dir="
+                  << FLAGS_nnadapter_model_cache_dir;
+      }
+    }
+
     if (engine_conf.has_enable_memory_optimization() &&
         engine_conf.enable_memory_optimization()) {
       config.EnableMemoryOptim();
diff --git a/paddle_inference/paddle/src/paddle_engine.cpp b/paddle_inference/paddle/src/paddle_engine.cpp
index b6da2a5a..dc6ffd81 100644
--- a/paddle_inference/paddle/src/paddle_engine.cpp
+++ b/paddle_inference/paddle/src/paddle_engine.cpp
@@ -22,6 +22,11 @@ namespace inference {
 DEFINE_int32(gpuid, 0, "GPU device id to use");
 DEFINE_string(precision, "fp32", "precision to deploy, default is fp32");
 DEFINE_bool(use_calib, false, "calibration mode, default is false");
+DEFINE_string(nnadapter_device_names, "", "Names of nnadapter device");
+DEFINE_string(nnadapter_context_properties,
+              "",
+              "Properties of nnadapter context");
+DEFINE_string(nnadapter_model_cache_dir, "", "Cache dir of nnadapter model");

 REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
     ::baidu::paddle_serving::predictor::FluidInferEngine,
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index ae70e3ec..3fd4a6f2 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -14,6 +14,8 @@ if (SERVER)
     set(SERVER_PACKAGE_NAME "paddle-serving-server-xpu")
   elseif(WITH_ROCM)
     set(SERVER_PACKAGE_NAME "paddle-serving-server-rocm")
+  elseif(WITH_ASCEND_CL)
+    set(SERVER_PACKAGE_NAME "paddle-serving-server-npu")
   endif()
   file(INSTALL pipeline DESTINATION paddle_serving_server)
   file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
diff --git a/python/gen_version.py b/python/gen_version.py
index de1e373d..c18f030c 100644
--- a/python/gen_version.py
+++ b/python/gen_version.py
@@ -45,6 +45,8 @@ elif package_name.endswith('xpu'):
     update_info("paddle_serving_server/version.py", "device_type", "2")
 elif package_name.endswith('rocm'):
     update_info("paddle_serving_server/version.py", "device_type", "3")
+elif package_name.endswith('npu'):
+    update_info("paddle_serving_server/version.py", "device_type", "4")

 path = "paddle_serving_" + sys.argv[1]
 commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index 7de41953..d9a54efb 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -86,7 +86,8 @@ class LocalPredictor(object):
                           mkldnn_cache_capacity=0,
                           mkldnn_op_list=None,
                           mkldnn_bf16_op_list=None,
-                          use_feed_fetch_ops=False):
+                          use_feed_fetch_ops=False,
+                          use_ascend_cl=False):
         """
         Load model configs and create the paddle predictor by Paddle Inference API.

@@ -108,6 +109,7 @@
             mkldnn_op_list: op list accelerated using MKLDNN, None default.
             mkldnn_bf16_op_list: op list accelerated using MKLDNN bf16, None default.
             use_feed_fetch_ops: use feed/fetch ops, False default.
+            use_ascend_cl: run predict on Huawei Ascend, False default
         """
         gpu_id = int(gpu_id)
         client_config = "{}/serving_server_conf.prototxt".format(model_path)
@@ -146,11 +148,12 @@
             "gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{}, "
             "use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, "
             "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
-            "mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, ".format(
+            "mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, "
+            "use_ascend_cl:{} ".format(
                 model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
                 ir_optim, use_trt, use_lite, use_xpu, precision, use_calib,
                 use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list,
-                mkldnn_bf16_op_list, use_feed_fetch_ops))
+                mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl))

         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
         self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
@@ -215,11 +218,24 @@
                 zero_copy=True,
                 passes_filter=[],
                 ops_filter=[])
+            config.switch_ir_optim(True)
         # set xpu
         if use_xpu:
             # 2MB l3 cache
             config.enable_xpu(8 * 1024 * 1024)
             config.set_xpu_device_id(gpu_id)
+        # set ascend cl
+        if use_ascend_cl:
+            if use_lite:
+                nnadapter_device_names = "huawei_ascend_npu"
+                nnadapter_context_properties = \
+                    "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS={}".format(gpu_id)
+                nnadapter_model_cache_dir = ""
+                config.nnadapter() \
+                    .enable() \
+                    .set_device_names([nnadapter_device_names]) \
+                    .set_context_properties(nnadapter_context_properties) \
+                    .set_model_cache_dir(nnadapter_model_cache_dir)
         # set cpu low precision
         if not use_gpu and not use_lite:
             if precision_type == paddle_infer.PrecisionType.Int8:
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 0447f5ec..c0bc5b0d 100755
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -181,6 +181,8 @@ def serve_args():
         "--use_lite", default=False, action="store_true", help="Use PaddleLite")
     parser.add_argument(
         "--use_xpu", default=False, action="store_true", help="Use XPU")
+    parser.add_argument(
+        "--use_ascend_cl", default=False, action="store_true", help="Use Ascend CL")
     parser.add_argument(
         "--product_name",
         type=str,
@@ -272,13 +274,15 @@ def start_gpu_card_model(gpu_mode, port, args):  # pylint: disable=doc-string-mi
         server.set_device(device)
     if args.use_xpu:
         server.set_xpu()
+    if args.use_ascend_cl:
+        server.set_ascend_cl()

     if args.product_name != None:
         server.set_product_name(args.product_name)
     if args.container_id != None:
         server.set_container_id(args.container_id)

-    if gpu_mode == True:
+    if gpu_mode == True or args.use_xpu or args.use_ascend_cl:
         server.set_gpuid(args.gpu_ids)
     server.load_model_config(model)
     server.prepare_server(
diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py
index 10797999..5cc6dc4a 100755
--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -88,6 +88,7 @@ class Server(object):
         self.gpu_multi_stream = False
         self.use_lite = False
         self.use_xpu = False
+        self.use_ascend_cl = False
         self.model_config_paths = collections.OrderedDict()
         self.product_name = None
         self.container_id = None
@@ -189,6 +190,9 @@
     def set_xpu(self):
         self.use_xpu = True

+    def set_ascend_cl(self):
+        self.use_ascend_cl = True
+
     def _prepare_engine(self, model_config_paths, device, use_encryption_model):
         self.device = device
         if self.model_toolkit_conf == None:
@@ -202,6 +206,8 @@
         if self.device == "gpu" or self.use_trt or self.gpu_multi_stream:
             self.gpuid = ["0"]
             self.device = "gpu"
+        elif self.use_xpu or self.use_ascend_cl:
+            self.gpuid = ["0"]
         else:
             self.gpuid = ["-1"]

@@ -238,6 +244,7 @@
             engine.gpu_multi_stream = self.gpu_multi_stream
             engine.use_lite = self.use_lite
             engine.use_xpu = self.use_xpu
+            engine.use_ascend_cl = self.use_ascend_cl
             engine.use_gpu = False

             if len(self.gpuid) == 0:
@@ -437,6 +444,11 @@
             device_version = "xpu-" + platform.machine()
         elif device_type == "3":
             device_version = "rocm-" + platform.machine()
+        elif device_type == "4":
+            if self.use_lite:
+                device_version = "ascendcl-lite-" + platform.machine()
+            else:
+                device_version = "ascendcl-" + platform.machine()
         return device_version

     def download_bin(self):
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
index d9df5e30..20ad0a11 100644
--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -86,6 +86,7 @@ class LocalServiceHandler(object):
         self._use_trt = False
         self._use_lite = False
         self._use_xpu = False
+        self._use_ascend_cl = False
         self._use_mkldnn = False
         self._mkldnn_cache_capacity = 0
         self._mkldnn_op_list = None
@@ -129,6 +130,12 @@
             devices = [int(x) for x in devices.split(",")]
             self._use_lite = True
             self._use_xpu = True
+        elif device_type == 5:
+            # Ascend 310 ARM CPU
+            self._device_name = "arm"
+            devices = [int(x) for x in devices.split(",")]
+            self._use_lite = True
+            self._use_ascend_cl = True
         else:
             _LOGGER.error(
device_type={}" @@ -163,13 +170,14 @@ class LocalServiceHandler(object): "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, " "client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, " "mkldnn_cache_capacity:{}, mkldnn_op_list:{}, " - "mkldnn_bf16_op_list:{}".format( + "mkldnn_bf16_op_list:{}, use_ascend_cl:{}".format( model_config, self._device_name, self._use_gpu, self._use_trt, self._use_lite, self._use_xpu, device_type, self._devices, self._mem_optim, self._ir_optim, self._use_profile, self._thread_num, self._client_type, self._fetch_names, self._precision, self._use_mkldnn, self._mkldnn_cache_capacity, - self._mkldnn_op_list, self._mkldnn_bf16_op_list)) + self._mkldnn_op_list, self._mkldnn_bf16_op_list, + self._use_ascend_cl)) def get_fetch_list(self): return self._fetch_names @@ -225,7 +233,8 @@ class LocalServiceHandler(object): use_mkldnn=self._use_mkldnn, mkldnn_cache_capacity=self._mkldnn_cache_capacity, mkldnn_op_list=self._mkldnn_op_list, - mkldnn_bf16_op_list=self._mkldnn_bf16_op_list) + mkldnn_bf16_op_list=self._mkldnn_bf16_op_list, + use_ascend_cl=self._use_ascend_cl) return self._local_predictor_client def get_client_config(self): @@ -284,6 +293,8 @@ class LocalServiceHandler(object): server.set_xpu() if self._use_lite: server.set_lite() + if self._use_ascend_cl: + server.set_ascend_cl() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) -- GitLab