diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 0e202d3b06537646e489510c781cf125e87e3e07..82d35932a0240a3bd230c0c2d5072899ed9fa230 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -136,8 +136,8 @@ if (WITH_TRT)
 endif()
 
 if (WITH_LITE)
-  ADD_LIBRARY(paddle_api_full_bundled STATIC IMPORTED GLOBAL)
-  SET_PROPERTY(TARGET paddle_api_full_bundled PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_api_full_bundled.a)
+  ADD_LIBRARY(paddle_full_api_shared STATIC IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET paddle_full_api_shared PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_full_api_shared.so)
 
   if (WITH_XPU)
     ADD_LIBRARY(xpuapi SHARED IMPORTED GLOBAL)
@@ -157,7 +157,7 @@ LIST(APPEND paddle_depend_libs xxhash)
 
 if(WITH_LITE)
-  LIST(APPEND paddle_depend_libs paddle_api_full_bundled)
+  LIST(APPEND paddle_depend_libs paddle_full_api_shared)
   if(WITH_XPU)
     LIST(APPEND paddle_depend_libs xpuapi xpurt)
   endif()
diff --git a/doc/FAQ.md b/doc/FAQ.md
index 0dc4ed35a55e5904adbd1b924441aa21bc5436ab..233ee8381d5d2e6a0ea2b1a3084e310de84a272f 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -34,6 +34,42 @@
 
 **A:** http rpc
 
+## Installation Issues
+
+#### Q: Installing the whl package with `pip install` fails with the following error:
+```
+Collecting opencv-python
+  Using cached opencv-python-4.3.0.38.tar.gz (88.0 MB)
+  Installing build dependencies ... done
+  Getting requirements to build wheel ... error
+  ERROR: Command errored out with exit status 1:
+   command: /home/work/Python-2.7.17/build/bin/python /home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py get_requires_for_build_wheel /tmp/tmpLiweA9
+       cwd: /tmp/pip-install-_w6AUI/opencv-python
+  Complete output (22 lines):
+  Traceback (most recent call last):
+    File "/home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py", line 280, in <module>
+      main()
+    File "/home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py", line 263, in main
+      json_out['return_val'] = hook(**hook_input['kwargs'])
+    File "/home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py", line 114, in get_requires_for_build_wheel
+      return hook(config_settings)
+    File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 146, in get_requires_for_build_wheel
+      return self._get_build_requires(config_settings, requirements=['wheel'])
+    File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 127, in _get_build_requires
+      self.run_setup()
+    File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 243, in run_setup
+      self).run_setup(setup_script=setup_script)
+    File "/tmp/pip-build-env-AUCbP4/overlay/lib/python2.7/site-packages/setuptools/build_meta.py", line 142, in run_setup
+      exec(compile(code, __file__, 'exec'), locals())
+    File "setup.py", line 448, in <module>
+      main()
+    File "setup.py", line 99, in main
+      % {"ext": re.escape(sysconfig.get_config_var("EXT_SUFFIX"))}
+    File "/home/work/Python-2.7.17/build/lib/python2.7/re.py", line 210, in escape
+      s = list(pattern)
+  TypeError: 'NoneType' object is not iterable
+```
+**A:** Install a pinned version of opencv-python first, `pip install opencv-python==4.2.0.32`, and then install the whl package.
 
 ## 编译问题
 
diff --git a/java/examples/src/main/java/PipelineClientExample.java b/java/examples/src/main/java/PipelineClientExample.java
index 1f459d82a99ad707c5803ab00d662eeceea56219..378d2c0c0209b1a51b1e5e75b4662633122f8398 100644
--- a/java/examples/src/main/java/PipelineClientExample.java
+++ b/java/examples/src/main/java/PipelineClientExample.java
@@ -62,7 +62,7 @@ public class PipelineClientExample {
         return false;
       }
     }
-    PipelineFuture future = StaticPipelineClient.client.asyn_pr::qedict(feed_data, fetch,false,0);
+    PipelineFuture future = StaticPipelineClient.client.asyn_predict(feed_data, fetch,false,0);
     HashMap result = future.get();
     if (result == null) {
       return false;
diff --git a/java/examples/src/main/java/StaticPipelineClient.java b/java/examples/src/main/java/StaticPipelineClient.java
index 7399b05969c712602bc097d36ec5db2380c89328..6a54ce2e5cc5e302c5debe07d119b21c0873f7a6 100644
--- a/java/examples/src/main/java/StaticPipelineClient.java
+++ b/java/examples/src/main/java/StaticPipelineClient.java
@@ -37,7 +37,7 @@ public class StaticPipelineClient {
       System.out.println("already connect.");
       return true;
     }
-    succ = clieint.connect(target);
+    succ = client.connect(target);
     if (succ != true) {
       System.out.println("connect failed.");
       return false;
diff --git a/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h b/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
index 92408cdacc581f7f9323840b87518df8ab8136ed..b3db6e1ad03d1822155918f9eb8714b6285972d1 100644
--- a/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
+++ b/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
@@ -128,20 +128,22 @@ class FluidArmAnalysisCore : public FluidFamilyCore {
     config.DisableGpu();
     config.SetCpuMathLibraryNumThreads(1);
 
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
     }
 
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
+    if (params.use_xpu()) {
+      config.EnableXpu(2 * 1024 * 1024);
     }
 
-    if (params.use_lite()) {
-      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    if (params.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
     }
 
-    if (params.use_xpu()) {
-      config.EnableXpu(100);
+    if (params.enable_ir_optimization()) {
+      config.SwitchIrOptim(true);
+    } else {
+      config.SwitchIrOptim(false);
     }
 
     config.SwitchSpecifyInputNames(true);
@@ -173,6 +175,14 @@ class FluidArmAnalysisDirCore : public FluidFamilyCore {
     config.SwitchSpecifyInputNames(true);
     config.SetCpuMathLibraryNumThreads(1);
 
+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (params.use_xpu()) {
+      config.EnableXpu(2 * 1024 * 1024);
+    }
+
     if (params.enable_memory_optimization()) {
       config.EnableMemoryOptim();
     }
@@ -183,14 +193,6 @@
       config.SwitchIrOptim(false);
     }
 
-    if (params.use_lite()) {
-      config.EnableLiteEngine(PrecisionType::kFloat32, true);
-    }
-
-    if (params.use_xpu()) {
-      config.EnableXpu(100);
-    }
-
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core = CreatePredictor(config);
     if (NULL == _core.get()) {
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 2f3865d67d22403c38d9db21fbfb39e98de2659f..d17844991ea342e142476acececb14ac2e6ae106 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -99,15 +99,27 @@ if (SERVER)
                 DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
     add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
   elseif(WITH_LITE)
-    add_custom_command(
-            OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
-            COMMAND cp -r
-            ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
-            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
-            "server_gpu" arm
-            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
-            DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
-    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+    if(WITH_XPU)
+      add_custom_command(
+              OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+              COMMAND cp -r
+              ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+              COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+              "server_gpu" arm-xpu
+              COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+              DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+      add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+    else()
+      add_custom_command(
+              OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+              COMMAND cp -r
+              ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+              COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+              "server_gpu" arm
+              COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+              DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+      add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+    endif()
   else()
     add_custom_command(
             OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index 5a641fe6358a62b67c435e9881d481c2c5616b1f..1c49f01f22cbc23cfecb70fb36d3a72ff0991e5f 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -132,6 +132,7 @@ class LocalPredictor(object):
                 ops_filter=[])
 
         if use_xpu:
+            # 8 MB XPU L3 cache workspace
             config.enable_xpu(8 * 1024 * 1024)
 
         self.predictor = create_paddle_predictor(config)
diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py
index 1f035db9262ffbd8e031c9b0018877eb2ba6fad2..fbe48180867faf9f2baba71fc3c5c8cf6ab771e2 100644
--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -20,7 +20,7 @@
 from paddle_serving_server import OpMaker, OpSeqMaker, Server
 from paddle_serving_client import Client
 from contextlib import closing
 import socket
-
+import numpy as np
 from paddle_serving_server import pipeline
 from paddle_serving_server.pipeline import Op
@@ -64,8 +64,8 @@ class WebService(object):
         f = open(client_config, 'r')
         model_conf = google.protobuf.text_format.Merge(
             str(f.read()), model_conf)
-        self.feed_names = [var.alias_name for var in model_conf.feed_var]
-        self.fetch_names = [var.alias_name for var in model_conf.fetch_var]
+        self.feed_vars = {var.name: var for var in model_conf.feed_var}
+        self.fetch_vars = {var.name: var for var in model_conf.fetch_var}
 
     def _launch_rpc_service(self):
         op_maker = OpMaker()
@@ -201,6 +201,15 @@ class WebService(object):
     def preprocess(self, feed=[], fetch=[]):
        print("This API will be deprecated later. Please do not use it")
        is_batch = True
+        feed_dict = {}
+        for var_name in self.feed_vars.keys():
+            feed_dict[var_name] = []
+        for feed_ins in feed:
+            for key in feed_ins:
+                feed_dict[key].append(np.array(feed_ins[key]).reshape(list(self.feed_vars[key].shape))[np.newaxis,:])
+        feed = {}
+        for key in feed_dict:
+            feed[key] = np.concatenate(feed_dict[key], axis=0)
         return feed, fetch, is_batch
 
     def postprocess(self, feed=[], fetch=[], fetch_map=None):
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index b8fe91bb594b1f91141658afcb876f2291d4d35e..44402e734f3b9dd22db4ae674cf85e5cff614f8f 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -212,6 +212,7 @@ class Server(object):
         self.module_path = os.path.dirname(paddle_serving_server.__file__)
         self.cur_path = os.getcwd()
         self.use_local_bin = False
+        self.device = "cpu"
         self.gpuid = 0
         self.use_trt = False
         self.use_lite = False
@@ -279,6 +280,9 @@
                 "GPU not found, please check your environment or use cpu version by \"pip install paddle_serving_server\""
             )
 
+    def set_device(self, device="cpu"):
+        self.device = device
+
     def set_gpuid(self, gpuid=0):
         self.gpuid = gpuid
 
@@ -311,18 +315,19 @@
             engine.static_optimization = False
             engine.force_update_static_cache = False
             engine.use_trt = self.use_trt
-            engine.use_lite = self.use_lite
-            engine.use_xpu = self.use_xpu
-
-
-
+            if os.path.exists('{}/__params__'.format(model_config_path)):
+                suffix = ""
+            else:
+                suffix = "_DIR"
+            if device == "arm":
+                engine.use_lite = self.use_lite
+                engine.use_xpu = self.use_xpu
             if device == "cpu":
-                engine.type = "FLUID_CPU_ANALYSIS_DIR"
+                engine.type = "FLUID_CPU_ANALYSIS" + suffix
             elif device == "gpu":
-                engine.type = "FLUID_GPU_ANALYSIS_DIR"
+                engine.type = "FLUID_GPU_ANALYSIS" + suffix
             elif device == "arm":
-                engine.type = "FLUID_ARM_ANALYSIS_DIR"
-
+                engine.type = "FLUID_ARM_ANALYSIS" + suffix
             self.model_toolkit_conf.engines.extend([engine])
 
     def _prepare_infer_service(self, port):
@@ -425,7 +430,7 @@
                         cuda_version = line.split("\"")[1]
                 if cuda_version == "101" or cuda_version == "102" or cuda_version == "110":
                     device_version = "serving-gpu-" + cuda_version + "-"
-                elif cuda_version == "arm":
+                elif cuda_version == "arm" or cuda_version == "arm-xpu":
                     device_version = "serving-" + cuda_version + "-"
                 else:
                     device_version = "serving-gpu-cuda" + cuda_version + "-"
@@ -528,7 +533,8 @@
         else:
             print("Use local bin : {}".format(self.bin_path))
         #self.check_cuda()
-        if self.use_lite:
+        # TODO: merge the CPU and GPU code paths; move device selection into model_toolkit
+        if self.device == "cpu" or self.device == "arm":
             command = "{} " \
                       "-enable_model_toolkit " \
                       "-inferservice_path {} " \
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index ffa4c2336fd4307f67fd2f3578a1aa3102850ce9..057a25e483cd7c160bc7bbef8b9378f9bf08f32c 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -73,6 +73,7 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
         server.set_lite()
         device = "arm"
+        server.set_device(device)
 
         if args.use_xpu:
             server.set_xpu()
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
index 4b89d90ee6893c3fafd596dc8f6c5cabc3a248bf..e2c24f4068da1a6ccccaa789186cab4e2a8fa6d9 100644
--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -70,8 +70,8 @@ class WebService(object):
         f = open(client_config, 'r')
         model_conf = google.protobuf.text_format.Merge(
             str(f.read()), model_conf)
-        self.feed_names = [var.alias_name for var in model_conf.feed_var]
-        self.fetch_names = [var.alias_name for var in model_conf.fetch_var]
+        self.feed_vars = {var.name: var for var in model_conf.feed_var}
+        self.fetch_vars = {var.name: var for var in model_conf.fetch_var}
 
     def set_gpus(self, gpus):
         print("This API will be deprecated later. Please do not use it")
@@ -107,6 +107,7 @@ class WebService(object):
         server.set_num_threads(thread_num)
         server.set_memory_optimize(mem_optim)
         server.set_ir_optimize(ir_optim)
+        server.set_device(device)
 
         if use_lite:
             server.set_lite()
@@ -278,6 +279,15 @@ class WebService(object):
     def preprocess(self, feed=[], fetch=[]):
         print("This API will be deprecated later. Please do not use it")
         is_batch = True
+        feed_dict = {}
+        for var_name in self.feed_vars.keys():
+            feed_dict[var_name] = []
+        for feed_ins in feed:
+            for key in feed_ins:
+                feed_dict[key].append(np.array(feed_ins[key]).reshape(list(self.feed_vars[key].shape))[np.newaxis,:])
+        feed = {}
+        for key in feed_dict:
+            feed[key] = np.concatenate(feed_dict[key], axis=0)
         return feed, fetch, is_batch
 
     def postprocess(self, feed=[], fetch=[], fetch_map=None):
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
index eaa04ee01411260f82992d4327c9d8ac033b91f0..65261dfa38f20a2174dc90fea70b5296187f0044 100644
--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -249,6 +249,8 @@ class LocalServiceHandler(object):
         server = Server()
         if gpuid >= 0:
             server.set_gpuid(gpuid)
+        # TODO: support arm or arm + xpu later
+        server.set_device(self._device_name)
 
         server.set_op_sequence(op_seq_maker.get_op_sequence())
         server.set_num_threads(thread_num)
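
Note for reviewers: the sketch below shows how the pieces added in this patch are meant to be combined when serving on ARM, optionally with an XPU. It is a usage illustration only and is not part of the diff: `set_device()` is introduced here and `set_lite()` / `set_xpu()` are rewired by this change, while the surrounding calls (`OpMaker`, `OpSeqMaker`, `load_model_config`, `prepare_server`, `run_server`) and the model directory name are assumed from the existing `paddle_serving_server_gpu` API.

```python
# Minimal launch sketch (assumptions noted above; model path is a placeholder).
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server

# Build the standard reader -> infer -> response op sequence.
op_maker = OpMaker()
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(op_maker.create('general_reader'))
op_seq_maker.add_op(op_maker.create('general_infer'))
op_seq_maker.add_op(op_maker.create('general_response'))

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_device("arm")   # new in this patch: routes launch through the CPU/ARM command path
server.set_lite()          # use the Paddle-Lite engine (FLUID_ARM_ANALYSIS[_DIR])
server.set_xpu()           # optional: enables the XPU L3-cache configuration
server.load_model_config("uci_housing_model")
server.prepare_server(workdir="workdir", port=9393, device="arm")
server.run_server()
```

With the device set to "arm", `_prepare_engine` picks `FLUID_ARM_ANALYSIS` (adding the `_DIR` suffix when the model directory has no `__params__` file) and the server is launched through the same command path as the CPU build.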