未验证 提交 22e202ed 编写于 作者: J Jiawei Wang 提交者: GitHub

Merge pull request #976 from zhangjun/bugfix

[Bugfix]Serving使用xpu异常修复,兼容性完善
......@@ -136,8 +136,8 @@ if (WITH_TRT)
endif()
if (WITH_LITE)
ADD_LIBRARY(paddle_api_full_bundled STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_api_full_bundled PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_api_full_bundled.a)
# Import the prebuilt Paddle-Lite full API library.
# libpaddle_full_api_shared.so is a shared object, so the imported target is
# declared SHARED (like xpuapi) instead of STATIC; this lets CMake treat it as
# a runtime dependency rather than as an archive.
ADD_LIBRARY(paddle_full_api_shared SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_full_api_shared PROPERTY IMPORTED_LOCATION "${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_full_api_shared.so")
if (WITH_XPU)
ADD_LIBRARY(xpuapi SHARED IMPORTED GLOBAL)
......@@ -157,7 +157,7 @@ LIST(APPEND paddle_depend_libs
xxhash)
if(WITH_LITE)
LIST(APPEND paddle_depend_libs paddle_api_full_bundled)
LIST(APPEND paddle_depend_libs paddle_full_api_shared)
if(WITH_XPU)
LIST(APPEND paddle_depend_libs xpuapi xpurt)
endif()
......
......@@ -128,20 +128,22 @@ class FluidArmAnalysisCore : public FluidFamilyCore {
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
if (params.use_xpu()) {
config.EnableXpu(2 * 1024 * 1024);
}
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.use_xpu()) {
config.EnableXpu(100);
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
config.SwitchIrOptim(false);
}
config.SwitchSpecifyInputNames(true);
......@@ -173,6 +175,14 @@ class FluidArmAnalysisDirCore : public FluidFamilyCore {
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
config.EnableXpu(2 * 1024 * 1024);
}
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
......@@ -183,14 +193,6 @@ class FluidArmAnalysisDirCore : public FluidFamilyCore {
config.SwitchIrOptim(false);
}
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
config.EnableXpu(100);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
......
......@@ -99,15 +99,27 @@ if (SERVER)
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_LITE)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" arm
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
# The XPU and non-XPU Lite builds package the same Python sources; the only
# difference is the version tag handed to gen_version.py.
if(WITH_XPU)
  set(serving_lite_version_tag arm-xpu)
else()
  set(serving_lite_version_tag arm)
endif()

# Copy the server package next to setup.py, stamp the version, and build the wheel.
add_custom_command(
  OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
  COMMAND cp -r
  ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
  COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
  "server_gpu" ${serving_lite_version_tag}
  COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
  DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
# Part of the default build (ALL); depends on the output of the command above.
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
......
......@@ -132,6 +132,7 @@ class LocalPredictor(object):
ops_filter=[])
if use_xpu:
# 8MB l3 cache
config.enable_xpu(8 * 1024 * 1024)
self.predictor = create_paddle_predictor(config)
......
......@@ -212,6 +212,7 @@ class Server(object):
self.module_path = os.path.dirname(paddle_serving_server.__file__)
self.cur_path = os.getcwd()
self.use_local_bin = False
self.device = "cpu"
self.gpuid = 0
self.use_trt = False
self.use_lite = False
......@@ -279,6 +280,9 @@ class Server(object):
"GPU not found, please check your environment or use cpu version by \"pip install paddle_serving_server\""
)
# Store the device name on this Server instance (defaults to "cpu").
# The stored value is compared against "cpu" and "arm" when the launch
# command is assembled.
def set_device(self, device="cpu"):
self.device = device
def set_gpuid(self, gpuid=0):
self.gpuid = gpuid
......@@ -311,8 +315,9 @@ class Server(object):
engine.static_optimization = False
engine.force_update_static_cache = False
engine.use_trt = self.use_trt
engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu
if device == "arm":
engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu
......@@ -425,7 +430,7 @@ class Server(object):
cuda_version = line.split("\"")[1]
if cuda_version == "101" or cuda_version == "102" or cuda_version == "110":
device_version = "serving-gpu-" + cuda_version + "-"
elif cuda_version == "arm":
elif cuda_version == "arm" or cuda_version == "arm-xpu":
device_version = "serving-" + cuda_version + "-"
else:
device_version = "serving-gpu-cuda" + cuda_version + "-"
......@@ -528,7 +533,8 @@ class Server(object):
else:
print("Use local bin : {}".format(self.bin_path))
#self.check_cuda()
if self.use_lite:
# TODO: merge the CPU and GPU code paths and move the device setting into model_toolkit
if self.device == "cpu" or self.device == "arm":
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
......
......@@ -73,6 +73,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
server.set_lite()
device = "arm"
server.set_device(device)
if args.use_xpu:
server.set_xpu()
......
......@@ -107,6 +107,7 @@ class WebService(object):
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_device(device)
if use_lite:
server.set_lite()
......
......@@ -249,6 +249,8 @@ class LocalServiceHandler(object):
server = Server()
if gpuid >= 0:
server.set_gpuid(gpuid)
# TODO: support arm or arm + xpu later
server.set_device(self._device_name)
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册