Commit 5d9210f7 authored by barrierye


Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into try-to-get-numay_array-from-ModelRes
......@@ -82,7 +82,9 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `port` | int | `9292` | Exposed port of current service to users|
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | bool | `False` | Enable memory optimization |
| `mem_optim` | bool | `False` | Enable memory / graphics memory optimization |
| `ir_optim` | bool | `False` | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | bool | `False` | Run inference with MKL |
Here, we use `curl` to send an HTTP POST request to the service we just started. Users can use any Python library to send HTTP POST requests as well, e.g., [requests](https://requests.readthedocs.io/en/master/).
</center>
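As a reference, the same request can be sent from Python with [requests](https://requests.readthedocs.io/en/master/). The sketch below assumes the service was started with `--name uci` on port 9292 and serves the uci_housing model (feed field `x`, fetch field `price`); these names are illustrative assumptions taken from the uci_housing example, not fixed by this document.

``` python
# Hedged sketch: POST one prediction request to the HTTP service started above.
# The URL path, feed key "x", and fetch key "price" are assumptions based on the
# uci_housing example; adjust them to the --name and model you actually serve.
import requests

payload = {
    "feed": [{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                    -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}],
    "fetch": ["price"],
}
resp = requests.post("http://127.0.0.1:9292/uci/prediction", json=payload)
print(resp.json())
```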
......
......@@ -87,6 +87,8 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
| `mem_optim` | bool | `False` | Enable memory optimization |
| `ir_optim` | bool | `False` | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | bool | `False` | Run inference with MKL |
Here we use the `curl` command to send an HTTP POST request to the service we just started. Users can also call a Python library to send HTTP POST requests; see the English documentation of [requests](https://requests.readthedocs.io/en/master/).
</center>
......
......@@ -43,6 +43,7 @@ message EngineDesc {
optional bool enable_memory_optimization = 13;
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
};
// model_toolkit conf
......
......@@ -345,7 +345,7 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
VLOG(2) << "batch size: " << batch_size;
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
......@@ -462,7 +462,7 @@ int PredictorClient::numpy_predict(
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
for (ssize_t l = 0; l < int_array.shape(3); l++) {
tensor->add_float_data(int_array(i, j, k, l));
tensor->add_int64_data(int_array(i, j, k, l));
}
}
}
......@@ -474,7 +474,7 @@ int PredictorClient::numpy_predict(
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
tensor->add_float_data(int_array(i, j, k));
tensor->add_int64_data(int_array(i, j, k));
}
}
}
......@@ -484,7 +484,7 @@ int PredictorClient::numpy_predict(
auto int_array = int_feed[vec_idx].unchecked<2>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
tensor->add_float_data(int_array(i, j));
tensor->add_int64_data(int_array(i, j));
}
}
break;
......@@ -492,7 +492,7 @@ int PredictorClient::numpy_predict(
case 1: {
auto int_array = int_feed[vec_idx].unchecked<1>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
tensor->add_float_data(int_array(i));
tensor->add_int64_data(int_array(i));
}
break;
}
......
......@@ -35,6 +35,7 @@ class InferEngineCreationParams {
InferEngineCreationParams() {
_path = "";
_enable_memory_optimization = false;
_enable_ir_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
}
......@@ -45,10 +46,16 @@ class InferEngineCreationParams {
_enable_memory_optimization = enable_memory_optimization;
}
void set_enable_ir_optimization(bool enable_ir_optimization) {
_enable_ir_optimization = enable_ir_optimization;
}
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
bool enable_ir_optimization() const { return _enable_ir_optimization; }
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
......@@ -68,6 +75,7 @@ class InferEngineCreationParams {
<< "model_path = " << _path << ", "
<< "enable_memory_optimization = " << _enable_memory_optimization
<< ", "
<< "enable_ir_optimization = " << _enable_ir_optimization << ", "
<< "static_optimization = " << _static_optimization << ", "
<< "force_update_static_cache = " << _force_update_static_cache;
}
......@@ -75,6 +83,7 @@ class InferEngineCreationParams {
private:
std::string _path;
bool _enable_memory_optimization;
bool _enable_ir_optimization;
bool _static_optimization;
bool _force_update_static_cache;
};
......@@ -150,6 +159,11 @@ class ReloadableInferEngine : public InferEngine {
force_update_static_cache = conf.force_update_static_cache();
}
if (conf.has_enable_ir_optimization()) {
_infer_engine_params.set_enable_ir_optimization(
conf.enable_ir_optimization());
}
_infer_engine_params.set_path(_model_data_path);
if (enable_memory_optimization) {
_infer_engine_params.set_enable_memory_optimization(true);
......
......@@ -9,14 +9,18 @@
- Golang: 1.9.2 and later
- Git:2.17.1 and later
- CMake:3.2.2 and later
- Python:2.7.2 and later
- Python:2.7.2 and later / 3.6 and later
It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you:
- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python 3, just adjust the Python options of cmake.
This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python3, just adjust the Python options of cmake:
- Set `DPYTHON_INCLUDE_DIR` to `$PYTHONROOT/include/python3.6m/`
- Set `DPYTHON_LIBRARIES` to `$PYTHONROOT/lib64/libpython3.6.so`
- Set `DPYTHON_EXECUTABLE` to `$PYTHONROOT/bin/python3`
## Get Code
......@@ -54,6 +58,8 @@ make -j10
execute `make install` to put targets under directory `./output`
**Attention:** After the compilation is successful, you need to set the path of `SERVING_BIN`. See [Note](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md#Note) for details.
## Compile Client
``` shell
......
......@@ -9,14 +9,18 @@
- Golang: 1.9.2 and later
- Git: 2.17.1 and later
- CMake: 3.2.2 and later
- Python: 2.7.2 and later
- Python: 2.7.2 and later / 3.6 and later
It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you:
- CPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
- GPU: `hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
This document takes Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python3, just adjust the Python-related options of cmake.
This document takes Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python3, just adjust the Python-related options of cmake:
- Set `DPYTHON_INCLUDE_DIR` to `$PYTHONROOT/include/python3.6m/`
- Set `DPYTHON_LIBRARIES` to `$PYTHONROOT/lib64/libpython3.6.so`
- Set `DPYTHON_EXECUTABLE` to `$PYTHONROOT/bin/python3`
## Get Code
......@@ -54,6 +58,8 @@ make -j10
Execute `make install` to put the build targets under the `./output` directory.
**Attention:** After the compilation is successful, you need to set the `SERVING_BIN` path; see the [Note](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) section later in this document for details.
## Compile Client
``` shell
......
# Performance optimization
Due to different model structures, different prediction services consume different amounts of computing resources when performing predictions. For online prediction services, models that require less computing resources spend a higher proportion of their time on communication and are called communication-intensive services; models that require more computing resources spend more time on inference calculation and are called computation-intensive services.
For a prediction service, the easiest way to determine which type it is is to look at the time ratio. Paddle Serving provides a [Timeline tool](../python/examples/util/README_CN.md), which can intuitively display the time spent in each stage of the prediction service.
For communication-intensive prediction services, requests can be aggregated: within a tolerable latency limit, multiple prediction requests can be combined into one batch for prediction.
For computation-intensive prediction services, you can use GPU prediction services instead of CPU prediction services, or increase the number of graphics cards of the GPU prediction service.
Under the same conditions, the communication time of the HTTP prediction service provided by Paddle Serving is longer than that of the RPC prediction service, so for communication-intensive services, please give priority to RPC communication.
Parameters for performance optimization:
| Parameters | Type | Default | Description |
| ---------- | ---- | ------- | ------------------------------------------------------------ |
| mem_optim | bool | False | Enable memory / graphics memory optimization |
| ir_optim | bool | False | Enable analysis and optimization of the calculation graph, including OP fusion, etc. |
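To make the two switches above concrete, here is a minimal, non-authoritative sketch of enabling them through the `paddle_serving_server` Python API that this commit extends (`set_memory_optimize` / `set_ir_optimize`); the model path `uci_housing_model`, thread count, and port are illustrative assumptions.

``` python
# Hedged sketch: start an RPC prediction service with mem_optim / ir_optim enabled.
# The model path, thread count, and port are illustrative assumptions.
from paddle_serving_server import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(op_maker.create('general_reader'))
op_seq_maker.add_op(op_maker.create('general_infer'))
op_seq_maker.add_op(op_maker.create('general_response'))

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(10)
server.set_memory_optimize(True)   # corresponds to --mem_optim
server.set_ir_optimize(True)       # corresponds to --ir_optim
server.load_model_config("uci_housing_model")
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()
```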
# Performance optimization
Due to different model structures, when performing predictions different predictions consume different amounts of computing resources. For online prediction services, models that require less computing resources have a higher proportion of communication time cost and are called communication-intensive services; models that require more computing resources have a higher inference time cost and are called computation-intensive services. For these two service types, different optimizations can be applied according to actual needs
Due to different model structures, when performing predictions different prediction services consume different amounts of computing resources. For online prediction services, models that require less computing resources have a higher proportion of communication time cost and are called communication-intensive services; models that require more computing resources have a higher inference time cost and are called computation-intensive services. For these two service types, different optimizations can be applied according to actual needs.
For a prediction service, the easiest way to determine which type it is is to look at the time ratio. Paddle Serving provides a [Timeline tool](../python/examples/util/README_CN.md), which can intuitively show the time spent in each stage of the prediction service.
......@@ -10,4 +10,9 @@
Under the same conditions, the communication time of the HTTP prediction service provided by Paddle Serving is longer than that of the RPC prediction service, so for communication-intensive services, please give priority to RPC communication.
For large models whose prediction service occupies a lot of memory or graphics memory, you can enable memory / graphics memory optimization by setting the --mem_optim option to True.
Parameters for performance optimization:
| Parameters | Type | Default | Description |
| --------- | ---- | ------ | -------------------------------- |
| mem_optim | bool | False | Enable memory / graphics memory optimization |
| ir_optim | bool | False | Enable analysis and optimization of the calculation graph, including OP fusion, etc. |
## How to save a servable model of Paddle Serving?
# How to save a servable model of Paddle Serving?
([简体中文](./SAVE_CN.md)|English)
- Currently, Paddle Serving provides a save_model interface for users; the interface is similar to `save_inference_model` of Paddle.
## Save from training or prediction script
Currently, Paddle Serving provides a save_model interface for users; the interface is similar to `save_inference_model` of Paddle.
``` python
import paddle_serving_client.io as serving_io
serving_io.save_model("imdb_model", "imdb_client_conf",
......@@ -29,3 +30,15 @@ for line in sys.stdin:
fetch_map = client.predict(feed=feed, fetch=fetch)
print("{} {}".format(fetch_map["prediction"][1], label[0]))
```
## Export from saved model files
If you have saved model files using Paddle's `save_inference_model` API, you can use Paddle Serving's `inference_model_to_serving` API to convert them into model files that can be used by Paddle Serving.
```
import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
```
dirname (str) - Path of saved model files. Program file and parameter files are saved in this directory.
model_filename (str, optional) - The name of the file from which to load the inference program. If it is None, the default filename `__model__` will be used. Default: None.
params_filename (str, optional) - The name of the file from which to load all parameters. It is only used when all parameters were saved in a single binary file. If parameters were saved in separate files, set it to None. Default: None.
serving_server (str, optional) - The path of model files and configuration files for server. Default: "serving_server".
serving_client (str, optional) - The path of configuration files for client. Default: "serving_client".
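A minimal usage sketch of the conversion API follows; the source directory `./inference_model` is a hypothetical path, and the keyword defaults mirror the signature shown above.

``` python
# Hedged sketch: convert a model saved with fluid.io.save_inference_model into
# Paddle Serving server/client files. "./inference_model" is a hypothetical path.
import paddle_serving_client.io as serving_io

serving_io.inference_model_to_serving(
    "./inference_model",
    model_filename=None,        # use the default __model__ program file
    params_filename=None,       # parameters saved in separate files
    serving_server="serving_server",
    serving_client="serving_client")
```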
## How to save a servable model of Paddle Serving?
# How to save a servable model of Paddle Serving?
(简体中文|[English](./SAVE.md))
- Currently, Paddle Serving provides a save_model interface for users; the interface is similar to `save_inference_model` of Paddle.
## Save from training or prediction script
Currently, Paddle Serving provides a save_model interface for users; the interface is similar to `save_inference_model` of Paddle.
``` python
import paddle_serving_client.io as serving_io
......@@ -29,3 +30,15 @@ for line in sys.stdin:
fetch_map = client.predict(feed=feed, fetch=fetch)
print("{} {}".format(fetch_map["prediction"][1], label[0]))
```
## Export from saved model files
If you have saved the model for inference using Paddle's `save_inference_model` API, you can use Paddle Serving's `inference_model_to_serving` API to convert it into model files that can be used by Paddle Serving.
```
import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
```
dirname (str) - Path of the model files to be converted. The Program structure file and parameter files are saved in this directory.
model_filename (str, optional) - The name of the file that stores the Inference Program structure of the model to be converted. If set to None, `__model__` is used as the default filename. Default: None.
params_filename (str, optional) - The name of the file that stores all parameters of the model to be converted. It needs to be specified if and only if all parameters were saved in a single binary file. If the parameters are stored in separate files, set it to None. Default: None.
serving_server (str, optional) - The storage path of the converted model files and configuration files. Default: "serving_server".
serving_client (str, optional) - The storage path of the converted client configuration files. Default: "serving_client".
......@@ -194,6 +194,12 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -198,6 +198,12 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -19,6 +19,8 @@ endif()
if (CLIENT)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py
${CMAKE_CURRENT_BINARY_DIR}/python_tag.py)
endif()
if (APP)
......@@ -43,7 +45,8 @@ if (APP)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif()
......@@ -52,6 +55,7 @@ add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
......
......@@ -2,16 +2,6 @@
([简体中文](./README_CN.md)|English)
### Compile Source Code
In the root directory of this git project:
```
mkdir build_server
cd build_server
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
make -j10
make install -j10
```
### Get Sample Dataset
Go to the directory `python/examples/criteo_ctr_with_cube`
......
## CTR prediction service with sparse parameter indexing
(简体中文|[English](./README.md))
### Compile Source Code
In the root directory of this project, run:
```
mkdir build_server
cd build_server
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
make -j10
make install -j10
```
### Get Sample Dataset
Go to the directory `python/examples/criteo_ctr_with_cube`
```
......
......@@ -71,6 +71,7 @@ class Debugger(object):
if profile:
config.enable_profile()
config.set_cpu_math_library_num_threads(cpu_num)
config.switch_ir_optim(False)
self.predictor = create_paddle_predictor(config)
......
......@@ -260,10 +260,16 @@ class Client(object):
if i == 0:
int_feed_names.append(key)
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
int_shape.append(list(feed_i[key].shape))
else:
int_shape.append(self.feed_shapes_[key])
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
#int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
int_slot.append(feed_i[key])
self.has_numpy_input = True
......@@ -274,10 +280,16 @@ class Client(object):
if i == 0:
float_feed_names.append(key)
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
float_shape.append(list(feed_i[key].shape))
else:
float_shape.append(self.feed_shapes_[key])
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
#float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
float_slot.append(feed_i[key])
self.has_numpy_input = True
......
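The client-side changes above add direct support for `numpy.ndarray` feeds (for non-LoD variables). A minimal usage sketch follows, assuming a client config and endpoint from the uci_housing example with feed variable `x` and fetch variable `price`; all of these names are illustrative assumptions.

``` python
# Hedged sketch: feed a numpy.ndarray to the RPC client. The config path,
# endpoint, and variable names are illustrative assumptions.
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

x = np.array([0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
              -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332])
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
print(fetch_map["price"])
```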
......@@ -103,17 +103,21 @@ def save_model(server_model_folder,
fout.write(config.SerializeToString())
def inference_model_to_serving(infer_model, serving_client, serving_server):
def inference_model_to_serving(dirname,
model_filename=None,
params_filename=None,
serving_server="serving_server",
serving_client="serving_client"):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_program, feed_target_names, fetch_targets = \
fluid.io.load_inference_model(dirname=infer_model, executor=exe)
fluid.io.load_inference_model(dirname=dirname, executor=exe, model_filename=model_filename, params_filename=params_filename)
feed_dict = {
x: inference_program.global_block().var(x)
for x in feed_target_names
}
fetch_dict = {x.name: x for x in fetch_targets}
save_model(serving_client, serving_server, feed_dict, fetch_dict,
save_model(serving_server, serving_client, feed_dict, fetch_dict,
inference_program)
feed_names = feed_dict.keys()
fetch_names = fetch_dict.keys()
......
......@@ -127,6 +127,7 @@ class Server(object):
self.model_toolkit_conf = None
self.resource_conf = None
self.memory_optimization = False
self.ir_optimization = False
self.model_conf = None
self.workflow_fn = "workflow.prototxt"
self.resource_fn = "resource.prototxt"
......@@ -175,6 +176,9 @@ class Server(object):
def set_memory_optimize(self, flag=False):
self.memory_optimization = flag
def set_ir_optimize(self, flag=False):
self.ir_optimization = flag
def check_local_bin(self):
if "SERVING_BIN" in os.environ:
self.use_local_bin = True
......@@ -195,6 +199,7 @@ class Server(object):
engine.enable_batch_align = 0
engine.model_data_path = model_config_path
engine.enable_memory_optimization = self.memory_optimization
engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
......@@ -244,7 +249,7 @@ class Server(object):
workflow_oi_config_path = None
if isinstance(model_config_paths, str):
# If there is only one model path, use the default infer_op.
# Because there are several infer_op type, we need to find
# Because there are several infer_op type, we need to find
# it from workflow_conf.
default_engine_names = [
'general_infer_0', 'general_dist_kv_infer_0',
......@@ -284,8 +289,8 @@ class Server(object):
# check config here
# print config here
def use_mkl(self):
self.mkl_flag = True
def use_mkl(self, flag):
self.mkl_flag = flag
def get_device_version(self):
avx_flag = False
......@@ -300,6 +305,10 @@ class Server(object):
else:
device_version = "serving-cpu-avx-openblas-"
else:
if mkl_flag:
print(
"Your CPU does not support AVX, server will running with noavx-openblas mode."
)
device_version = "serving-cpu-noavx-openblas-"
return device_version
......
......@@ -41,6 +41,9 @@ def parse_args(): # pylint: disable=doc-string-missing
"--device", type=str, default="cpu", help="Type of device")
parser.add_argument(
"--mem_optim", type=bool, default=False, help="Memory optimize")
parser.add_argument(
"--ir_optim", type=bool, default=False, help="Graph optimize")
parser.add_argument("--use_mkl", type=bool, default=False, help="Use MKL")
parser.add_argument(
"--max_body_size",
type=int,
......@@ -57,7 +60,9 @@ def start_standard_model(): # pylint: disable=doc-string-missing
workdir = args.workdir
device = args.device
mem_optim = args.mem_optim
ir_optim = args.ir_optim
max_body_size = args.max_body_size
use_mkl = args.use_mkl
if model == "":
print("You must specify your serving model")
......@@ -78,6 +83,8 @@ def start_standard_model(): # pylint: disable=doc-string-missing
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.use_mkl(use_mkl)
server.set_max_body_size(max_body_size)
server.set_port(port)
......
......@@ -47,6 +47,8 @@ def serve_args():
"--name", type=str, default="None", help="Default service name")
parser.add_argument(
"--mem_optim", type=bool, default=False, help="Memory optimize")
parser.add_argument(
"--ir_optim", type=bool, default=False, help="Graph optimize")
parser.add_argument(
"--max_body_size",
type=int,
......@@ -156,6 +158,7 @@ class Server(object):
self.model_toolkit_conf = None
self.resource_conf = None
self.memory_optimization = False
self.ir_optimization = False
self.model_conf = None
self.workflow_fn = "workflow.prototxt"
self.resource_fn = "resource.prototxt"
......@@ -204,6 +207,9 @@ class Server(object):
def set_memory_optimize(self, flag=False):
self.memory_optimization = flag
def set_ir_optimize(self, flag=False):
self.ir_optimization = flag
def check_local_bin(self):
if "SERVING_BIN" in os.environ:
self.use_local_bin = True
......@@ -240,6 +246,7 @@ class Server(object):
engine.enable_batch_align = 0
engine.model_data_path = model_config_path
engine.enable_memory_optimization = self.memory_optimization
engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
......
......@@ -35,6 +35,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
thread_num = args.thread
model = args.model
mem_optim = args.mem_optim
ir_optim = args.ir_optim
max_body_size = args.max_body_size
workdir = "{}_{}".format(args.workdir, gpuid)
......@@ -57,6 +58,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size)
server.load_model_config(model)
......
......@@ -21,7 +21,7 @@ RUN yum -y install wget && \
wget https://www.python.org/ftp/python/2.7.5/Python-2.7.5.tgz && \
tar -zxf Python-2.7.5.tgz && \
cd Python-2.7.5 && \
./configure --prefix=/usr/local/python2.7 --enable-shared && \
./configure --prefix=/usr/local/python2.7 --enable-shared --enable-unicode=ucs4 && \
make all && make install && \
make clean && \
echo 'export PATH=/usr/local/python2.7/bin:$PATH' >> /root/.bashrc && \
......
......@@ -21,7 +21,7 @@ RUN yum -y install wget && \
wget https://www.python.org/ftp/python/2.7.5/Python-2.7.5.tgz && \
tar -zxf Python-2.7.5.tgz && \
cd Python-2.7.5 && \
./configure --prefix=/usr/local/python2.7 --enable-shared && \
./configure --prefix=/usr/local/python2.7 --enable-shared --enable-unicode=ucs4 && \
make all && make install && \
make clean && \
echo 'export PATH=/usr/local/python2.7/bin:$PATH' >> /root/.bashrc && \
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
import re
with open("setup.cfg", "w") as f:
line = "[bdist_wheel]\npython-tag={0}{1}\nplat-name=linux_x86_64".format(
get_abbr_impl(), get_impl_ver())
f.write(line)