提交 2a8dac26 编写于 作者: M MRXLT

bug fix

上级 6e5a02fa
......@@ -128,6 +128,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `mem_optim_off` | - | - | Disable memory / GPU memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
Here, we use `curl` to send an HTTP POST request to the service we just started. Users can use any Python library to send HTTP POST requests as well, e.g., [requests](https://requests.readthedocs.io/en/master/).
</center>
......
......@@ -124,6 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `mem_optim_off` | - | - | Disable memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
我们使用 `curl` 命令来发送HTTP POST请求给刚刚启动的服务。用户也可以调用python库来发送HTTP POST请求,请参考英文文档 [requests](https://requests.readthedocs.io/en/master/)。
</center>
......
......@@ -13,7 +13,9 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
# When the build is configured with WITH_TRT, pass -DWITH_TRT to the compiler
# so that sources can select TensorRT-specific code with `#ifdef WITH_TRT`.
if (WITH_TRT)
add_definitions(-DWITH_TRT)
endif()
target_link_libraries(pdserving
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
......@@ -563,10 +563,12 @@ class CloneDBReloadableInferEngine
};
template <typename FluidFamilyCore>
// class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore>
// {
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
public:
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
......@@ -622,7 +624,7 @@ class VersionedInferEngine : public InferEngine {
LOG(ERROR) << "Failed initialize engine, type:" << engine_type;
return -1;
}
VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
FLAGS_logtostderr = tmp;
#else
if (engine->proc_initialize(conf, version) != 0) {
......
......@@ -18,6 +18,8 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-an
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
#cuda 10.1 with TensorRT 6
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
```
### Python 2
```
......@@ -25,6 +27,8 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
#cuda 10.1 with TensorRT 6
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py2-none-any.whl
```
## Client
......
......@@ -198,7 +198,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim();
}
#if 0
#if 0 // todo: support flexible shape
int min_seq_len = 1;
int max_seq_len = 512;
......@@ -238,7 +238,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape);
#endif
int max_batch = 256;
int max_batch = 32;
int min_subgraph_size = 3;
if (params.use_trt()) {
analysis_config.EnableTensorRtEngine(
......@@ -246,8 +246,8 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
max_batch,
min_subgraph_size,
paddle::AnalysisConfig::Precision::kFloat32,
true,
true);
false,
false);
LOG(INFO) << "create TensorRT predictor";
} else {
if (params.enable_memory_optimization()) {
......
......@@ -403,7 +403,10 @@ class Server(object):
for line in version_file.readlines():
if re.match("cuda_version", line):
cuda_version = line.split("\"")[1]
device_version = "serving-gpu-cuda" + cuda_version + "-"
if cuda_version != "trt":
device_version = "serving-gpu-cuda" + cuda_version + "-"
else:
device_version = "serving-gpu-" + cuda_version + "-"
folder_name = device_version + serving_server_version
tar_name = folder_name + ".tar.gz"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册