Commit 2a8dac26 authored by MRXLT

bug fix

Parent 6e5a02fa
...
@@ -128,6 +128,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `mem_optim_off` | - | - | Disable memory / graphic memory optimization |
 | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
 | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
+| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |

 Here, we use `curl` to send an HTTP POST request to the service we just started. Users can use any Python library to send an HTTP POST request as well, e.g., [requests](https://requests.readthedocs.io/en/master/).
 </center>
...
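The prose above leans on `curl` for the smoke test; here is a minimal sketch of that request against the uci_housing demo service. Port 9292, the `/uci/prediction` route, and the feed/fetch names are assumptions taken from the quick-start example, not from this commit:

```
# Hedged example: send the HTTP POST described above to the demo service.
# Port 9292, the /uci/prediction route and the feed/fetch names are taken
# from the uci_housing quick-start, not from this diff.
curl -H "Content-Type:application/json" -X POST \
  -d '{"feed":[{"x":[0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0501, -0.2015, -0.0832, 0.0320, 0.0208, 0.0792, -0.0469, -0.0795]}], "fetch":["price"]}' \
  http://127.0.0.1:9292/uci/prediction
```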
...
@@ -124,6 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `mem_optim_off` | - | - | Disable memory optimization |
 | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
 | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
+| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |

 We use the `curl` command to send an HTTP POST request to the service we just started. Users can also send HTTP POST requests from any Python library; see the [requests](https://requests.readthedocs.io/en/master/) documentation.
 </center>
...
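The `use_trt` row added to both tables is a launch flag for the serve entry point. A hedged sketch of turning it on: the module name `paddle_serving_server_gpu`, the model directory, and the port are assumptions (the CPU command in the hunk headers suggests the pattern); only `--use_trt` itself is documented by this commit:

```
# Hedged example: start the HTTP service with TensorRT inference enabled.
# Module name, model directory and port are assumptions; --use_trt is the
# flag documented in the tables above.
python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --use_trt
```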
...
@@ -13,7 +13,9 @@ set_source_files_properties(
     PROPERTIES
     COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
 add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
+if (WITH_TRT)
+    add_definitions(-DWITH_TRT)
+endif()
 target_link_libraries(pdserving
                       brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
...
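This `WITH_TRT` option defines the preprocessor macro that the `infer.h` change below keys on. A minimal sketch of a configure line that enables it, assuming the project's usual out-of-source CMake flow and omitting the CUDA/cuDNN/TensorRT path flags a real GPU build would also need:

```
# Hedged example: enable the WITH_TRT branch at configure time.
# Other flags required for a GPU/TensorRT build are omitted here.
cmake -DWITH_TRT=ON ..
make -j
```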
...
@@ -563,10 +563,12 @@ class CloneDBReloadableInferEngine
 };

 template <typename FluidFamilyCore>
-// class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore>
-// {
+#ifdef WITH_TRT
 class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
- public:
+#else
+class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+#endif
+ public:  // NOLINT
   FluidInferEngine() {}
   ~FluidInferEngine() {}
...
@@ -622,7 +624,7 @@ class VersionedInferEngine : public InferEngine {
       LOG(ERROR) << "Failed initialize engine, type:" << engine_type;
       return -1;
     }
-    VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
+    VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
     FLAGS_logtostderr = tmp;
 #else
     if (engine->proc_initialize(conf, version) != 0) {
...
...
@@ -18,6 +18,8 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-an
 https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
 #cuda 10.0
 https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
+#cuda 10.1 with TensorRT 6
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
 ```
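Installing the new TensorRT build is a plain wheel install from the URL just added; a sketch, assuming a Python 3 environment matching the wheel tag:

```
# Hedged example: install the TensorRT (cuda 10.1) server wheel listed above.
pip3 install https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
```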
 ### Python 2
 ```
...
@@ -25,6 +27,8 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-
 https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
 #cuda 10.0
 https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
+#cuda 10.1 with TensorRT 6
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py2-none-any.whl
 ```
 ## Client
...
...
@@ -198,7 +198,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }

-#if 0
+#if 0  // todo: support flexible shape
     int min_seq_len = 1;
     int max_seq_len = 512;
...
@@ -238,7 +238,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
     analysis_config.SetTRTDynamicShapeInfo(
         min_input_shape, max_input_shape, opt_input_shape);
 #endif
-    int max_batch = 256;
+    int max_batch = 32;
     int min_subgraph_size = 3;
     if (params.use_trt()) {
       analysis_config.EnableTensorRtEngine(
...
@@ -246,8 +246,8 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
         max_batch,
         min_subgraph_size,
         paddle::AnalysisConfig::Precision::kFloat32,
-        true,
-        true);
+        false,
+        false);
     LOG(INFO) << "create TensorRT predictor";
   } else {
     if (params.enable_memory_optimization()) {
...
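One hedged reading of the last hunk: in the Paddle `AnalysisConfig::EnableTensorRtEngine` API of this generation, the parameters following the workspace size (elided above) are max_batch_size, min_subgraph_size, precision, use_static, and use_calib_mode. If that signature holds here, the commit caps the TensorRT batch size at 32 and turns off engine serialization and INT8 calibration. That interpretation is drawn from the public Paddle API, not from anything stated in the commit itself.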
...
@@ -403,7 +403,10 @@ class Server(object):
             for line in version_file.readlines():
                 if re.match("cuda_version", line):
                     cuda_version = line.split("\"")[1]
-                    device_version = "serving-gpu-cuda" + cuda_version + "-"
+                    if cuda_version != "trt":
+                        device_version = "serving-gpu-cuda" + cuda_version + "-"
+                    else:
+                        device_version = "serving-gpu-" + cuda_version + "-"
         folder_name = device_version + serving_server_version
         tar_name = folder_name + ".tar.gz"
...
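A quick trace of the new branch: when the version file records `cuda_version` as "trt", `device_version` becomes "serving-gpu-trt-", so with a hypothetical `serving_server_version` of 0.0.0 the server now resolves `serving-gpu-trt-0.0.0.tar.gz` rather than the malformed `serving-gpu-cudatrt-0.0.0.tar.gz` that the old line would have produced (the version number here is illustrative).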