@@ -31,7 +32,7 @@ Paddle Serving aims to help deep learning developers easily deploy online prediction services
Installation
-**We strongly recommend** that you build Paddle Serving **inside Docker**; see [How to run PaddleServing in Docker](doc/RUN_IN_DOCKER_CN.md)
+**We strongly recommend** that you build Paddle Serving **inside Docker**; see [How to run PaddleServing in Docker](doc/RUN_IN_DOCKER_CN.md). For more images, see the [Docker image list](doc/DOCKER_IMAGES_CN.md).
```
# Start CPU Docker
@@ -41,21 +42,26 @@ docker exec -it test bash
```
```
# Start GPU Docker
-nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-gpu
-nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-gpu
+nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:latest-cuda9.0-cudnn7
+nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:latest-cuda9.0-cudnn7
nvidia-docker exec -it test bash
```
```shell
-pip install paddle-serving-client
-pip install paddle-serving-server # CPU
-pip install paddle-serving-server-gpu # GPU
+pip install paddle-serving-client==0.3.2
+pip install paddle-serving-server==0.3.2 # CPU
+pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
```
You may need to use a domestic mirror (for example the Tsinghua mirror: add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
If you need packages built from the develop branch, get the download URL from the [latest packages list](./doc/LATEST_PACKAGES.md) and install it with `pip install`.
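For example, a minimal sketch of installing the CPU server package through the Tsinghua mirror (version number taken from the commands above):
```shell
pip install paddle-serving-server==0.3.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
```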
-Paddle Serving packages support Centos 6/7 and Ubuntu 16/18; alternatively you can use the HTTP service, in which case there is no need to install the client.
+The paddle-serving-server and paddle-serving-server-gpu packages support Centos 6/7 and Ubuntu 16/18.
+
+The paddle-serving-client and paddle-serving-app packages support Linux and Windows; paddle-serving-client only supports python2.7/3.5/3.6.
+
+We recommend installing paddle 1.8.2 or later.
Services pre-installed with Paddle Serving
@@ -76,7 +82,7 @@ Paddle Serving packages support Centos 6/7 and Ubuntu 16/18, or you can use the HT
-
+
``` shell
> python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
> tar -xzf resnet_v2_50_imagenet.tar.gz
@@ -115,9 +121,10 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `port` | int | `9292` | Exposed port of current service to users|
| `name` | str | `""` | Service name, can be used to generate HTTP request url |
| `model` | str | `""` | Path of paddle model directory to be served |
-| `mem_optim` | - | - | Enable memory optimization |
+| `mem_optim_off` | - | - | Disable memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
+| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
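As an illustrative sketch only (flag names from the table above; the `uci_housing_model` path follows the quick-start example), a CPU server could be launched with memory optimization disabled, graph optimization enabled, and MKL inference:
```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --mem_optim_off --ir_optim --use_mkl
```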
We use the `curl` command to send an HTTP POST request to the service just started. You can also call a Python library to send HTTP POST requests; see the English documentation of [requests](https://requests.readthedocs.io/en/master/).
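A sketch of such a request, assuming an HTTP service started with `--name uci` on port 9292 and the uci_housing model, whose feed variable `x` takes 13 floats and whose fetch variable is `price` (adjust names and values to your own model):
```shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
```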
@@ -164,6 +171,11 @@ print(fetch_map)
- [End-to-end workflow from training to deployment](doc/TRAIN_TO_SERVICE_CN.md)
- [Build Bert-As-Service in ten minutes](doc/BERT_10_MINS_CN.md)
+### AIStudio Tutorials
+- [PaddleServing assignment](https://aistudio.baidu.com/aistudio/projectdetail/605819)
+- [Image segmentation with PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/457715)
+- [Sentiment analysis with PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/509014)
+
### Developer Tutorials
- [How to configure the server-side computation graph?](doc/SERVER_DAG_CN.md)
- [How to develop a new General Op?](doc/NEW_OPERATOR_CN.md)
diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake
index f5ef70379a5562617e77a9e2ff46587cd48a0f6c..39412f6950b7d4fe71f294079b69707b202f0876 100644
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@@ -40,8 +40,8 @@ ExternalProject_Add(
extern_brpc
${EXTERNAL_PROJECT_LOG_ARGS}
# TODO(gongwb): change to de newst repo when they changed.
- GIT_REPOSITORY "https://github.com/gongweibao/brpc"
- GIT_TAG "e9b67ec1b7458f2af5fae76451afe1e27e01b4b4"
+ GIT_REPOSITORY "https://github.com/wangjiawei04/brpc"
+ GIT_TAG "6d79e0b17f25107c35b705ea58d888083f59ff47"
PREFIX ${BRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index a19400bfda735e4205551c2caaba0e78fafc6ff1..c72a5cac52ccf1c03a0c132083e3ac43c83fb868 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -143,7 +143,6 @@ function(grpc_protobuf_generate_python SRCS)
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
endfunction()
-
# Print and set the protobuf library information,
# finish this cmake process and exit from this file.
macro(PROMPT_PROTOBUF_LIB)
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 7670444ed1e021376fa44491973bb748cf611ecf..4b7d3ed1f620bfcd2e1e214c49c57ee3848129e7 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -31,10 +31,14 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
-SET(PADDLE_VERSION "1.7.2")
+SET(PADDLE_VERSION "1.8.4")
if (WITH_GPU)
- SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
+ if (WITH_TRT)
+ SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
+ else()
+ SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
+ endif()
else()
if (WITH_AVX)
if (WITH_MKLML)
@@ -50,21 +54,38 @@ endif()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
if (WITH_GPU OR WITH_MKLML)
-ExternalProject_Add(
- "extern_paddle"
- ${EXTERNAL_PROJECT_LOG_ARGS}
- URL "${PADDLE_LIB_PATH}"
- PREFIX "${PADDLE_SOURCES_DIR}"
- DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ""
- UPDATE_COMMAND ""
- INSTALL_COMMAND
- ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
- ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
- ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
- ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
-)
+ if (WITH_TRT)
+ ExternalProject_Add(
+ "extern_paddle"
+ ${EXTERNAL_PROJECT_LOG_ARGS}
+ URL "${PADDLE_LIB_PATH}"
+ PREFIX "${PADDLE_SOURCES_DIR}"
+ DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
+ CONFIGURE_COMMAND ""
+ BUILD_COMMAND ""
+ UPDATE_COMMAND ""
+ INSTALL_COMMAND
+ ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+ ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+ ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party
+ )
+ else()
+ ExternalProject_Add(
+ "extern_paddle"
+ ${EXTERNAL_PROJECT_LOG_ARGS}
+ URL "${PADDLE_LIB_PATH}"
+ PREFIX "${PADDLE_SOURCES_DIR}"
+ DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
+ CONFIGURE_COMMAND ""
+ BUILD_COMMAND ""
+ UPDATE_COMMAND ""
+ INSTALL_COMMAND
+ ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+ ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+ ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
+ ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
+ )
+ endif()
else()
ExternalProject_Add(
"extern_paddle"
@@ -92,8 +113,16 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+
+if (WITH_TRT)
+ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
+
+ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
+endif()
ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
@@ -101,4 +130,9 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
LIST(APPEND external_project_dependencies paddle)
LIST(APPEND paddle_depend_libs
- xxhash)
+ xxhash)
+
+if(WITH_TRT)
+LIST(APPEND paddle_depend_libs
+ nvinfer nvinfer_plugin)
+endif()
diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt
index c3b0be5142896f87868cdd7c13686b87f03c573a..9d9487dc9e2513388b70d03e5ac1d875079d95f4 100644
--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
@@ -86,6 +86,7 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
diff --git a/core/configure/proto/multi_lang_general_model_service.proto b/core/configure/proto/multi_lang_general_model_service.proto
index 6e1764b23b3e6f7d9eb9a33925bcd83cfb1810bb..18fbcf760647e1694e738c0832fe45f4f7d9934f 100644
--- a/core/configure/proto/multi_lang_general_model_service.proto
+++ b/core/configure/proto/multi_lang_general_model_service.proto
@@ -14,6 +14,12 @@
syntax = "proto2";
+package baidu.paddle_serving.multi_lang;
+
+option java_multiple_files = true;
+option java_package = "io.paddle.serving.grpc";
+option java_outer_classname = "ServingProto";
+
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
@@ -28,16 +34,18 @@ message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
-message Request {
+message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
+ required uint64 log_id = 5 [ default = 0 ];
};
-message Response {
+message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
+ required int32 err_code = 3;
};
message ModelOutput {
@@ -45,6 +53,17 @@ message ModelOutput {
optional string engine_name = 2;
}
+message SetTimeoutRequest { required int32 timeout_ms = 1; }
+
+message SimpleResponse { required int32 err_code = 1; }
+
+message GetClientConfigRequest {}
+
+message GetClientConfigResponse { required string client_config_str = 1; }
+
service MultiLangGeneralModelService {
- rpc inference(Request) returns (Response) {}
+ rpc Inference(InferenceRequest) returns (InferenceResponse) {}
+ rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
+ rpc GetClientConfig(GetClientConfigRequest)
+ returns (GetClientConfigResponse) {}
};
diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index 8956022685090c94be2037445c646e9fbffd1a5c..c008ee857bb7c69672e399ce44b2420d5db7fb3c 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -44,6 +44,7 @@ message EngineDesc {
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
+ optional bool use_trt = 17;
};
// model_toolkit conf
@@ -58,6 +59,8 @@ message ResourceConf {
optional string cube_config_path = 5;
optional string cube_config_file = 6;
optional int32 cube_quant_bits = 7; // set 0 if no quant.
+ optional string auth_product_name = 8;
+ optional string auth_container_id = 9;
};
// DAG node depency info
diff --git a/core/cube/CMakeLists.txt b/core/cube/CMakeLists.txt
index 07cf04977b618a515a2459f646c2dba298a5d58b..f9dc4d2c2508720f450b4aee3aba5dfdd7ccd43b 100644
--- a/core/cube/CMakeLists.txt
+++ b/core/cube/CMakeLists.txt
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License
+#execute_process(COMMAND go env -w GO111MODULE=off)
add_subdirectory(cube-server)
add_subdirectory(cube-api)
add_subdirectory(cube-builder)
-add_subdirectory(cube-transfer)
-add_subdirectory(cube-agent)
+#add_subdirectory(cube-transfer)
+#add_subdirectory(cube-agent)
diff --git a/core/cube/cube-api/include/meta.h b/core/cube/cube-api/include/meta.h
index 69bbb8ccc12e423d286183ed5dd87e90bf2e59de..ec872a38d8b0294f7b06e8557848f6e8ca79aa2b 100644
--- a/core/cube/cube-api/include/meta.h
+++ b/core/cube/cube-api/include/meta.h
@@ -22,7 +22,8 @@
#ifdef BCLOUD
#include "baidu/rpc/channel.h"
#include "baidu/rpc/parallel_channel.h"
-#include "rapidjson/document.h"
+#include "rapidjson_1.0/document.h"
+#include "rapidjson_1.0/rapidjson.h"
#else
#include "brpc/channel.h"
#include "brpc/parallel_channel.h"
diff --git a/core/cube/cube-api/src/cube_cli.cpp b/core/cube/cube-api/src/cube_cli.cpp
index eee4b0c31ad83ca69d242e81bae3ce4ecfb5bf1a..4a29ef46392af22deb1b1a633d799f9846e86c59 100644
--- a/core/cube/cube-api/src/cube_cli.cpp
+++ b/core/cube/cube-api/src/cube_cli.cpp
@@ -13,6 +13,7 @@
// limitations under the License.
#include <gflags/gflags.h>
+#include <algorithm>
#include <atomic>
#include <fstream>
#include <thread>  //NOLINT
@@ -31,8 +32,9 @@ DEFINE_bool(print_output, false, "print output flag");
DEFINE_int32(thread_num, 1, "thread num");
std::atomic<int> g_concurrency(0);
-std::vector<uint64_t> time_list;
+std::vector<std::vector<uint64_t>> time_list;
std::vector<uint64_t> request_list;
+int turns = 1000;
namespace {
inline uint64_t time_diff(const struct timeval& start_time,
@@ -93,14 +95,15 @@ int run(int argc, char** argv, int thread_id) {
uint64_t file_size = key_list.size();
uint64_t index = 0;
uint64_t request = 0;
-
while (g_concurrency.load() >= FLAGS_thread_num) {
}
g_concurrency++;
-
- while (index < file_size) {
+ time_list[thread_id].resize(turns);
+ while (request < turns) {
// uint64_t key = strtoul(buffer, NULL, 10);
-
+ if (index >= file_size) {
+ index = 0;
+ }
keys.push_back(key_list[index]);
index += 1;
int ret = 0;
@@ -121,47 +124,12 @@ int run(int argc, char** argv, int thread_id) {
}
++seek_counter;
uint64_t seek_cost = time_diff(seek_start, seek_end);
- seek_cost_total += seek_cost;
- if (seek_cost > seek_cost_max) {
- seek_cost_max = seek_cost;
- }
- if (seek_cost < seek_cost_min) {
- seek_cost_min = seek_cost;
- }
+ time_list[thread_id][request - 1] = seek_cost;
keys.clear();
values.clear();
}
}
- /*
- if (keys.size() > 0) {
- int ret = 0;
- values.resize(keys.size());
- TIME_FLAG(seek_start);
- ret = cube->seek(FLAGS_dict, keys, &values);
- TIME_FLAG(seek_end);
- if (ret != 0) {
- LOG(WARNING) << "cube seek failed";
- } else if (FLAGS_print_output) {
- for (size_t i = 0; i < keys.size(); ++i) {
- fprintf(stdout,
- "key:%lu value:%s\n",
- keys[i],
- string_to_hex(values[i].buff).c_str());
- }
- }
-
- ++seek_counter;
- uint64_t seek_cost = time_diff(seek_start, seek_end);
- seek_cost_total += seek_cost;
- if (seek_cost > seek_cost_max) {
- seek_cost_max = seek_cost;
- }
- if (seek_cost < seek_cost_min) {
- seek_cost_min = seek_cost;
- }
- }
- */
g_concurrency--;
// fclose(key_file);
@@ -171,12 +139,6 @@ int run(int argc, char** argv, int thread_id) {
LOG(WARNING) << "destroy cube api failed err=" << ret;
}
- uint64_t seek_cost_avg = seek_cost_total / seek_counter;
- LOG(INFO) << "seek cost avg = " << seek_cost_avg;
- LOG(INFO) << "seek cost max = " << seek_cost_max;
- LOG(INFO) << "seek cost min = " << seek_cost_min;
-
- time_list[thread_id] = seek_cost_avg;
request_list[thread_id] = request;
return 0;
@@ -188,6 +150,7 @@ int run_m(int argc, char** argv) {
request_list.resize(thread_num);
time_list.resize(thread_num);
std::vector<std::thread *> thread_pool;
+ TIME_FLAG(main_start);
for (int i = 0; i < thread_num; i++) {
thread_pool.push_back(new std::thread(run, argc, argv, i));
}
@@ -195,28 +158,43 @@ int run_m(int argc, char** argv) {
thread_pool[i]->join();
delete thread_pool[i];
}
+ TIME_FLAG(main_end);
uint64_t sum_time = 0;
uint64_t max_time = 0;
uint64_t min_time = 1000000;
- uint64_t request_num = 0;
+ std::vector<uint64_t> all_time_list;
for (int i = 0; i < thread_num; i++) {
- sum_time += time_list[i];
- if (time_list[i] > max_time) {
- max_time = time_list[i];
- }
- if (time_list[i] < min_time) {
- min_time = time_list[i];
+ for (int j = 0; j < request_list[i]; j++) {
+ sum_time += time_list[i][j];
+ if (time_list[i][j] > max_time) {
+ max_time = time_list[i][j];
+ }
+ if (time_list[i][j] < min_time) {
+ min_time = time_list[i][j];
+ }
+ all_time_list.push_back(time_list[i][j]);
}
- request_num += request_list[i];
}
- uint64_t mean_time = sum_time / thread_num;
- LOG(INFO) << thread_num << " thread seek cost"
- << " avg = " << std::to_string(mean_time)
- << " max = " << std::to_string(max_time)
- << " min = " << std::to_string(min_time);
- LOG(INFO) << " total_request = " << std::to_string(request_num) << " speed = "
- << std::to_string(1000000 * thread_num / mean_time) // mean_time us
- << " query per second";
+ std::sort(all_time_list.begin(), all_time_list.end());
+ uint64_t mean_time = sum_time / (thread_num * turns);
+ uint64_t main_time = time_diff(main_start, main_end);
+ uint64_t request_num = turns * thread_num;
+ LOG(INFO)
+ << "\n"
+ << thread_num << " thread seek cost"
+ << "\navg: " << std::to_string(mean_time) << "\n50 percent: "
+ << std::to_string(all_time_list[static_cast<int>(0.5 * request_num)])
+ << "\n80 percent: "
+ << std::to_string(all_time_list[static_cast<int>(0.8 * request_num)])
+ << "\n90 percent: "
+ << std::to_string(all_time_list[static_cast<int>(0.9 * request_num)])
+ << "\n99 percent: "
+ << std::to_string(all_time_list[static_cast<int>(0.99 * request_num)])
+ << "\n99.9 percent: "
+ << std::to_string(all_time_list[static_cast<int>(0.999 * request_num)])
+ << "\ntotal_request: " << std::to_string(request_num) << "\nspeed: "
+ << std::to_string(turns * 1000000 / main_time) // mean_time us
+ << " query per second";
return 0;
}
diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index b379188854c30587d24962bc827aa099c3a39183..3ee960069fd1eb8575d39fe4797038f9d4ef9f3b 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -49,6 +49,8 @@ class ModelRes {
res._int64_value_map.end());
_float_value_map.insert(res._float_value_map.begin(),
res._float_value_map.end());
+ _int32_value_map.insert(res._int32_value_map.begin(),
+ res._int32_value_map.end());
_shape_map.insert(res._shape_map.begin(), res._shape_map.end());
_lod_map.insert(res._lod_map.begin(), res._lod_map.end());
}
@@ -60,6 +62,9 @@ class ModelRes {
_float_value_map.insert(
std::make_move_iterator(std::begin(res._float_value_map)),
std::make_move_iterator(std::end(res._float_value_map)));
+ _int32_value_map.insert(
+ std::make_move_iterator(std::begin(res._int32_value_map)),
+ std::make_move_iterator(std::end(res._int32_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
@@ -78,6 +83,12 @@ class ModelRes {
std::vector<float>&& get_float_by_name_with_rv(const std::string& name) {
return std::move(_float_value_map[name]);
}
+ const std::vector<int32_t>& get_int32_by_name(const std::string& name) {
+ return _int32_value_map[name];
+ }
+ std::vector<int32_t>&& get_int32_by_name_with_rv(const std::string& name) {
+ return std::move(_int32_value_map[name]);
+ }
const std::vector<int>& get_shape_by_name(const std::string& name) {
return _shape_map[name];
}
@@ -103,6 +114,9 @@ class ModelRes {
_float_value_map.insert(
std::make_move_iterator(std::begin(res._float_value_map)),
std::make_move_iterator(std::end(res._float_value_map)));
+ _int32_value_map.insert(
+ std::make_move_iterator(std::begin(res._int32_value_map)),
+ std::make_move_iterator(std::end(res._int32_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
@@ -115,6 +129,7 @@ class ModelRes {
std::string _engine_name;
std::map<std::string, std::vector<int64_t>> _int64_value_map;
std::map<std::string, std::vector<float>> _float_value_map;
+ std::map<std::string, std::vector<int32_t>> _int32_value_map;
std::map<std::string, std::vector<int>> _shape_map;
std::map<std::string, std::vector<int>> _lod_map;
};
@@ -145,6 +160,14 @@ class PredictorRes {
const std::string& name) {
return std::move(_models[model_idx].get_float_by_name_with_rv(name));
}
+ const std::vector<int32_t>& get_int32_by_name(const int model_idx,
+ const std::string& name) {
+ return _models[model_idx].get_int32_by_name(name);
+ }
+ std::vector<int32_t>&& get_int32_by_name_with_rv(const int model_idx,
+ const std::string& name) {
+ return std::move(_models[model_idx].get_int32_by_name_with_rv(name));
+ }
const std::vector<int>& get_shape_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_shape_by_name(name);
@@ -195,27 +218,19 @@ class PredictorClient {
int destroy_predictor();
- int batch_predict(
- const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
- const std::vector<std::string>& float_feed_name,
- const std::vector<std::vector<int>>& float_shape,
- const std::vector<std::vector<std::vector<int64_t>>>& int_feed_batch,
- const std::vector<std::string>& int_feed_name,
- const std::vector<std::vector<int>>& int_shape,
- const std::vector<std::string>& fetch_name,
- PredictorRes& predict_res_batch, // NOLINT
- const int& pid);
-
int numpy_predict(
const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
+ const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
+ const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
- const int& pid);
+ const int& pid,
+ const uint64_t log_id);
private:
PredictorApi _api;
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index d4e54c2ac04cf84b2a036f7abe0d426e6f186699..c2db765a082bf2e18aa7fe88c614a6bc8bb457c8 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -39,7 +39,9 @@ using configure::GeneralModelConfig;
void PredictorClient::init_gflags(std::vector argv) {
std::call_once(gflags_init_flag, [&]() {
+#ifndef BCLOUD
FLAGS_logtostderr = true;
+#endif
argv.insert(argv.begin(), "dummy");
int argc = argv.size();
char **arr = new char *[argv.size()];
@@ -135,216 +137,19 @@ int PredictorClient::create_predictor() {
return 0;
}
-int PredictorClient::batch_predict(
- const std::vector<std::vector<std::vector<float>>> &float_feed_batch,
- const std::vector<std::string> &float_feed_name,
- const std::vector<std::vector<int>> &float_shape,
- const std::vector<std::vector<std::vector<int64_t>>> &int_feed_batch,
- const std::vector<std::string> &int_feed_name,
- const std::vector<std::vector<int>> &int_shape,
- const std::vector<std::string> &fetch_name,
- PredictorRes &predict_res_batch,
- const int &pid) {
- int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
-
- predict_res_batch.clear();
- Timer timeline;
- int64_t preprocess_start = timeline.TimeStampUS();
-
- int fetch_name_num = fetch_name.size();
-
- _api.thrd_initialize();
- std::string variant_tag;
- _predictor = _api.fetch_predictor("general_model", &variant_tag);
- predict_res_batch.set_variant_tag(variant_tag);
- VLOG(2) << "fetch general model predictor done.";
- VLOG(2) << "float feed name size: " << float_feed_name.size();
- VLOG(2) << "int feed name size: " << int_feed_name.size();
- VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
- Request req;
- for (auto &name : fetch_name) {
- req.add_fetch_var_names(name);
- }
-
- for (int bi = 0; bi < batch_size; bi++) {
- VLOG(2) << "prepare batch " << bi;
- std::vector<Tensor *> tensor_vec;
- FeedInst *inst = req.add_insts();
- std::vector<std::vector<float>> float_feed = float_feed_batch[bi];
- std::vector<std::vector<int64_t>> int_feed = int_feed_batch[bi];
- for (auto &name : float_feed_name) {
- tensor_vec.push_back(inst->add_tensor_array());
- }
-
- for (auto &name : int_feed_name) {
- tensor_vec.push_back(inst->add_tensor_array());
- }
-
- VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
- << "prepared";
- int vec_idx = 0;
- VLOG(2) << "tensor_vec size " << tensor_vec.size() << " float shape "
- << float_shape.size();
- for (auto &name : float_feed_name) {
- int idx = _feed_name_to_idx[name];
- Tensor *tensor = tensor_vec[idx];
- VLOG(2) << "prepare float feed " << name << " shape size "
- << float_shape[vec_idx].size();
- for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
- tensor->add_shape(float_shape[vec_idx][j]);
- }
- tensor->set_elem_type(1);
- for (uint32_t j = 0; j < float_feed[vec_idx].size(); ++j) {
- tensor->add_float_data(float_feed[vec_idx][j]);
- }
- vec_idx++;
- }
-
- VLOG(2) << "batch [" << bi << "] "
- << "float feed value prepared";
-
- vec_idx = 0;
- for (auto &name : int_feed_name) {
- int idx = _feed_name_to_idx[name];
- Tensor *tensor = tensor_vec[idx];
- VLOG(2) << "prepare int feed " << name << " shape size "
- << int_shape[vec_idx].size();
- for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
- tensor->add_shape(int_shape[vec_idx][j]);
- }
- tensor->set_elem_type(0);
- VLOG(3) << "feed var name " << name << " index " << vec_idx
- << "first data " << int_feed[vec_idx][0];
- for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) {
- tensor->add_int64_data(int_feed[vec_idx][j]);
- }
- vec_idx++;
- }
-
- VLOG(2) << "batch [" << bi << "] "
- << "int feed value prepared";
- }
-
- int64_t preprocess_end = timeline.TimeStampUS();
-
- int64_t client_infer_start = timeline.TimeStampUS();
-
- Response res;
-
- int64_t client_infer_end = 0;
- int64_t postprocess_start = 0;
- int64_t postprocess_end = 0;
-
- if (FLAGS_profile_client) {
- if (FLAGS_profile_server) {
- req.set_profile_server(true);
- }
- }
-
- res.Clear();
- if (_predictor->inference(&req, &res) != 0) {
- LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
- _api.thrd_clear();
- return -1;
- } else {
- client_infer_end = timeline.TimeStampUS();
- postprocess_start = client_infer_end;
- VLOG(2) << "get model output num";
- uint32_t model_num = res.outputs_size();
- VLOG(2) << "model num: " << model_num;
- for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
- VLOG(2) << "process model output index: " << m_idx;
- auto output = res.outputs(m_idx);
- ModelRes model;
- model.set_engine_name(output.engine_name());
-
- int idx = 0;
-
- for (auto &name : fetch_name) {
- // int idx = _fetch_name_to_idx[name];
- int shape_size = output.insts(0).tensor_array(idx).shape_size();
- VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
- << shape_size;
- model._shape_map[name].resize(shape_size);
- for (int i = 0; i < shape_size; ++i) {
- model._shape_map[name][i] =
- output.insts(0).tensor_array(idx).shape(i);
- }
- int lod_size = output.insts(0).tensor_array(idx).lod_size();
- if (lod_size > 0) {
- model._lod_map[name].resize(lod_size);
- for (int i = 0; i < lod_size; ++i) {
- model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
- }
- }
- idx += 1;
- }
-
- idx = 0;
- for (auto &name : fetch_name) {
- // int idx = _fetch_name_to_idx[name];
- if (_fetch_name_to_type[name] == 0) {
- VLOG(2) << "ferch var " << name << "type int";
- model._int64_value_map[name].resize(
- output.insts(0).tensor_array(idx).int64_data_size());
- int size = output.insts(0).tensor_array(idx).int64_data_size();
- for (int i = 0; i < size; ++i) {
- model._int64_value_map[name][i] =
- output.insts(0).tensor_array(idx).int64_data(i);
- }
- } else {
- VLOG(2) << "fetch var " << name << "type float";
- model._float_value_map[name].resize(
- output.insts(0).tensor_array(idx).float_data_size());
- int size = output.insts(0).tensor_array(idx).float_data_size();
- for (int i = 0; i < size; ++i) {
- model._float_value_map[name][i] =
- output.insts(0).tensor_array(idx).float_data(i);
- }
- }
- idx += 1;
- }
- predict_res_batch.add_model_res(std::move(model));
- }
- postprocess_end = timeline.TimeStampUS();
- }
-
- if (FLAGS_profile_client) {
- std::ostringstream oss;
- oss << "PROFILE\t"
- << "pid:" << pid << "\t"
- << "prepro_0:" << preprocess_start << " "
- << "prepro_1:" << preprocess_end << " "
- << "client_infer_0:" << client_infer_start << " "
- << "client_infer_1:" << client_infer_end << " ";
- if (FLAGS_profile_server) {
- int op_num = res.profile_time_size() / 2;
- for (int i = 0; i < op_num; ++i) {
- oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
- oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
- }
- }
-
- oss << "postpro_0:" << postprocess_start << " ";
- oss << "postpro_1:" << postprocess_end;
-
- fprintf(stderr, "%s\n", oss.str().c_str());
- }
-
- _api.thrd_clear();
- return 0;
-}
-
int PredictorClient::numpy_predict(
const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
+ const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
+ const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
- const int &pid) {
+ const int &pid,
+ const uint64_t log_id) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
VLOG(2) << "batch size: " << batch_size;
predict_res_batch.clear();
@@ -362,6 +167,7 @@ int PredictorClient::numpy_predict(
VLOG(2) << "int feed name size: " << int_feed_name.size();
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req;
+ req.set_log_id(log_id);
for (auto &name : fetch_name) {
req.add_fetch_var_names(name);
}
@@ -394,6 +200,9 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
+ for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
+ tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
+ }
tensor->set_elem_type(1);
const int float_shape_size = float_shape[vec_idx].size();
switch (float_shape_size) {
@@ -448,12 +257,22 @@ int PredictorClient::numpy_predict(
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor *tensor = tensor_vec[idx];
- VLOG(2) << "prepare int feed " << name << " shape size "
- << int_shape[vec_idx].size();
+
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
- tensor->set_elem_type(0);
+ for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
+ tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
+ }
+ tensor->set_elem_type(_type[idx]);
+
+ if (_type[idx] == 0) {
+ VLOG(2) << "prepare int feed " << name << " shape size "
+ << int_shape[vec_idx].size();
+ } else {
+ VLOG(2) << "prepare int32 feed " << name << " shape size "
+ << int_shape[vec_idx].size();
+ }
const int int_shape_size = int_shape[vec_idx].size();
switch (int_shape_size) {
@@ -463,7 +282,11 @@ int PredictorClient::numpy_predict(
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
for (ssize_t l = 0; k < int_array.shape(3); l++) {
- tensor->add_int64_data(int_array(i, j, k, l));
+ if (_type[idx] == 0) {
+ tensor->add_int64_data(int_array(i, j, k, l));
+ } else {
+ tensor->add_int_data(int_array(i, j, k, l));
+ }
}
}
}
@@ -475,7 +298,11 @@ int PredictorClient::numpy_predict(
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
for (ssize_t k = 0; k < int_array.shape(2); k++) {
- tensor->add_int64_data(int_array(i, j, k));
+ if (_type[idx] == 0) {
+ tensor->add_int64_data(int_array(i, j, k));
+ } else {
+ tensor->add_int_data(int_array(i, j, k));
+ }
}
}
}
@@ -485,7 +312,11 @@ int PredictorClient::numpy_predict(
auto int_array = int_feed[vec_idx].unchecked<2>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
for (ssize_t j = 0; j < int_array.shape(1); j++) {
- tensor->add_int64_data(int_array(i, j));
+ if (_type[idx] == 0) {
+ tensor->add_int64_data(int_array(i, j));
+ } else {
+ tensor->add_int_data(int_array(i, j));
+ }
}
}
break;
@@ -493,7 +324,11 @@ int PredictorClient::numpy_predict(
case 1: {
auto int_array = int_feed[vec_idx].unchecked<1>();
for (ssize_t i = 0; i < int_array.shape(0); i++) {
- tensor->add_int64_data(int_array(i));
+ if (_type[idx] == 0) {
+ tensor->add_int64_data(int_array(i));
+ } else {
+ tensor->add_int_data(int_array(i));
+ }
}
break;
}
@@ -563,23 +398,23 @@ int PredictorClient::numpy_predict(
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == 0) {
- VLOG(2) << "ferch var " << name << "type int";
- model._int64_value_map[name].resize(
- output.insts(0).tensor_array(idx).int64_data_size());
+ VLOG(2) << "ferch var " << name << "type int64";
int size = output.insts(0).tensor_array(idx).int64_data_size();
- for (int i = 0; i < size; ++i) {
- model._int64_value_map[name][i] =
- output.insts(0).tensor_array(idx).int64_data(i);
- }
- } else {
+ model._int64_value_map[name] = std::vector<int64_t>(
+ output.insts(0).tensor_array(idx).int64_data().begin(),
+ output.insts(0).tensor_array(idx).int64_data().begin() + size);
+ } else if (_fetch_name_to_type[name] == 1) {
VLOG(2) << "fetch var " << name << "type float";
- model._float_value_map[name].resize(
- output.insts(0).tensor_array(idx).float_data_size());
int size = output.insts(0).tensor_array(idx).float_data_size();
- for (int i = 0; i < size; ++i) {
- model._float_value_map[name][i] =
- output.insts(0).tensor_array(idx).float_data(i);
- }
+ model._float_value_map[name] = std::vector<float>(
+ output.insts(0).tensor_array(idx).float_data().begin(),
+ output.insts(0).tensor_array(idx).float_data().begin() + size);
+ } else if (_fetch_name_to_type[name] == 2) {
+ VLOG(2) << "fetch var " << name << "type int32";
+ int size = output.insts(0).tensor_array(idx).int_data_size();
+ model._int32_value_map[name] = std::vector<int32_t>(
+ output.insts(0).tensor_array(idx).int_data().begin(),
+ output.insts(0).tensor_array(idx).int_data().begin() + size);
}
idx += 1;
}
@@ -613,7 +448,6 @@ int PredictorClient::numpy_predict(
_api.thrd_clear();
return 0;
}
-
} // namespace general_model
} // namespace paddle_serving
} // namespace baidu
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index 3e065e4de1ff3c01ff6bc05cb39a2607620915b4..a0ac6caf2e42d9c4eee475648a371681ad30b135 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -95,52 +95,34 @@ PYBIND11_MODULE(serving_client, m) {
[](PredictorClient &self) { self.create_predictor(); })
.def("destroy_predictor",
[](PredictorClient &self) { self.destroy_predictor(); })
- .def("batch_predict",
- [](PredictorClient &self,
- const std::vector<std::vector<std::vector<float>>>
- &float_feed_batch,
- const std::vector<std::string> &float_feed_name,
- const std::vector<std::vector<int>> &float_shape,
- const std::vector<std::vector<std::vector<int64_t>>>
- &int_feed_batch,
- const std::vector<std::string> &int_feed_name,
- const std::vector<std::vector<int>> &int_shape,
- const std::vector<std::string> &fetch_name,
- PredictorRes &predict_res_batch,
- const int &pid) {
- return self.batch_predict(float_feed_batch,
- float_feed_name,
- float_shape,
- int_feed_batch,
- int_feed_name,
- int_shape,
- fetch_name,
- predict_res_batch,
- pid);
- },
- py::call_guard<py::gil_scoped_release>())
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
&float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
+ const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>
&int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
+ const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
- const int &pid) {
+ const int &pid,
+ const uint64_t log_id) {
return self.numpy_predict(float_feed_batch,
float_feed_name,
float_shape,
+ float_lod_slot_batch,
int_feed_batch,
int_feed_name,
int_shape,
+ int_lod_slot_batch,
fetch_name,
predict_res_batch,
- pid);
+ pid,
+ log_id);
},
py::call_guard<py::gil_scoped_release>());
}
diff --git a/core/general-server/CMakeLists.txt b/core/general-server/CMakeLists.txt
index 9056e229a51f56463dc2eec5629f219d00dc6a38..aa1b7badc9140301d84bdbd94b3324b52176e837 100644
--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
@@ -9,7 +9,7 @@ endif()
target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
-
+ include_directories(${CUDNN_ROOT}/include/)
if(WITH_GPU)
target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-Wl,--no-whole-archive)
@@ -29,7 +29,11 @@ if(WITH_GPU)
endif()
if(WITH_MKL OR WITH_GPU)
+ if (WITH_TRT)
+ target_link_libraries(serving -liomp5 -lmklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
+ else()
target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
+endif()
else()
target_link_libraries(serving openblas -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
endif()
diff --git a/core/general-server/op/general_copy_op.cpp b/core/general-server/op/general_copy_op.cpp
index 322bcc07795f1b053847991eae17cb3922dd7a7b..0391a98bcb7f471c0a0687dd9deb7b404a15a2bf 100644
--- a/core/general-server/op/general_copy_op.cpp
+++ b/core/general-server/op/general_copy_op.cpp
@@ -45,36 +45,41 @@ int GeneralCopyOp::inference() {
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
- VLOG(2) << "precedent name: " << pre_name;
+ uint64_t log_id = input_blob->GetLogId();
+
+ VLOG(2) << "(logid=" << log_id << ") precedent name: " << pre_name;
const TensorVector *in = &input_blob->tensor_vector;
- VLOG(2) << "input size: " << in->size();
+ VLOG(2) << "(logid=" << log_id << ") input size: " << in->size();
int batch_size = input_blob->GetBatchSize();
int input_var_num = 0;
GeneralBlob *res = mutable_data<GeneralBlob>();
+ res->SetLogId(log_id);
TensorVector *out = &res->tensor_vector;
- VLOG(2) << "input batch size: " << batch_size;
+ VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
res->SetBatchSize(batch_size);
if (!res) {
- LOG(ERROR) << "Failed get op tls reader object output";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed get op tls reader object output";
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
- VLOG(2) << "Going to init lod tensor";
+ VLOG(2) << "(logid=" << log_id << ") Going to init lod tensor";
for (int i = 0; i < in->size(); ++i) {
paddle::PaddleTensor lod_tensor;
CopyLod(&in->at(i), &lod_tensor);
lod_tensor.dtype = in->at(i).dtype;
lod_tensor.name = in->at(i).name;
- VLOG(2) << "lod tensor [" << i << "].name = " << lod_tensor.name;
+ VLOG(2) << "(logid=" << log_id << ") lod tensor [" << i
+ << "].name = " << lod_tensor.name;
out->push_back(lod_tensor);
}
- VLOG(2) << "pack done.";
+ VLOG(2) << "(logid=" << log_id << ") pack done.";
for (int i = 0; i < out->size(); ++i) {
int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
@@ -86,7 +91,7 @@ int GeneralCopyOp::inference() {
}
}
- VLOG(2) << "output done.";
+ VLOG(2) << "(logid=" << log_id << ") output done.";
timeline.Pause();
int64_t end = timeline.TimeStampUS();
@@ -94,7 +99,7 @@ int GeneralCopyOp::inference() {
AddBlobInfo(res, start);
AddBlobInfo(res, end);
- VLOG(2) << "read data from client success";
+ VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
diff --git a/core/general-server/op/general_copy_op.h b/core/general-server/op/general_copy_op.h
index 89627ffb9e4d15bbcbfa6c7fc3a608ada03dad6e..9b4caadc6a82f1f1a601ab66394b3f629af703ff 100644
--- a/core/general-server/op/general_copy_op.h
+++ b/core/general-server/op/general_copy_op.h
@@ -13,20 +13,12 @@
// limitations under the License.
#pragma once
-#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include
+#include
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/op/general_dist_kv_infer_op.cpp b/core/general-server/op/general_dist_kv_infer_op.cpp
index 9c6c70352b5387fab95acd16cdf79aa2b46f6122..6809907226511f7de576f1e2bbdc21b7ac401422 100644
--- a/core/general-server/op/general_dist_kv_infer_op.cpp
+++ b/core/general-server/op/general_dist_kv_infer_op.cpp
@@ -50,18 +50,20 @@ int GeneralDistKVInferOp::inference() {
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
- VLOG(2) << "Get precedent op name: " << pre_name;
+ uint64_t log_id = input_blob->GetLogId();
+ VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!input_blob) {
- LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
- VLOG(2) << "input batch size: " << batch_size;
+ VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
std::vector<uint64_t> keys;
std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0;
@@ -90,16 +92,20 @@ int GeneralDistKVInferOp::inference() {
keys.begin() + key_idx);
key_idx += dataptr_size_pairs[i].second;
}
+ Timer timeline;
+ int64_t cube_start = timeline.TimeStampUS();
+ timeline.Start();
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) {
- LOG(ERROR) << "cube init error or cube config not given.";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") cube init error or cube config not given.";
return -1;
}
int ret = cube->seek(table_names[0], keys, &values);
-
+ int64_t cube_end = timeline.TimeStampUS();
if (values.size() != keys.size() || values[0].buff.size() == 0) {
- LOG(ERROR) << "cube value return null";
+ LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
}
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
TensorVector sparse_out;
@@ -150,21 +156,23 @@ int GeneralDistKVInferOp::inference() {
infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
output_blob->SetBatchSize(batch_size);
+ output_blob->SetLogId(log_id);
- VLOG(2) << "infer batch size: " << batch_size;
+ VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
- Timer timeline;
int64_t start = timeline.TimeStampUS();
- timeline.Start();
if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) {
- LOG(ERROR) << "Failed do infer in fluid model: " << engine_name();
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed do infer in fluid model: " << engine_name();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
+ AddBlobInfo(output_blob, cube_start);
+ AddBlobInfo(output_blob, cube_end);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
diff --git a/core/general-server/op/general_dist_kv_infer_op.h b/core/general-server/op/general_dist_kv_infer_op.h
index 2dee5bca6f9e12dbb8b36a6c39aa0a8e77763d23..56d19ee366feaf000d7b24f4017b39155b7e65c1 100644
--- a/core/general-server/op/general_dist_kv_infer_op.h
+++ b/core/general-server/op/general_dist_kv_infer_op.h
@@ -15,17 +15,9 @@
#pragma once
#include
#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/op/general_dist_kv_quant_infer_op.cpp b/core/general-server/op/general_dist_kv_quant_infer_op.cpp
index 8752e8a72085c946b097cecf62a0bdbf90d682c4..93ce76f3d3399ac62435352d2271154ab7f84235 100644
--- a/core/general-server/op/general_dist_kv_quant_infer_op.cpp
+++ b/core/general-server/op/general_dist_kv_quant_infer_op.cpp
@@ -59,10 +59,13 @@ int GeneralDistKVQuantInferOp::inference() {
return -1;
}
+ uint64_t log_id = input_blob->GetLogId();
+ output_blob->SetLogId(log_id);
+
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
- VLOG(2) << "input batch size: " << batch_size;
+ VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
std::vector<uint64_t> keys;
std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0;
@@ -94,13 +97,14 @@ int GeneralDistKVQuantInferOp::inference() {
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) {
- LOG(ERROR) << "cube init error or cube config not given.";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") cube init error or cube config not given.";
return -1;
}
int ret = cube->seek(table_names[0], keys, &values);
if (values.size() != keys.size() || values[0].buff.size() == 0) {
- LOG(ERROR) << "cube value return null";
+ LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
}
TensorVector sparse_out;
@@ -182,7 +186,7 @@ int GeneralDistKVQuantInferOp::inference() {
output_blob->SetBatchSize(batch_size);
- VLOG(2) << "infer batch size: " << batch_size;
+ VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
@@ -190,7 +194,8 @@ int GeneralDistKVQuantInferOp::inference() {
if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) {
- LOG(ERROR) << "Failed do infer in fluid model: " << engine_name();
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed do infer in fluid model: " << engine_name();
return -1;
}
diff --git a/core/general-server/op/general_dist_kv_quant_infer_op.h b/core/general-server/op/general_dist_kv_quant_infer_op.h
index e153311a2a2e2df1bd12720e2ce6cbe9ddb31ec0..0f99e2072374bc4bc0b76a1ca876a152f98488b6 100644
--- a/core/general-server/op/general_dist_kv_quant_infer_op.h
+++ b/core/general-server/op/general_dist_kv_quant_infer_op.h
@@ -15,17 +15,9 @@
#pragma once
#include
#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h
index 4fa1995664a2dca449ebc228079c86919a32d328..40320348349a43aa79ce0d599f3aebeb764dc10e 100644
--- a/core/general-server/op/general_infer_helper.h
+++ b/core/general-server/op/general_infer_helper.h
@@ -15,17 +15,9 @@
#pragma once
#include
+#include
#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
#include "paddle_inference_api.h" // NOLINT
-#endif
-#include
namespace baidu {
namespace paddle_serving {
@@ -35,6 +27,7 @@ struct GeneralBlob {
std::vector<paddle::PaddleTensor> tensor_vector;
int64_t time_stamp[20];
int p_size = 0;
+ uint64_t _log_id = -1; // for logging
int _batch_size;
@@ -46,9 +39,11 @@ struct GeneralBlob {
tensor_vector.clear();
}
- int SetBatchSize(int batch_size) { _batch_size = batch_size; }
+ void SetBatchSize(int batch_size) { _batch_size = batch_size; }
+ void SetLogId(uint64_t log_id) { _log_id = log_id; }
int GetBatchSize() const { return _batch_size; }
+ uint64_t GetLogId() const { return _log_id; }
std::string ShortDebugString() const { return "Not implemented!"; }
};
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index a9ff2e7226b25842889e391d82217b3b6a140170..b9478542c71e04b0f3f80b277da7d8d41f636d3d 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -47,22 +47,26 @@ int GeneralInferOp::inference() {
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
- VLOG(2) << "Get precedent op name: " << pre_name;
+ uint64_t log_id = input_blob->GetLogId();
+ VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+ output_blob->SetLogId(log_id);
if (!input_blob) {
- LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
- int batch_size = input_blob->GetBatchSize();
- VLOG(2) << "input batch size: " << batch_size;
- output_blob->SetBatchSize(batch_size);
+ int batch_size = input_blob->_batch_size;
+ VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
- VLOG(2) << "infer batch size: " << batch_size;
+ output_blob->_batch_size = batch_size;
+
+ VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
@@ -70,7 +74,8 @@ int GeneralInferOp::inference() {
if (InferManager::instance().infer(
engine_name().c_str(), in, out, batch_size)) {
- LOG(ERROR) << "Failed do infer in fluid model: " << engine_name().c_str();
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed do infer in fluid model: " << engine_name().c_str();
return -1;
}
diff --git a/core/general-server/op/general_infer_op.h b/core/general-server/op/general_infer_op.h
index ff0b210ad7c6824a7e8a61e9ac504a65eafa4c58..b41784185ff445c540774b8b24ef897caf6fbf96 100644
--- a/core/general-server/op/general_infer_op.h
+++ b/core/general-server/op/general_infer_op.h
@@ -15,17 +15,9 @@
#pragma once
#include
#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 7d48949b22d0ace289ab3b9214f092819f5476e0..0329fac6b9bb6eda59f3f6f1589cd00c3eec0fd9 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -37,9 +37,9 @@ int conf_check(const Request *req,
const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
int var_num = req->insts(0).tensor_array_size();
if (var_num != model_config->_feed_type.size()) {
- VLOG(2) << "var num: " << var_num;
- VLOG(2) << "model config var num: " << model_config->_feed_type.size();
- LOG(ERROR) << "feed var number not match.";
+ LOG(ERROR) << "feed var number not match: model config["
+ << model_config->_feed_type.size() << "] vs. actual[" << var_num
+ << "]";
return -1;
}
@@ -72,8 +72,7 @@ int conf_check(const Request *req,
int GeneralReaderOp::inference() {
// reade request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
-
- int batch_size = req->insts_size();
+ uint64_t log_id = req->log_id();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
@@ -82,26 +81,29 @@ int GeneralReaderOp::inference() {
GeneralBlob *res = mutable_data<GeneralBlob>();
TensorVector *out = &res->tensor_vector;
- res->SetBatchSize(batch_size);
+ res->SetLogId(log_id);
if (!res) {
- LOG(ERROR) << "Failed get op tls reader object output";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed get op tls reader object output";
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
- VLOG(2) << "var num: " << var_num;
+ VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
+
+ VLOG(2) << "(logid=" << log_id
+ << ") start to call load general model_conf op";
- VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
- VLOG(2) << "get resource pointer done.";
+ VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
- VLOG(2) << "print general model config done.";
+ VLOG(2) << "(logid=" << log_id << ") print general model config done.";
// TODO(guru4elephant): how to do conditional check?
/*
@@ -117,7 +119,6 @@ int GeneralReaderOp::inference() {
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
-
// prepare basic information for input
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
@@ -126,71 +127,79 @@ int GeneralReaderOp::inference() {
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
- } else {
+ } else if (elem_type[i] == 1) {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
+ } else if (elem_type[i] == 2) {
+ elem_size[i] = sizeof(int32_t);
+ lod_tensor.dtype = paddle::PaddleDType::INT32;
}
-
- if (model_config->_is_lod_feed[i]) {
+ // implement lod tensor here
+ if (req->insts(0).tensor_array(i).lod_size() > 0) {
+ VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
lod_tensor.lod.resize(1);
- lod_tensor.lod[0].push_back(0);
- VLOG(2) << "var[" << i << "] is lod_tensor";
+ for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) {
+ lod_tensor.lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
+ }
+ capacity[i] = 1;
+ for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
+ int dim = req->insts(0).tensor_array(i).shape(k);
+ VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+ << "]: " << dim;
+ capacity[i] *= dim;
+ lod_tensor.shape.push_back(dim);
+ }
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
+ << "] is tensor, capacity: " << capacity[i];
} else {
- lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
- VLOG(2) << "shape for var[" << i << "]: " << dim;
+ VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+ << "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
- VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
+ << "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
-
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
int tensor_size = 0;
- for (int j = 0; j < batch_size; ++j) {
- const Tensor &tensor = req->insts(j).tensor_array(i);
- int data_len = 0;
- if (tensor.int64_data_size() > 0) {
- data_len = tensor.int64_data_size();
- } else {
- data_len = tensor.float_data_size();
- }
- VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
- tensor_size += data_len;
-
- int cur_len = out->at(i).lod[0].back();
- VLOG(2) << "current len: " << cur_len;
-
- int sample_len = 0;
- if (tensor.shape_size() == 1) {
- sample_len = data_len;
- } else {
- sample_len = tensor.shape(0);
- }
- out->at(i).lod[0].push_back(cur_len + sample_len);
- VLOG(2) << "new len: " << cur_len + sample_len;
+ const Tensor &tensor = req->insts(0).tensor_array(i);
+ int data_len = 0;
+ if (tensor.int64_data_size() > 0) {
+ data_len = tensor.int64_data_size();
+ } else if (tensor.float_data_size() > 0) {
+ data_len = tensor.float_data_size();
+ } else if (tensor.int_data_size() > 0) {
+ data_len = tensor.int_data_size();
}
- out->at(i).data.Resize(tensor_size * elem_size[i]);
- out->at(i).shape = {out->at(i).lod[0].back()};
- for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
- out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
+ VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
+ << "]: " << data_len;
+ tensor_size += data_len;
+
+ int cur_len = out->at(i).lod[0].back();
+ VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
+
+ int sample_len = 0;
+ if (tensor.shape_size() == 1) {
+ sample_len = data_len;
+ } else {
+ sample_len = tensor.shape(0);
}
- if (out->at(i).shape.size() == 1) {
- out->at(i).shape.push_back(1);
- }
- VLOG(2) << "var[" << i
+ VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
+ out->at(i).data.Resize(tensor_size * elem_size[i]);
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
- out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
- VLOG(2) << "var[" << i
- << "] is tensor and capacity=" << batch_size * capacity[i];
+ out->at(i).data.Resize(capacity[i] * elem_size[i]);
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
+ << "] is tensor and capacity=" << capacity[i];
}
}
@@ -198,44 +207,43 @@ int GeneralReaderOp::inference() {
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
+ VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+ << "] is " << req->insts(0).tensor_array(i).int64_data(0);
int offset = 0;
- for (int j = 0; j < batch_size; ++j) {
- int elem_num = req->insts(j).tensor_array(i).int64_data_size();
- for (int k = 0; k < elem_num; ++k) {
- dst_ptr[offset + k] = req->insts(j).tensor_array(i).int64_data(k);
- }
- if (out->at(i).lod.size() == 1) {
- offset = out->at(i).lod[0][j + 1];
- } else {
- offset += capacity[i];
- }
+ int elem_num = req->insts(0).tensor_array(i).int64_data_size();
+ for (int k = 0; k < elem_num; ++k) {
+ dst_ptr[offset + k] = req->insts(0).tensor_array(i).int64_data(k);
}
- } else {
+ } else if (elem_type[i] == 1) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
+ VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+ << "] is " << req->insts(0).tensor_array(i).float_data(0);
int offset = 0;
- for (int j = 0; j < batch_size; ++j) {
- int elem_num = req->insts(j).tensor_array(i).float_data_size();
- for (int k = 0; k < elem_num; ++k) {
- dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
- }
- if (out->at(i).lod.size() == 1) {
- offset = out->at(i).lod[0][j + 1];
- } else {
- offset += capacity[i];
- }
+ int elem_num = req->insts(0).tensor_array(i).float_data_size();
+ for (int k = 0; k < elem_num; ++k) {
+ dst_ptr[offset + k] = req->insts(0).tensor_array(i).float_data(k);
+ }
+ } else if (elem_type[i] == 2) {
+ int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
+ VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+ << "] is " << req->insts(0).tensor_array(i).int_data(0);
+ int offset = 0;
+ int elem_num = req->insts(0).tensor_array(i).int_data_size();
+ for (int k = 0; k < elem_num; ++k) {
+ dst_ptr[offset + k] = req->insts(0).tensor_array(i).int_data(k);
}
}
}
- VLOG(2) << "output size: " << out->size();
-
+ VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
timeline.Pause();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
+ res->_batch_size = 1;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
- VLOG(2) << "read data from client success";
+ VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralReaderOp);
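
The hunk above dispatches on the request's `elem_type` code (0 → int64, 1 → float32, 2 → the newly supported int32) to pick both the Paddle dtype and the element size used when sizing the output buffer. A minimal standalone sketch of that mapping; `ElemInfo` and `elem_info_for` are illustrative names only, not part of Paddle Serving:

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>

// Illustrative mirror of the elem_type dispatch in GeneralReaderOp::inference():
// 0 = int64, 1 = float32, 2 = int32 (added by this patch).
enum class DType { INT64, FLOAT32, INT32 };

struct ElemInfo {
  DType dtype;
  std::size_t elem_size;  // bytes per element, used to size the output buffer
};

ElemInfo elem_info_for(int elem_type) {
  switch (elem_type) {
    case 0: return {DType::INT64, sizeof(int64_t)};
    case 1: return {DType::FLOAT32, sizeof(float)};
    case 2: return {DType::INT32, sizeof(int32_t)};
    default: throw std::invalid_argument("unsupported elem_type");
  }
}
```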
diff --git a/core/general-server/op/general_reader_op.h b/core/general-server/op/general_reader_op.h
index c45d6ad5139a7a9a267f1c6556028a99295500de..cb9693982ff659214dd21ff09f189f86b6b3a339 100644
--- a/core/general-server/op/general_reader_op.h
+++ b/core/general-server/op/general_reader_op.h
@@ -13,21 +13,13 @@
// limitations under the License.
#pragma once
-#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include
+#include
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 5667a174d9bb6e134e58de72524c60839dc82356..5f80510f79f8acf09aed9f7f65e84b9cfaa9a8ed 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -42,6 +42,9 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralResponseOp::inference() {
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
+ const GeneralBlob *input_blob;
+ uint64_t log_id =
+ get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
@@ -52,15 +55,17 @@ int GeneralResponseOp::inference() {
// timeline.Start();
int64_t start = timeline.TimeStampUS();
- VLOG(2) << "start to call load general model_conf op";
+ VLOG(2) << "(logid=" << log_id
+ << ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
- VLOG(2) << "get resource pointer done.";
+ VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
- VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
+ VLOG(2) << "(logid=" << log_id
+ << ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
std::vector fetch_index;
fetch_index.resize(req->fetch_var_names_size());
@@ -69,16 +74,16 @@ int GeneralResponseOp::inference() {
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
- const GeneralBlob *input_blob;
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
- VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
- << pre_node_names.size() << ")";
+ VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
+ << " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
// fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(),
// input_blob);
if (!input_blob) {
- LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
@@ -91,19 +96,20 @@ int GeneralResponseOp::inference() {
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
- tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
- VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
- << " is lod_tensor";
+ VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
+ << model_config->_fetch_name[idx] << " is lod_tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
- VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+ VLOG(2) << "(logid=" << log_id << ") shape[" << k
+ << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
} else {
- VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
- << " is tensor";
+ VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
+ << model_config->_fetch_name[idx] << " is tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
- VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+ VLOG(2) << "(logid=" << log_id << ") shape[" << k
+ << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
@@ -115,49 +121,51 @@ int GeneralResponseOp::inference() {
for (int j = 0; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
- if (in->at(idx).dtype == paddle::PaddleDType::INT64) {
- VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
- << "].";
+
+ FetchInst *fetch_p = output->mutable_insts(0);
+ auto dtype = in->at(idx).dtype;
+
+ if (dtype == paddle::PaddleDType::INT64) {
+ VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
+ << model_config->_fetch_name[idx] << "].";
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
- if (model_config->_is_lod_fetch[idx]) {
- FetchInst *fetch_p = output->mutable_insts(0);
- for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
- fetch_p->mutable_tensor_array(var_idx)->add_lod(
- in->at(idx).lod[0][j]);
- }
- for (int j = 0; j < cap; ++j) {
- fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
- }
- } else {
- FetchInst *fetch_p = output->mutable_insts(0);
- for (int j = 0; j < cap; ++j) {
- fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
- }
- }
- VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
- var_idx++;
- } else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) {
- VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
- << "].";
+ // from
+ // https://stackoverflow.com/questions/15499641/copy-a-stdvector-to-a-repeated-field-from-protobuf-with-memcpy
+ // `Swap` method is faster than `{}` method.
+ google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
+ data_ptr + cap);
+ fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
+ &tmp_data);
+ } else if (dtype == paddle::PaddleDType::FLOAT32) {
+ VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
+ << model_config->_fetch_name[idx] << "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
- if (model_config->_is_lod_fetch[idx]) {
- FetchInst *fetch_p = output->mutable_insts(0);
+ google::protobuf::RepeatedField<float> tmp_data(data_ptr,
+ data_ptr + cap);
+ fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
+ &tmp_data);
+ } else if (dtype == paddle::PaddleDType::INT32) {
+ VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
+ << model_config->_fetch_name[idx] << "].";
+ int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
+ google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
+ data_ptr + cap);
+ fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap(
+ &tmp_data);
+ }
+
+ if (model_config->_is_lod_fetch[idx]) {
+ if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
- for (int j = 0; j < cap; ++j) {
- fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
- }
- } else {
- FetchInst *fetch_p = output->mutable_insts(0);
- for (int j = 0; j < cap; ++j) {
- fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
- }
}
- VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
- var_idx++;
}
+
+ VLOG(2) << "(logid=" << log_id << ") fetch var ["
+ << model_config->_fetch_name[idx] << "] ready";
+ var_idx++;
}
}
@@ -169,7 +177,8 @@ int GeneralResponseOp::inference() {
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
- VLOG(2) << "p size for input blob: " << input_blob->p_size;
+ VLOG(2) << "(logid=" << log_id
+ << ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
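
The StackOverflow-inspired change above fills the response's repeated fields by constructing a `google::protobuf::RepeatedField` directly from the tensor buffer and `Swap`-ing it into the message, instead of calling `add_*_data` once per element. A self-contained sketch of the technique, using plain `RepeatedField`s and no Serving types:

```cpp
#include <google/protobuf/repeated_field.h>
#include <iostream>
#include <vector>

int main() {
  std::vector<float> src = {0.1f, 0.2f, 0.3f};

  // In GeneralResponseOp, `dst` would be the message's mutable_float_data() field.
  google::protobuf::RepeatedField<float> dst;

  // Build a RepeatedField from the [begin, end) range in one shot, then swap
  // its storage into the destination instead of appending element by element.
  google::protobuf::RepeatedField<float> tmp(src.data(), src.data() + src.size());
  dst.Swap(&tmp);

  std::cout << "copied " << dst.size() << " elements\n";
  return 0;
}
```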
diff --git a/core/general-server/op/general_response_op.h b/core/general-server/op/general_response_op.h
index 4b0f6ed17b5a66dbda7bccef25cec03bf044e6c5..0f72b8f98df336dd515560129a8cfd27650738bb 100644
--- a/core/general-server/op/general_response_op.h
+++ b/core/general-server/op/general_response_op.h
@@ -15,16 +15,8 @@
#pragma once
#include
#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include "core/general-server/general_model_service.pb.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp
index 154e975d314a72515624b7bbf1aff85f70b8b5d3..3fa433c6cc31a3dbce331013780212d50e7f643c 100644
--- a/core/general-server/op/general_text_reader_op.cpp
+++ b/core/general-server/op/general_text_reader_op.cpp
@@ -35,6 +35,7 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextReaderOp::inference() {
// reade request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
+ uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
@@ -44,16 +45,18 @@ int GeneralTextReaderOp::inference() {
std::vector capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
- TensorVector *out = &res->tensor_vector;
-
- res->SetBatchSize(batch_size);
if (!res) {
- LOG(ERROR) << "Failed get op tls reader object output";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed get op tls reader object output";
}
+ TensorVector *out = &res->tensor_vector;
+ res->SetBatchSize(batch_size);
+ res->SetLogId(log_id);
+
if (batch_size <= 0) {
- LOG(ERROR) << "Batch size < 0";
+ LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
return -1;
}
@@ -61,17 +64,18 @@ int GeneralTextReaderOp::inference() {
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
- VLOG(2) << "var num: " << var_num;
+ VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
- VLOG(2) << "start to call load general model_conf op";
+ VLOG(2) << "(logid=" << log_id
+ << ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
- VLOG(2) << "get resource pointer done.";
+ VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
- VLOG(2) << "print general model config done.";
+ VLOG(2) << "(logid=" << log_id << ") print general model config done.";
elem_type.resize(var_num);
elem_size.resize(var_num);
@@ -79,7 +83,8 @@ int GeneralTextReaderOp::inference() {
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
- VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
+ << "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
@@ -91,17 +96,19 @@ int GeneralTextReaderOp::inference() {
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
- VLOG(2) << "var[" << i << "] is lod_tensor";
+ VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
- VLOG(2) << "shape for var[" << i << "]: " << dim;
+ VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+ << "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
- VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
+ << "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
@@ -117,11 +124,11 @@ int GeneralTextReaderOp::inference() {
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
- VLOG(2) << "var[" << i
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
- VLOG(2) << "var[" << i
+ VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
@@ -163,7 +170,7 @@ int GeneralTextReaderOp::inference() {
AddBlobInfo(res, start);
AddBlobInfo(res, end);
- VLOG(2) << "read data from client success";
+ VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralTextReaderOp);
diff --git a/core/general-server/op/general_text_reader_op.h b/core/general-server/op/general_text_reader_op.h
index ca134256fce4aaa003f4b07033d4c471ebdb59b7..af822993dc37fae23c1fa584d640cbfe8d9950c8 100644
--- a/core/general-server/op/general_text_reader_op.h
+++ b/core/general-server/op/general_text_reader_op.h
@@ -13,21 +13,13 @@
// limitations under the License.
#pragma once
-#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include
+#include
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/op/general_text_response_op.cpp b/core/general-server/op/general_text_response_op.cpp
index ae194119f1fc3edad01662041035f7011873998a..03eea7d76c83782b661ea4553fc5fc0eee99e372 100644
--- a/core/general-server/op/general_text_response_op.cpp
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -40,6 +40,9 @@ int GeneralTextResponseOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
+ const GeneralBlob *input_blob;
+ uint64_t log_id =
+ get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
@@ -48,11 +51,12 @@ int GeneralTextResponseOp::inference() {
Timer timeline;
int64_t start = timeline.TimeStampUS();
- VLOG(2) << "start to call load general model_conf op";
+ VLOG(2) << "(logid=" << log_id
+ << ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
- VLOG(2) << "get resource pointer done.";
+ VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
@@ -63,20 +67,20 @@ int GeneralTextResponseOp::inference() {
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
- const GeneralBlob *input_blob;
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
- VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
- << pre_node_names.size() << ")";
+ VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
+ << " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
- LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
- VLOG(2) << "input batch size: " << batch_size;
+ VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
ModelOutput *output = res->add_outputs();
output->set_engine_name(
@@ -88,12 +92,13 @@ int GeneralTextResponseOp::inference() {
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
- VLOG(2) << "out[" << idx << " is lod_tensor";
+ VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
- VLOG(2) << "out[" << idx << "] is tensor";
+ VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
- VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
+ VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
+ << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
@@ -137,7 +142,8 @@ int GeneralTextResponseOp::inference() {
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
- VLOG(2) << "p size for input blob: " << input_blob->p_size;
+ VLOG(2) << "(logid=" << log_id
+ << ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
diff --git a/core/general-server/op/general_text_response_op.h b/core/general-server/op/general_text_response_op.h
index 52f7bbf0f7d76122bad14cf513302f70c35aa1d8..334d98476e67f745635f7d66d7b8682de62da355 100644
--- a/core/general-server/op/general_text_response_op.h
+++ b/core/general-server/op/general_text_response_op.h
@@ -15,17 +15,9 @@
#pragma once
#include
#include
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h" // NOLINT
-#endif
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 8581ecb2a2e10deced910a20ce26c2beaca956fa..e7dd5fccf54be43db8e65a9ed1112ceaece93700 100644
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -37,6 +37,7 @@ message Request {
repeated FeedInst insts = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
+ required uint64 log_id = 4 [ default = 0 ];
};
message Response {
diff --git a/core/general-server/proto/load_general_model_service.proto b/core/general-server/proto/load_general_model_service.proto
index b8a86497f8c0b683f4e95f4517d83f576e79baad..f844bd5b2c0ddb34a32d00559b087c2fbb2ebfed 100644
--- a/core/general-server/proto/load_general_model_service.proto
+++ b/core/general-server/proto/load_general_model_service.proto
@@ -21,6 +21,7 @@ option cc_generic_services = true;
message RequestAndResponse {
required int32 a = 1;
required float b = 2;
+ required uint64 log_id = 3 [ default = 0 ];
};
service LoadGeneralModelService {
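
Both proto changes above add a `log_id` field (default 0) so a per-request id can be carried from the client into every server-side log line. A hedged C++ sketch of setting it through the generated `Request` API; the namespace is assumed from the proto's package declaration in this repo, and the values and fetch name are placeholders:

```cpp
#include "core/general-server/general_model_service.pb.h"

// Sketch only: exercise the new log_id field on a Request.
using baidu::paddle_serving::predictor::general_model::Request;

void build_request(Request* req) {
  req->set_log_id(20200731ULL);       // surfaces as "(logid=...)" in server logs
  req->add_fetch_var_names("score");  // placeholder fetch alias
  req->set_profile_server(false);
}
```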
diff --git a/core/pdcodegen/plugin/pdcodegen b/core/pdcodegen/plugin/pdcodegen
deleted file mode 100755
index bb81217121a15b99cda8a320f357f716357f96c5..0000000000000000000000000000000000000000
Binary files a/core/pdcodegen/plugin/pdcodegen and /dev/null differ
diff --git a/core/pdcodegen/src/pdcodegen.cpp b/core/pdcodegen/src/pdcodegen.cpp
index af4081a985ece584f82120799fc9a384f83830be..c505ca66385dd363ad0a76470012f07a925bcd17 100644
--- a/core/pdcodegen/src/pdcodegen.cpp
+++ b/core/pdcodegen/src/pdcodegen.cpp
@@ -280,25 +280,29 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n"
" static_cast(cntl_base);\n"
+ " uint64_t log_id = request->log_id();\n"
+ " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
- " LOG(ERROR) << \"Not found service: $service$\";\n"
+ " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
+ "$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
- " LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
- "\"\]\";\n"
- " LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
- "\"\]\";\n"
- " LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
- " LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
- " int err_code = svr->inference(request, response);\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
+ "<< cntl->remote_side() << \"\]\";\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
+ "<< cntl->local_side() << \"\]\";\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
+ "<< \"$name$\" << \"\]\";\n"
+ " int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
- " << \"Failed call inferservice[$name$], name[$service$]\"\n"
+ " << \"(logid=\" << log_id << \") Failed call "
+ "inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
@@ -306,7 +310,8 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
- " LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
+ "start) << \"\]\";\n", // NOLINT
"name",
class_name,
"service",
@@ -317,26 +322,31 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n"
" static_cast(cntl_base);\n"
+ " uint64_t log_id = equest->log_id();\n"
+ " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
- " LOG(ERROR) << \"Not found service: $service$\";\n"
+ " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
+ "$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
- " LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
- "\"\]\";\n"
- " LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
- "\"\]\";\n"
- " LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
- " LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
+ "<< cntl->remote_side() << \"\]\";\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
+ "<< cntl->local_side() << \"\]\";\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
+ "<< \"$name$\" << \"\]\";\n"
" butil::IOBufBuilder debug_os;\n"
- " int err_code = svr->inference(request, response, &debug_os);\n"
+ " int err_code = svr->inference(request, response, log_id, "
+ "&debug_os);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
- " << \"Failed call inferservice[$name$], name[$service$]\"\n"
+ " << \"(logid=\" << log_id << \") Failed call "
+ "inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
@@ -345,9 +355,11 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
- " LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
+ "start) << \"\]\";\n"
" LOG(INFO)\n"
- " << \"TC=[\" << (end - start) << \"] Received debug "
+ " << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
+ "\"] Received debug "
"request[log_id=\" << cntl->log_id()\n"
" << \"] from \" << cntl->remote_side()\n"
" << \" to \" << cntl->local_side();\n",
@@ -1011,25 +1023,31 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n"
" static_cast(cntl_base);\n"
+ " uint64_t log_id = request->log_id();\n"
+ " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
- " LOG(ERROR) << \"Not found service: $service$\";\n"
+ " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
+ "$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
- " LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") "
+ "remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
- " LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") "
+ "local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
- " LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
- " LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
- " int err_code = svr->inference(request, response);\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") "
+ "service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
+ " int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
- " << \"Failed call inferservice[$name$], name[$service$]\"\n"
+ " << \"(logid=\" << log_id << \") Failed call "
+ "inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
@@ -1037,7 +1055,8 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
- " LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
+ "start) << \"\]\";\n", // NOLINT
"name",
class_name,
"service",
@@ -1048,26 +1067,31 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n"
" static_cast(cntl_base);\n"
+ " uint64_t log_id = request->log_id();\n"
+ " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
- " LOG(ERROR) << \"Not found service: $service$\";\n"
+ " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
+ "$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
- " LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
- "\"\]\";\n"
- " LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
- "\"\]\";\n"
- " LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
- " LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
+ " << cntl->remote_side() << \"\]\";\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
+ "<< cntl->local_side() << \"\]\";\n"
+ " LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
+ "<< \"$name$\" << \"\]\";\n"
" butil::IOBufBuilder debug_os;\n"
- " int err_code = svr->inference(request, response, &debug_os);\n"
+ " int err_code = svr->inference(request, response, log_id, "
+ "&debug_os);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
- " << \"Failed call inferservice[$name$], name[$service$]\"\n"
+ " << \"(logid=\" << log_id << \") Failed call "
+ "inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
@@ -1076,9 +1100,11 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
- " LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT
+ " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
+ "start) << \"\]\";\n" // NOLINT
" LOG(INFO)\n"
- " << \"TC=[\" << (end - start) << \"] Received debug "
+ " << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
+ "\"] Received debug "
"request[log_id=\" << cntl->log_id()\n"
" << \"] from \" << cntl->remote_side()\n"
" << \" to \" << cntl->local_side();\n",
diff --git a/core/predictor/CMakeLists.txt b/core/predictor/CMakeLists.txt
index 1b9dc7b29845a2b8c7f958c1d8e836cb57e91d41..637c7c15530273bc908ec2f8693a3d66989eebd2 100644
--- a/core/predictor/CMakeLists.txt
+++ b/core/predictor/CMakeLists.txt
@@ -6,14 +6,16 @@ include(framework/CMakeLists.txt)
include(tools/CMakeLists.txt)
include(src/CMakeLists.txt)
-
+add_definitions(-D__STDC_FORMAT_MACROS)
add_library(pdserving ${pdserving_srcs})
set_source_files_properties(
${pdserving_srcs}
PROPERTIES
COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
-
+if (WITH_TRT)
+ add_definitions(-DWITH_TRT)
+endif()
target_link_libraries(pdserving
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
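
`-D__STDC_FORMAT_MACROS` is added because the reworked TRACEPRINTF calls format `log_id` with `PRIu64`; on older toolchains `<inttypes.h>` only exposes the PRI macros to C++ when that macro is defined before inclusion. A minimal illustration (the define lives in code here only so the snippet stays self-contained):

```cpp
#define __STDC_FORMAT_MACROS  // normally injected by the CMake definition above
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t log_id = 20200731ULL;
  // Same conversion spec the TRACEPRINTF calls now use for log_id.
  std::printf("(logid=%" PRIu64 ") start to execute stage[%u]\n", log_id, 0u);
  return 0;
}
```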
diff --git a/core/predictor/common/inner_common.h b/core/predictor/common/inner_common.h
index 96b8a8027070da559e239cdc5f6057d534ff3412..f6847146ba14b2b9fc1b07485c748e6e8300d7bd 100644
--- a/core/predictor/common/inner_common.h
+++ b/core/predictor/common/inner_common.h
@@ -50,7 +50,7 @@
#include "butil/time.h"
#endif
-#include "glog/raw_logging.h"
+#define ERROR_STRING_LEN 10240
#include "core/configure/general_model_config.pb.h"
#include "core/configure/include/configure_parser.h"
diff --git a/core/predictor/framework/channel.h b/core/predictor/framework/channel.h
index a48368329469f36ab7881972e6a7059ab8066b5d..67808be16409cdf0610363d0039accf0f3a9d5cb 100644
--- a/core/predictor/framework/channel.h
+++ b/core/predictor/framework/channel.h
@@ -72,9 +72,10 @@ class Channel {
const std::string& op() { return _op; }
- int share_to_bus(Bus* bus) {
+ int share_to_bus(Bus* bus, const uint64_t log_id) {
if (bus->regist(_op, this) != 0) {
- LOG(ERROR) << "Failed regist channel[" << _op << "] to bus!";
+ LOG(ERROR) << "(logid=" << log_id << ") Failed regist channel[" << _op
+ << "] to bus!";
return -1;
}
diff --git a/core/predictor/framework/dag.cpp b/core/predictor/framework/dag.cpp
index f039ac70ffe2e55a59f926d754ca411a034058f4..c45952f8fb8f3b6d48c2e1295d6a43d45ad185e5 100644
--- a/core/predictor/framework/dag.cpp
+++ b/core/predictor/framework/dag.cpp
@@ -155,13 +155,11 @@ int Dag::init(const configure::Workflow& conf, const std::string& name) {
}
if (FLAGS_el_log_level == 16) {
- LOG(INFO) << "DAG: " << _dag_name;
- LOG(INFO) << ", Op Num: " << _index_nodes.size();
+ LOG(INFO) << "DAG: " << _dag_name << ", Op Num: " << _index_nodes.size();
for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) {
DagNode* node = _index_nodes[nid];
- LOG(INFO) << ", OP-" << node->id << "-" << node->name << "-"
- << node->type;
- LOG(INFO) << " depends: " << node->depends.size();
+ LOG(INFO) << "OP-" << node->id << "-" << node->name << "-" << node->type
+ << " depends: " << node->depends.size();
boost::unordered_map<std::string, EdgeMode>::iterator it;
for (it = node->depends.begin(); it != node->depends.end(); it++) {
@@ -214,8 +212,8 @@ int Dag::topo_sort() {
}
}
for (int i = 0; i < in_degree.size(); ++i) {
- LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i
- << "]: " << in_degree[i];
+ VLOG(2) << "(" << _index_nodes[i]->name << ") in_degree[" << i
+ << "]: " << in_degree[i];
}
int sorted_num = 0;
DagStage* stage = new (std::nothrow) DagStage();
diff --git a/core/predictor/framework/dag_view.cpp b/core/predictor/framework/dag_view.cpp
index bde8084b41fee00bc95d2a35444a15258d2a12a8..29a4e97378c20d6f9caae8a97de7dc5f714960e9 100644
--- a/core/predictor/framework/dag_view.cpp
+++ b/core/predictor/framework/dag_view.cpp
@@ -26,7 +26,9 @@ namespace baidu {
namespace paddle_serving {
namespace predictor {
-int DagView::init(Dag* dag, const std::string& service_name) {
+int DagView::init(Dag* dag,
+ const std::string& service_name,
+ const uint64_t log_id) {
_name = dag->name();
_full_name = service_name + NAME_DELIMITER + dag->name();
_bus = butil::get_object<Bus>();
@@ -36,17 +38,20 @@ int DagView::init(Dag* dag, const std::string& service_name) {
for (uint32_t si = 0; si < stage_size; si++) {
const DagStage* stage = dag->stage_by_index(si);
if (stage == NULL) {
- LOG(ERROR) << "Failed get stage by index:" << si;
+ LOG(ERROR) << "(logid=" << log_id << ") Failed get stage by index:" << si;
return ERR_INTERNAL_FAILURE;
}
ViewStage* vstage = butil::get_object<ViewStage>();
if (vstage == NULL) {
- LOG(ERROR) << "Failed get vstage from object pool"
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed get vstage from object pool"
<< "at:" << si;
return ERR_MEM_ALLOC_FAILURE;
}
- VLOG(2) << "stage[" << si << "] name: " << stage->full_name;
- VLOG(2) << "stage[" << si << "] node size: " << stage->nodes.size();
+ VLOG(2) << "(logid=" << log_id << ") stage[" << si
+ << "] name: " << stage->full_name;
+ VLOG(2) << "(logid=" << log_id << ") stage[" << si
+ << "] node size: " << stage->nodes.size();
vstage->full_name = service_name + NAME_DELIMITER + stage->full_name;
uint32_t node_size = stage->nodes.size();
// create tls view node
@@ -54,31 +59,39 @@ int DagView::init(Dag* dag, const std::string& service_name) {
DagNode* node = stage->nodes[ni];
ViewNode* vnode = butil::get_object<ViewNode>();
if (vnode == NULL) {
- LOG(ERROR) << "Failed get vnode at:" << ni;
+ LOG(ERROR) << "(logid=" << log_id << ") Failed get vnode at:" << ni;
return ERR_MEM_ALLOC_FAILURE;
}
// factory type
Op* op = OpRepository::instance().get_op(node->type);
if (op == NULL) {
- LOG(ERROR) << "Failed get op with type:" << node->type;
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed get op with type:" << node->type;
return ERR_INTERNAL_FAILURE;
}
// initialize a TLS op object
- VLOG(2) << "dag view initialized: \n"
+ VLOG(2) << "(logid=" << log_id << ") dag view initialized: \n"
<< "node id: " << node->id << "\n"
<< "node name: " << node->name << "\n"
<< "node type: " << node->type;
- if (op->init(_bus, dag, node->id, node->name, node->type, node->conf) !=
- 0) {
- LOG(WARNING) << "Failed init op, type:" << node->type;
+ if (op->init(_bus,
+ dag,
+ node->id,
+ node->name,
+ node->type,
+ node->conf,
+ log_id) != 0) {
+ LOG(WARNING) << "(logid=" << log_id
+ << ") Failed init op, type:" << node->type;
return ERR_INTERNAL_FAILURE;
}
op->set_full_name(service_name + NAME_DELIMITER + node->full_name);
// Set the name of the Op as the key of the matching engine.
- VLOG(2) << "op->set_engine_name(" << node->name.c_str() << ")";
+ VLOG(2) << "(logid=" << log_id << ") op->set_engine_name("
+ << node->name.c_str() << ")";
op->set_engine_name(node->name);
vnode->conf = node;
@@ -88,7 +101,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
it != vnode->conf->depends.end();
++it) {
std::string pre_node_name = it->first;
- VLOG(2) << "add op pre name: \n"
+ VLOG(2) << "(logid=" << log_id << ") add op pre name: \n"
<< "current op name: " << vnode->op->op_name()
<< ", previous op name: " << pre_node_name;
vnode->op->add_pre_node_name(pre_node_name);
@@ -102,7 +115,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
//<< " previous op name: "
//<< _view[si - 1]->nodes.back()->op->op_name();
// vstage->nodes.back()->op->set_pre_node_name(
- //_view[si - 1]->nodes.back()->op->op_name());
+ // _view[si - 1]->nodes.back()->op->op_name());
/*}*/
_view.push_back(vstage);
}
@@ -133,14 +146,15 @@ int DagView::deinit() {
return ERR_OK;
}
-int DagView::execute(butil::IOBufBuilder* debug_os) {
+int DagView::execute(const uint64_t log_id, butil::IOBufBuilder* debug_os) {
uint32_t stage_size = _view.size();
for (uint32_t si = 0; si < stage_size; si++) {
- TRACEPRINTF("start to execute stage[%u]", si);
- int errcode = execute_one_stage(_view[si], debug_os);
- TRACEPRINTF("finish to execute stage[%u]", si);
+ TRACEPRINTF("(logid=%" PRIu64 ") start to execute stage[%u]", log_id, si);
+ int errcode = execute_one_stage(_view[si], log_id, debug_os);
+ TRACEPRINTF("(logid=%" PRIu64 ") finish to execute stage[%u]", log_id, si);
if (errcode < 0) {
- LOG(ERROR) << "failed execute stage[" << _view[si]->debug();
+ LOG(ERROR) << "(logid=" << log_id << ") Failed execute stage["
+ << _view[si]->debug();
return errcode;
}
}
@@ -151,29 +165,34 @@ int DagView::execute(butil::IOBufBuilder* debug_os) {
// You can derive a subclass to implement this func.
// ParallelDagView maybe the one you want.
int DagView::execute_one_stage(ViewStage* vstage,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
butil::Timer stage_time(butil::Timer::STARTED);
uint32_t node_size = vstage->nodes.size();
- VLOG(2) << "vstage->nodes.size(): " << node_size;
+ VLOG(2) << "(logid=" << log_id << ") vstage->nodes.size(): " << node_size;
for (uint32_t ni = 0; ni < node_size; ni++) {
ViewNode* vnode = vstage->nodes[ni];
DagNode* conf = vnode->conf;
Op* op = vnode->op;
- TRACEPRINTF("start to execute op[%s]", op->name());
- int errcode = op->process(debug_os != NULL);
- TRACEPRINTF("finish to execute op[%s]", op->name());
+ TRACEPRINTF(
+ "(logid=%" PRIu64 ") start to execute op[%s]", log_id, op->name());
+ int errcode = op->process(log_id, debug_os != NULL);
+ TRACEPRINTF(
+ "(logid=%" PRIu64 ") finish to execute op[%s]", log_id, op->name());
if (errcode < 0) {
- LOG(ERROR) << "Execute failed, Op:" << op->debug_string();
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Execute failed, Op:" << op->debug_string();
return errcode;
}
if (errcode > 0) {
- LOG(INFO) << "Execute ignore, Op:" << op->debug_string();
+ LOG(INFO) << "(logid=" << log_id
+ << ") Execute ignore, Op:" << op->debug_string();
continue;
}
if (debug_os) {
- (*debug_os) << "{\"op_name\": \"" << op->name()
+ (*debug_os) << "(logid=" << log_id << ") {\"op_name\": \"" << op->name()
<< "\", \"debug_str:\": \"" << op->debug_string()
<< "\", \"time_info\": \"" << op->time_info() << "\"}";
}
@@ -186,34 +205,34 @@ int DagView::execute_one_stage(ViewStage* vstage,
return ERR_OK;
}
-int DagView::set_request_channel(Channel& request) {
+int DagView::set_request_channel(Channel& request, const uint64_t log_id) {
// Each workflow should get the very beginning
// request (channel), and commit it to bus, for
// the first stage ops consuming.
- request.share_to_bus(_bus);
+ request.share_to_bus(_bus, log_id);
return ERR_OK;
}
-const Channel* DagView::get_response_channel() const {
+const Channel* DagView::get_response_channel(const uint64_t log_id) const {
// Caller obtains response channel from bus, and
// writes it to rpc response(protbuf/json)
if (_view.size() < 1) {
- LOG(ERROR) << "invalid empty view stage!";
+ LOG(ERROR) << "(logid=" << log_id << ") invalid empty view stage!";
return NULL;
}
ViewStage* last_stage = _view[_view.size() - 1];
if (last_stage->nodes.size() != 1 || last_stage->nodes[0] == NULL) {
- LOG(ERROR) << "Invalid last stage, size[" << last_stage->nodes.size()
- << "] != 1";
+ LOG(ERROR) << "(logid=" << log_id << ") Invalid last stage, size["
+ << last_stage->nodes.size() << "] != 1";
return NULL;
}
Op* last_op = last_stage->nodes[0]->op;
if (last_op == NULL) {
- LOG(ERROR) << "Last op is NULL";
+ LOG(ERROR) << "(logid=" << log_id << ") Last op is NULL";
return NULL;
}
return last_op->mutable_channel();
diff --git a/core/predictor/framework/dag_view.h b/core/predictor/framework/dag_view.h
index 4999f64b47eb667e90437d387a5ac5ba5337fc64..8ba9d224c577b475d0a52b79e92f72bd1abaa187 100644
--- a/core/predictor/framework/dag_view.h
+++ b/core/predictor/framework/dag_view.h
@@ -47,21 +47,22 @@ class DagView {
~DagView() {}
- int init(Dag* dag, const std::string& service_name);
+ int init(Dag* dag, const std::string& service_name, const uint64_t log_id);
int deinit();
- int execute(butil::IOBufBuilder* debug_os);
+ int execute(const uint64_t log_id, butil::IOBufBuilder* debug_os);
// The default execution strategy is in sequencing
// You can derive a subclass to implement this func.
// ParallelDagView maybe the one you want.
virtual int execute_one_stage(ViewStage* vstage,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os);
- int set_request_channel(Channel& request); // NOLINT
+ int set_request_channel(Channel& request, const uint64_t log_id); // NOLINT
- const Channel* get_response_channel() const;
+ const Channel* get_response_channel(const uint64_t log_id) const;
const std::string& name() const { return _name; }
diff --git a/core/predictor/framework/factory.h b/core/predictor/framework/factory.h
index 8d5fc9a1c40b047351f38a1136728ee179a191ed..fde95eaa1565c8d0f4fca7f846c7c8a49b383163 100644
--- a/core/predictor/framework/factory.h
+++ b/core/predictor/framework/factory.h
@@ -17,7 +17,7 @@
#include
#include
#include "core/predictor/common/inner_common.h"
-#include "glog/raw_logging.h"
+
namespace baidu {
namespace paddle_serving {
namespace predictor {
@@ -28,7 +28,12 @@ namespace predictor {
FactoryDerive<D, B>* factory = new (std::nothrow) FactoryDerive<D, B>(); \
if (factory == NULL || \
FactoryPool<B>::instance().register_factory(tag, factory) != 0) { \
- RAW_LOG_FATAL("Failed regist factory: %s in macro!", #D); \
+ char err_str[ERROR_STRING_LEN]; \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s in macro!", \
+ #D); \
+ RAW_LOG(FATAL, err_str); \
return -1; \
} \
return 0; \
@@ -54,7 +59,13 @@ namespace predictor {
if (factory == NULL || \
::baidu::paddle_serving::predictor::FactoryPool<B>::instance() \
.register_factory(#D, factory) != 0) { \
- RAW_LOG_FATAL("Failed regist factory: %s->%s in macro!", #D, #B); \
+ char err_str[ERROR_STRING_LEN]; \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s->%s in macro!", \
+ #D, \
+ #B); \
+ RAW_LOG(FATAL, err_str); \
return; \
} \
return; \
@@ -66,15 +77,26 @@ namespace predictor {
::baidu::paddle_serving::predictor::FactoryDerive<D, B>* factory = new ( \
::std::nothrow)::baidu::paddle_serving::predictor::FactoryDerive<D, B>(); \
+ char err_str[ERROR_STRING_LEN]; \
if (factory == NULL || \
::baidu::paddle_serving::predictor::FactoryPool<B>::instance() \
.register_factory(N, factory) != 0) { \
- RAW_LOG_FATAL( \
- "Failed regist factory: %s->%s, tag: %s in macro!", #D, #B, N); \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s->%s, tag: %s in macro!", \
+ #D, \
+ #B, \
+ N); \
+ RAW_LOG(FATAL, err_str); \
return; \
} \
- RAW_LOG_WARNING( \
- "Succ regist factory: %s->%s, tag: %s in macro!", #D, #B, N); \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Succ regist factory: %s->%s, tag: %s in macro!", \
+ #D, \
+ #B, \
+ N); \
+ RAW_LOG(WARNING, err_str); \
return; \
}
@@ -102,24 +124,35 @@ class FactoryPool {
}
int register_factory(const std::string& tag, FactoryBase<B>* factory) {
+ char err_str[ERROR_STRING_LEN];
typename std::map<std::string, FactoryBase<B>*>::iterator it =
_pool.find(tag);
if (it != _pool.end()) {
- RAW_LOG_FATAL("Insert duplicate with tag: %s", tag.c_str());
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Insert duplicate with tag: %s",
+ tag.c_str());
+ RAW_LOG(FATAL, err_str);
return -1;
}
std::pair<typename std::map<std::string, FactoryBase<B>*>::iterator, bool>
r = _pool.insert(std::make_pair(tag, factory));
if (!r.second) {
- RAW_LOG_FATAL("Failed insert new factory with: %s", tag.c_str());
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Failed insert new factory with: %s",
+ tag.c_str());
+ RAW_LOG(FATAL, err_str);
return -1;
}
- RAW_LOG_INFO("Succ insert one factory, tag: %s, base type %s",
- tag.c_str(),
- typeid(B).name());
-
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Succ insert one factory, tag: %s, base type %s",
+ tag.c_str(),
+ typeid(B).name());
+ RAW_LOG(INFO, err_str);
return 0;
}
@@ -127,9 +160,13 @@ class FactoryPool {
typename std::map<std::string, FactoryBase<B>*>::iterator it =
_pool.find(tag);
if (it == _pool.end() || it->second == NULL) {
- RAW_LOG_FATAL("Not found factory pool, tag: %s, pool size %u",
- tag.c_str(),
- _pool.size());
+ char err_str[ERROR_STRING_LEN];
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Not found factory pool, tag: %s, pool size %u",
+ tag.c_str(),
+ _pool.size());
+ RAW_LOG(FATAL, err_str);
return NULL;
}
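
The factory and op-repository hunks replace the printf-style `RAW_LOG_FATAL`/`RAW_LOG_INFO` helpers with an explicit pattern: format the message into a fixed `ERROR_STRING_LEN` buffer with `snprintf`, then hand the finished string to `RAW_LOG`. A self-contained sketch of that pattern, with `fprintf` standing in for `RAW_LOG` so it compiles without glog:

```cpp
#include <cstdio>
#include <string>
#include <typeinfo>

#define ERROR_STRING_LEN 10240  // mirrors the constant added to inner_common.h

// Pre-format into a bounded buffer, then pass the finished string to the
// logger (RAW_LOG(FATAL, err_str) in the real macros).
template <typename B>
int report_duplicate(const std::string& tag) {
  char err_str[ERROR_STRING_LEN];
  std::snprintf(err_str, ERROR_STRING_LEN - 1,
                "Insert duplicate with tag: %s, base type %s",
                tag.c_str(), typeid(B).name());
  std::fprintf(stderr, "%s\n", err_str);
  return -1;
}

int main() { return report_duplicate<int>("general_infer_op") < 0 ? 0 : 1; }
```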
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index e8c0ff47d86f081516a35576655f843a28b0591b..431bc456326c1714dce48e2f6321bf58f3e021ce 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -38,6 +38,7 @@ class InferEngineCreationParams {
_enable_ir_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
+ _use_trt = false;
}
void set_path(const std::string& path) { _path = path; }
@@ -50,12 +51,16 @@ class InferEngineCreationParams {
_enable_ir_optimization = enable_ir_optimization;
}
+ void set_use_trt(bool use_trt) { _use_trt = use_trt; }
+
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
bool enable_ir_optimization() const { return _enable_ir_optimization; }
+ bool use_trt() const { return _use_trt; }
+
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
@@ -86,6 +91,7 @@ class InferEngineCreationParams {
bool _enable_ir_optimization;
bool _static_optimization;
bool _force_update_static_cache;
+ bool _use_trt;
};
class InferEngine {
@@ -172,6 +178,10 @@ class ReloadableInferEngine : public InferEngine {
force_update_static_cache);
}
+ if (conf.has_use_trt()) {
+ _infer_engine_params.set_use_trt(conf.use_trt());
+ }
+
if (!check_need_reload() || load(_infer_engine_params) != 0) {
LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
return -1;
@@ -553,8 +563,12 @@ class CloneDBReloadableInferEngine
};
template <typename FluidFamilyCore>
+#ifdef WITH_TRT
+class FluidInferEngine : public DBReloadableInferEngine {
+#else
class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
- public:
+#endif
+ public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
@@ -603,14 +617,21 @@ class VersionedInferEngine : public InferEngine {
LOG(ERROR) << "Failed generate engine with type:" << engine_type;
return -1;
}
- VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
+#ifndef BCLOUD
+ VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
int tmp = FLAGS_logtostderr;
if (engine->proc_initialize(conf, version) != 0) {
LOG(ERROR) << "Failed initialize engine, type:" << engine_type;
return -1;
}
- VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr;
+ VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
FLAGS_logtostderr = tmp;
+#else
+ if (engine->proc_initialize(conf, version) != 0) {
+ LOG(ERROR) << "Failed initialize engine, type:" << engine_type;
+ return -1;
+ }
+#endif
auto r = _versions.insert(std::make_pair(engine->version(), engine));
if (!r.second) {
LOG(ERROR) << "Failed insert item: " << engine->version()
diff --git a/core/predictor/framework/op_repository.h b/core/predictor/framework/op_repository.h
index d27e68c1dbcd98e7393aac6e8b0f001e7300a2bc..bf3b2327cd4a1f0af83c98a5bfe529c37ceb403e 100644
--- a/core/predictor/framework/op_repository.h
+++ b/core/predictor/framework/op_repository.h
@@ -62,7 +62,10 @@ class OpRepository {
template <typename OP_TYPE>
void regist_op(std::string op_type) {
_repository[op_type] = &OpFactory<OP_TYPE>::instance();
- RAW_LOG_INFO("Succ regist op: %s", op_type.c_str());
+ char err_str[ERROR_STRING_LEN];
+ snprintf(
+ err_str, ERROR_STRING_LEN - 1, "Succ regist op: %s", op_type.c_str());
+ RAW_LOG(INFO, err_str);
}
Op* get_op(std::string op_type);
diff --git a/core/predictor/framework/resource.cpp b/core/predictor/framework/resource.cpp
index ca219519e2dcf20bc961d991e3f2eb0ad060f38f..cdb21097fdf40ca6060d99088ed5649a08507720 100644
--- a/core/predictor/framework/resource.cpp
+++ b/core/predictor/framework/resource.cpp
@@ -17,6 +17,9 @@
#include
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/kv_manager.h"
+#ifdef BCLOUD
+#include "aipe_sec_client.h" // NOLINT
+#endif
namespace baidu {
namespace paddle_serving {
namespace predictor {
@@ -109,6 +112,42 @@ int Resource::initialize(const std::string& path, const std::string& file) {
}
LOG(WARNING) << "Successfully proc initialized mempool wrapper";
+#ifdef WITH_AUTH
+ std::string product_name_str = resource_conf.auth_product_name();
+ std::string container_id_str = resource_conf.auth_container_id();
+
+ char* product_name = new char[product_name_str.size() + 1];
+ snprintf(product_name,
+ product_name_str.size() + 1,
+ "%s",
+ product_name_str.c_str());
+ char* container_id = new char[container_id_str.size() + 1];
+ snprintf(container_id,
+ container_id_str.size() + 1,
+ "%s",
+ container_id_str.c_str());
+
+ aipe_auth_request request;
+ request.product_name = product_name;
+ request.container_id = container_id;
+ request.request_ts = (int64_t)time(NULL);
+
+ LOG(INFO) << "\nEasypack info"
+ << "\nproduct name: " << request.product_name
+ << "\ncontainer_id: " << request.container_id
+ << "\nrequest time stamp: " << request.request_ts;
+
+ aipe_auth_response response;
+ response = check_auth(request);
+
+ if (response.result == 0) {
+ LOG(INFO) << "Authentication succeed.";
+ } else {
+ LOG(ERROR) << "Authentication failed. Error code: " << response.result;
+ return -1;
+ }
+#endif
+
if (FLAGS_enable_model_toolkit) {
int err = 0;
std::string model_toolkit_path = resource_conf.model_toolkit_path();
diff --git a/core/predictor/framework/service.cpp b/core/predictor/framework/service.cpp
index 95c7db9f96a6e78522190e3f522d38669423475b..cb02a3278b37bd76631193fbd78cf026eed633c9 100644
--- a/core/predictor/framework/service.cpp
+++ b/core/predictor/framework/service.cpp
@@ -19,6 +19,7 @@
#include <butil/time.h> // butil::Timer
#endif
+#include <inttypes.h>
#include
#include
#include
@@ -135,50 +136,63 @@ const std::string& InferService::name() const { return _infer_service_format; }
// Execute each workflow serially
int InferService::inference(const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
- TRACEPRINTF("start to inference");
+ TRACEPRINTF("(logid=%" PRIu64 ") start to inference", log_id);
// when funtion call begins, framework will reset
// thread local variables&resources automatically.
if (Resource::instance().thread_clear() != 0) {
- LOG(ERROR) << "Failed thread clear whole resource";
+ LOG(ERROR) << "(logid=" << log_id << ") Failed thread clear whole resource";
return ERR_INTERNAL_FAILURE;
}
- TRACEPRINTF("finish to thread clear");
+ TRACEPRINTF("(logid=%" PRIu64 ") finish to thread clear", log_id);
if (_enable_map_request_to_workflow) {
- LOG(INFO) << "enable map request == True";
- std::vector<Workflow*>* workflows = _map_request_to_workflow(request);
+ VLOG(2) << "(logid=" << log_id << ") enable map request == True";
+ std::vector<Workflow*>* workflows =
+ _map_request_to_workflow(request, log_id);
if (!workflows || workflows->size() == 0) {
- LOG(ERROR) << "Failed to map request to workflow";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed to map request to workflow";
return ERR_INTERNAL_FAILURE;
}
size_t fsize = workflows->size();
for (size_t fi = 0; fi < fsize; ++fi) {
Workflow* workflow = (*workflows)[fi];
if (workflow == NULL) {
- LOG(ERROR) << "Failed to get valid workflow at: " << fi;
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed to get valid workflow at: " << fi;
return ERR_INTERNAL_FAILURE;
}
- TRACEPRINTF("start to execute workflow[%s]", workflow->name().c_str());
- int errcode = _execute_workflow(workflow, request, response, debug_os);
- TRACEPRINTF("finish to execute workflow[%s]", workflow->name().c_str());
+ TRACEPRINTF("(logid=%" PRIu64 ") start to execute workflow[%s]",
+ log_id,
+ workflow->name().c_str());
+ int errcode =
+ _execute_workflow(workflow, request, response, log_id, debug_os);
+ TRACEPRINTF("(logid=%" PRIu64 ") finish to execute workflow[%s]",
+ log_id,
+ workflow->name().c_str());
if (errcode < 0) {
- LOG(ERROR) << "Failed execute workflow[" << workflow->name()
- << "] in:" << name();
+ LOG(ERROR) << "(logid=" << log_id << ") Failed execute workflow["
+ << workflow->name() << "] in:" << name();
return errcode;
}
}
} else {
- LOG(INFO) << "enable map request == False";
- TRACEPRINTF("start to execute one workflow");
+ VLOG(2) << "(logid=" << log_id << ") enable map request == False";
+ TRACEPRINTF("(logid=%" PRIu64 ") start to execute one workflow", log_id);
size_t fsize = _flows.size();
for (size_t fi = 0; fi < fsize; ++fi) {
- TRACEPRINTF("start to execute one workflow-%lu", fi);
- int errcode = execute_one_workflow(fi, request, response, debug_os);
- TRACEPRINTF("finish to execute one workflow-%lu", fi);
+ TRACEPRINTF(
+ "(logid=%" PRIu64 ") start to execute one workflow-%lu", log_id, fi);
+ int errcode =
+ execute_one_workflow(fi, request, response, log_id, debug_os);
+ TRACEPRINTF(
+ "(logid=%" PRIu64 ") finish to execute one workflow-%lu", log_id, fi);
if (errcode < 0) {
- LOG(ERROR) << "Failed execute 0-th workflow in:" << name();
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed execute 0-th workflow in:" << name();
return errcode;
}
}
@@ -188,26 +202,30 @@ int InferService::inference(const google::protobuf::Message* request,
int InferService::debug(const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
- return inference(request, response, debug_os);
+ return inference(request, response, log_id, debug_os);
}
int InferService::execute_one_workflow(uint32_t index,
const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
if (index >= _flows.size()) {
- LOG(ERROR) << "Faield execute workflow, index: " << index
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Faield execute workflow, index: " << index
<< " >= max:" << _flows.size();
return ERR_OVERFLOW_FAILURE;
}
Workflow* workflow = _flows[index];
- return _execute_workflow(workflow, request, response, debug_os);
+ return _execute_workflow(workflow, request, response, log_id, debug_os);
}
int InferService::_execute_workflow(Workflow* workflow,
const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
butil::Timer workflow_time(butil::Timer::STARTED);
// create and submit beginer channel
@@ -215,54 +233,62 @@ int InferService::_execute_workflow(Workflow* workflow,
req_channel.init(0, START_OP_NAME);
req_channel = request;
- DagView* dv = workflow->fetch_dag_view(full_name());
- dv->set_request_channel(req_channel);
+ DagView* dv = workflow->fetch_dag_view(full_name(), log_id);
+ dv->set_request_channel(req_channel, log_id);
// call actual inference interface
- int errcode = dv->execute(debug_os);
+ int errcode = dv->execute(log_id, debug_os);
if (errcode < 0) {
- LOG(ERROR) << "Failed execute dag for workflow:" << workflow->name();
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed execute dag for workflow:" << workflow->name();
return errcode;
}
- TRACEPRINTF("finish to dv execute");
+ TRACEPRINTF("(logid=%" PRIu64 ") finish to dv execute", log_id);
// create ender channel and copy
- const Channel* res_channel = dv->get_response_channel();
+ const Channel* res_channel = dv->get_response_channel(log_id);
+ if (res_channel == NULL) {
+ LOG(ERROR) << "(logid=" << log_id << ") Failed get response channel";
+ return ERR_INTERNAL_FAILURE;
+ }
+
if (!_merger || !_merger->merge(res_channel->message(), response)) {
- LOG(ERROR) << "Failed merge channel res to response";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed merge channel res to response";
return ERR_INTERNAL_FAILURE;
}
- TRACEPRINTF("finish to copy from");
+ TRACEPRINTF("(logid=%" PRIu64 ") finish to copy from", log_id);
workflow_time.stop();
- LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed();
+ LOG(INFO) << "(logid=" << log_id
+ << ") workflow total time: " << workflow_time.u_elapsed();
PredictorMetric::GetInstance()->update_latency_metric(
WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed());
// return tls data to object pool
workflow->return_dag_view(dv);
- TRACEPRINTF("finish to return dag view");
+ TRACEPRINTF("(logid=%" PRIu64 ") finish to return dag view", log_id);
return ERR_OK;
}
std::vector<Workflow*>* InferService::_map_request_to_workflow(
- const google::protobuf::Message* request) {
+ const google::protobuf::Message* request, const uint64_t log_id) {
const google::protobuf::Descriptor* desc = request->GetDescriptor();
const google::protobuf::FieldDescriptor* field =
desc->FindFieldByName(_request_field_key);
if (field == NULL) {
- LOG(ERROR) << "No field[" << _request_field_key << "] in ["
- << desc->full_name() << "].";
+ LOG(ERROR) << "(logid=" << log_id << ") No field[" << _request_field_key
+ << "] in [" << desc->full_name() << "].";
return NULL;
}
if (field->is_repeated()) {
- LOG(ERROR) << "field[" << desc->full_name() << "." << _request_field_key
- << "] is repeated.";
+ LOG(ERROR) << "(logid=" << log_id << ") field[" << desc->full_name() << "."
+ << _request_field_key << "] is repeated.";
return NULL;
}
if (field->cpp_type() != google::protobuf::FieldDescriptor::CPPTYPE_STRING) {
- LOG(ERROR) << "field[" << desc->full_name() << "." << _request_field_key
- << "] should be string";
+ LOG(ERROR) << "(logid=" << log_id << ") field[" << desc->full_name() << "."
+ << _request_field_key << "] should be string";
return NULL;
}
const std::string& field_value =
@@ -270,7 +296,7 @@ std::vector* InferService::_map_request_to_workflow(
std::vector<Workflow*>* p_workflow =
_request_to_workflow_map.seek(field_value);
if (p_workflow == NULL) {
- LOG(ERROR) << "cannot find key[" << field_value
+ LOG(ERROR) << "(logid=" << log_id << ") cannot find key[" << field_value
<< "] in _request_to_workflow_map";
return NULL;
}
diff --git a/core/predictor/framework/service.h b/core/predictor/framework/service.h
index ef6d3a3a468d1fc47c3012ad5d664bb64595a52c..d3fb0b988f002ab68d28173f9993c02b8eb76813 100644
--- a/core/predictor/framework/service.h
+++ b/core/predictor/framework/service.h
@@ -52,25 +52,29 @@ class InferService {
// Execute each workflow serially
virtual int inference(const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os = NULL);
int debug(const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os);
int execute_one_workflow(uint32_t index,
const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os);
private:
int _execute_workflow(Workflow* workflow,
const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os);
std::vector<Workflow*>* _map_request_to_workflow(
- const google::protobuf::Message* request);
+ const google::protobuf::Message* request, const uint64_t log_id);
private:
std::vector<Workflow*> _flows;
@@ -88,6 +92,7 @@ class ParallelInferService : public InferService {
// Execute workflows in parallel
int inference(const google::protobuf::Message* request,
google::protobuf::Message* response,
+ const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
return 0;
}
diff --git a/core/predictor/framework/service_manager.h b/core/predictor/framework/service_manager.h
index fa5e872625739ce233d7dd5efe11e1a0fa61d49d..b6b301dd3dc88dc064e0b17739fa059f3366f023 100644
--- a/core/predictor/framework/service_manager.h
+++ b/core/predictor/framework/service_manager.h
@@ -23,17 +23,24 @@ namespace predictor {
#define REGIST_FORMAT_SERVICE(svr_name, svr) \
do { \
+ char err_str[ERROR_STRING_LEN]; \
int ret = \
::baidu::paddle_serving::predictor::FormatServiceManager::instance() \
.regist_service(svr_name, svr); \
if (ret != 0) { \
- RAW_LOG_ERROR("Failed regist service[%s][%s]", \
- svr_name.c_str(), \
- typeid(svr).name()); \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist service[%s][%s]", \
+ svr_name.c_str(), \
+ typeid(svr).name()); \
+ RAW_LOG(ERROR, err_str); \
} else { \
- RAW_LOG_INFO("Success regist service[%s][%s]", \
- svr_name.c_str(), \
- typeid(svr).name()); \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Success regist service[%s][%s]", \
+ svr_name.c_str(), \
+ typeid(svr).name()); \
+ RAW_LOG(INFO, err_str); \
} \
} while (0)
@@ -42,31 +49,46 @@ class FormatServiceManager {
typedef google::protobuf::Service Service;
int regist_service(const std::string& svr_name, Service* svr) {
+ char err_str[ERROR_STRING_LEN];
if (_service_map.find(svr_name) != _service_map.end()) {
- RAW_LOG_ERROR("Service[%s][%s] already exist!",
- svr_name.c_str(),
- typeid(svr).name());
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Service[%s][%s] already exist!",
+ svr_name.c_str(),
+ typeid(svr).name());
+ RAW_LOG(ERROR, err_str);
return -1;
}
std::pair<boost::unordered_map<std::string, Service*>::iterator, bool> ret;
ret = _service_map.insert(std::make_pair(svr_name, svr));
if (ret.second == false) {
- RAW_LOG_ERROR("Service[%s][%s] insert failed!",
- svr_name.c_str(),
- typeid(svr).name());
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Service[%s][%s] insert failed!",
+ svr_name.c_str(),
+ typeid(svr).name());
+ RAW_LOG(ERROR, err_str);
return -1;
}
- RAW_LOG_INFO("Service[%s] insert successfully!", svr_name.c_str());
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Service[%s] insert successfully!",
+ svr_name.c_str());
+ RAW_LOG(INFO, err_str);
return 0;
}
Service* get_service(const std::string& svr_name) {
+ char err_str[ERROR_STRING_LEN];
boost::unordered_map<std::string, Service*>::iterator res;
if ((res = _service_map.find(svr_name)) == _service_map.end()) {
- RAW_LOG_WARNING("Service[%s] not found in service manager!",
- svr_name.c_str());
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Service[%s] not found in service manager!",
+ svr_name.c_str());
+ RAW_LOG(WARNING, err_str);
return NULL;
}
return (*res).second;
diff --git a/core/predictor/framework/workflow.cpp b/core/predictor/framework/workflow.cpp
index 16c4a6e9f475bf575f84bd24764d6348ac65120c..147ab36b79330c781c605d2d29ffb04c4f761aa7 100644
--- a/core/predictor/framework/workflow.cpp
+++ b/core/predictor/framework/workflow.cpp
@@ -32,21 +32,22 @@ int Workflow::init(const configure::Workflow& conf) {
return 0;
}
-DagView* Workflow::fetch_dag_view(const std::string& service_name) {
+DagView* Workflow::fetch_dag_view(const std::string& service_name,
+ const uint64_t log_id) {
DagView* view = NULL;
if (_type == "Sequence") {
view = butil::get_object();
} else if (_type == "Parallel") {
view = butil::get_object();
} else {
- LOG(ERROR) << "Unknown dag type:" << _type << "!";
+ LOG(ERROR) << "(logid=" << log_id << ") Unknown dag type:" << _type << "!";
return NULL;
}
if (view == NULL) {
- LOG(ERROR) << "create dag view from pool failed!";
+ LOG(ERROR) << "(logid=" << log_id << ") create dag view from pool failed!";
return NULL;
}
- view->init(&_dag, service_name);
+ view->init(&_dag, service_name, log_id);
return view;
}
diff --git a/core/predictor/framework/workflow.h b/core/predictor/framework/workflow.h
index a4b3ed1dadccaa24cbeb6813ec7bcc18bac2aad8..14e4d567a540a19579208c91d046ba83de1679e3 100644
--- a/core/predictor/framework/workflow.h
+++ b/core/predictor/framework/workflow.h
@@ -36,7 +36,8 @@ class Workflow {
// different apps.
int init(const configure::Workflow& conf);
- DagView* fetch_dag_view(const std::string& service_name);
+ DagView* fetch_dag_view(const std::string& service_name,
+ const uint64_t log_id);
int deinit() { return 0; }
diff --git a/core/predictor/op/op.cpp b/core/predictor/op/op.cpp
index 59ef6aed71977a3f762ff4fbe9480db19cb4057e..33dba2b506543ed1103cb0b456f5f054969f17fa 100644
--- a/core/predictor/op/op.cpp
+++ b/core/predictor/op/op.cpp
@@ -35,7 +35,8 @@ int Op::init(Bus* bus,
uint32_t id,
const std::string& name,
const std::string& type,
- void* conf) {
+ void* conf,
+ const uint64_t log_id) {
_bus = bus;
_dag = dag;
_id = id;
@@ -45,7 +46,8 @@ int Op::init(Bus* bus,
_timer = butil::get_object();
if (!_timer) {
- LOG(ERROR) << "Invalid timerflow in op:" << this->name();
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Invalid timerflow in op:" << this->name();
return -1;
}
@@ -55,7 +57,8 @@ int Op::init(Bus* bus,
Channel* channel = mutable_channel();
if (channel == NULL) {
- LOG(ERROR) << "Failed mutable channel in op: " << this->id() << ", "
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Failed mutable channel in op: " << this->id() << ", "
<< this->name() << "!";
return -1;
}
@@ -96,18 +99,20 @@ int Op::check_time(const char* tag) {
return 0;
}
-int Op::process(bool debug) {
+int Op::process(const uint64_t log_id, bool debug) {
butil::Timer op_time(butil::Timer::STARTED);
if (debug && _timer) {
_timer->start();
}
if (!_has_init) {
- LOG(ERROR) << "Make sure op has been init before inference";
+ LOG(ERROR) << "(logid=" << log_id
+ << ") Make sure op has been init before inference";
return ERR_INTERNAL_FAILURE;
}
if (_has_calc) {
- LOG(INFO) << "Op: " << _name << " already processed before";
+ LOG(INFO) << "(logid=" << log_id << ") Op: " << _name
+ << " already processed before";
return ERR_OK;
}
@@ -143,7 +148,7 @@ int Op::process(bool debug) {
// 3. share output to bus
Channel* channel = mutable_channel();
- channel->share_to_bus(_bus);
+ channel->share_to_bus(_bus, log_id);
// 4. mark has calculated
_has_calc = true;
@@ -156,7 +161,8 @@ int Op::process(bool debug) {
op_time.stop();
PredictorMetric::GetInstance()->update_latency_metric(
OP_METRIC_PREFIX + full_name(), op_time.u_elapsed());
- LOG(INFO) << " " << name() << "_time=[" << op_time.u_elapsed() << "]";
+ LOG(INFO) << "(logid=" << log_id << ") " << name() << "_time=["
+ << op_time.u_elapsed() << "]";
return ERR_OK;
}
diff --git a/core/predictor/op/op.h b/core/predictor/op/op.h
index ae52975fe6f2506fb0bf483318f607df137c8a96..ea700cce164805d04ddd10b72311f068245e2f10 100644
--- a/core/predictor/op/op.h
+++ b/core/predictor/op/op.h
@@ -113,13 +113,14 @@ class Op {
uint32_t id,
const std::string& name,
const std::string& type,
- void* conf);
+ void* conf,
+ const uint64_t log_id);
int deinit();
int check_time(const char* tag);
- int process(bool debug);
+ int process(const uint64_t log_id, bool debug);
std::string time_info();
diff --git a/core/predictor/src/pdserving.cpp b/core/predictor/src/pdserving.cpp
index 157d52cee1adaea0524ebde01f75a90a6b2adc2f..59ec59d9012c94c322eee2ab3f357218deeedbb4 100644
--- a/core/predictor/src/pdserving.cpp
+++ b/core/predictor/src/pdserving.cpp
@@ -202,8 +202,6 @@ int main(int argc, char** argv) {
}
VLOG(2) << "Succ call pthread worker start function";
-#ifndef BCLOUD
-
if (Resource::instance().general_model_initialize(FLAGS_resource_path,
FLAGS_resource_file) != 0) {
LOG(ERROR) << "Failed to initialize general model conf: "
@@ -213,6 +211,7 @@ int main(int argc, char** argv) {
VLOG(2) << "Succ initialize general model";
+#ifndef BCLOUD
// FATAL messages are output to stderr
FLAGS_stderrthreshold = 3;
#endif
diff --git a/core/predictor/tools/seq_generator.cpp b/core/predictor/tools/seq_generator.cpp
index d384b9310a965503358ea3bc80e4fa8c13e7b39a..eb7e7ed7f9a609e0c21be9a2c3d686dd7d9a1abd 100644
--- a/core/predictor/tools/seq_generator.cpp
+++ b/core/predictor/tools/seq_generator.cpp
@@ -12,13 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <sys/time.h>
+
#include
#include
#include
+#include <thread>
+
#include "core/predictor/framework.pb.h"
#include "quant.h"
#include "seq_file.h"
+inline uint64_t time_diff(const struct timeval &start_time,
+ const struct timeval &end_time) {
+ return (end_time.tv_sec - start_time.tv_sec) * 1000000 +
+ (end_time.tv_usec - start_time.tv_usec);
+}
+
using paddle::framework::proto::VarType;
std::map var_type_size;
void reg_var_types() {
@@ -100,8 +110,8 @@ int dump_parameter(const char *input_file, const char *output_file) {
char *value_buf = new char[value_buf_len];
size_t offset = 0;
for (int64_t i = 0; i < dims[0]; ++i) {
- // std::cout << "key_len " << key_len << " value_len " << value_buf_len <<
- // std::endl;
+ // std::cout << "key_len " << key_len << " value_len " << value_buf_len
+ // << std::endl;
memcpy(value_buf, tensor_buf + offset, value_buf_len);
seq_file_writer.write((char *)&i, sizeof(i), value_buf, value_buf_len);
offset += value_buf_len;
@@ -109,14 +119,14 @@ int dump_parameter(const char *input_file, const char *output_file) {
return 0;
}
-int compress_parameter(const char *file1, const char *file2, int bits) {
+float *read_embedding_table(const char *file1, std::vector<int64_t> &dims) {
std::ifstream is(file1);
// Step 1: is read version, os write version
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
if (version != 0) {
std::cout << "Version number " << version << " not supported" << std::endl;
- return -1;
+ return NULL;
}
std::cout << "Version size: " << sizeof(version) << std::endl;
// Step 2: is read LoD level, os write LoD level
@@ -138,7 +148,7 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
is.read(reinterpret_cast<char *>(&version), sizeof(version));
if (version != 0) {
std::cout << "Version number " << version << " not supported" << std::endl;
- return -1;
+ return NULL;
}
// Step 4: is read Tensor Data, os write min/max/quant data
@@ -149,10 +159,10 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
is.read(reinterpret_cast<char *>(buf.get()), size);
if (!desc.ParseFromArray(buf.get(), size)) {
std::cout << "Cannot parse tensor desc" << std::endl;
- return -1;
+ return NULL;
}
// read tensor
- std::vector<int64_t> dims;
+ // std::vector<int64_t> dims;
dims.reserve(static_cast(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
@@ -164,7 +174,7 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
if (dims.size() != 2) {
std::cout << "Parameter dims not 2D" << std::endl;
- return -1;
+ return NULL;
}
size_t numel = 1;
@@ -176,47 +186,96 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
char *tensor_buf = new char[buf_size];
is.read(static_cast<char *>(tensor_buf), buf_size);
float *tensor_float_buf = reinterpret_cast<float *>(tensor_buf);
- size_t per_line_size = dims[1] * 1 + 2 * sizeof(float);
- char *tensor_out = new char[per_line_size * dims[0]];
+ return tensor_float_buf;
+}
- float loss = 0;
- float all_loss = 0;
+int compress_parameter_parallel(const char *file1,
+ const char *file2,
+ int bits,
+ int n_threads) {
+#define MIN_THREADS (1)
+#define MAX_THREADS (80)
+ std::vector<int64_t> dims;
+ float *emb_table = read_embedding_table(file1, dims);
+ if (emb_table == NULL || dims.size() != 2) {
+ return -1;
+ }
+ // int64_t dict_size = dims[0]/100000000;
+ int64_t dict_size = dims[0];
+ int64_t emb_size = dims[1];
+ size_t per_line_size = emb_size * 1 + 2 * sizeof(float);
+ n_threads = std::min(std::max(MIN_THREADS, n_threads), MAX_THREADS);
+ int64_t step = dict_size / n_threads;
+ std::vector<char *> result;
+ result.resize(dict_size);
+ double pow2bits = pow(2, bits);
std::cout << "Start Quant" << std::endl;
+ std::vector<std::thread> threads;
+ for (int i = 0; i < n_threads + 1; ++i) {
+ threads.push_back(std::thread([=, &result]() {
+ int64_t start = i * step;
+ int64_t end = (i + 1) * step;
+ if (i == n_threads) {
+ if (start == dict_size) {
+ return;
+ }
+ end = dict_size;
+ }
+ printf("THREAD[%d], index [%ld, %ld), start Quant table...\n",
+ i,
+ start,
+ end);
+ struct timeval quant_start;
+ gettimeofday(&(quant_start), NULL);
+ for (int64_t k = start; k < end; ++k) {
+ float xmin = 0, xmax = 0, loss = 0;
+ char *tensor_temp = new char[per_line_size];
+ greedy_search(
+ emb_table + k * emb_size, xmin, xmax, loss, emb_size, bits);
+ // use the scale at which the quantization loss is smallest
+ float scale = (xmax - xmin) / (pow2bits - 1);
+ char *min_ptr = tensor_temp;
+ char *max_ptr = tensor_temp + sizeof(float);
+ memcpy(min_ptr, &xmin, sizeof(float));
+ memcpy(max_ptr, &xmax, sizeof(float));
+ for (size_t e = 0; e < emb_size; ++e) {
+ float x = *(emb_table + k * emb_size + e);
+ int val = round((x - xmin) / scale);
+ val = std::max(0, val);
+ val = std::min((int)pow2bits - 1, val);
+ *(tensor_temp + 2 * sizeof(float) + e) = val;
+ }
+ result[k] = tensor_temp;
+ if ((k - start) % 10000 == 0) {
+ printf("THREAD[%d], handle line: %ld\n", i, k - start);
+ }
+ }
+ struct timeval quant_end;
+ gettimeofday(&(quant_end), NULL);
+ printf("THREAD[%d], Quantization finished, cost: %lu us!!!\n",
+ i,
+ time_diff(quant_start, quant_end));
+ }));
+ }
+ for (auto &thread : threads) {
+ thread.join();
+ }
SeqFileWriter seq_file_writer(file2);
-
- size_t offset = 0;
-
- for (int64_t i = 0; i < dims[0]; ++i) {
- float xmin = 0, xmax = 0, loss = 0;
- size_t scale = dims[1];
- char *tensor_temp = new char[per_line_size];
- greedy_search(
- tensor_float_buf + i * dims[1], xmin, xmax, loss, scale, bits);
- for (size_t e = 0; e < dims[1]; ++e) {
- float x = *(tensor_float_buf + i * dims[1] + e);
- int val = round((x - xmin) / (xmax - xmin) * (pow(2, bits) - 1));
- val = std::max(0, val);
- val = std::min((int)pow(2, bits) - 1, val);
- char *min_ptr = tensor_temp;
- char *max_ptr = tensor_temp + sizeof(float);
- memcpy(min_ptr, &xmin, sizeof(float));
- memcpy(max_ptr, &xmax, sizeof(float));
- *(tensor_temp + 2 * sizeof(float) + e) = val;
- float unit = (xmax - xmin) / pow(2, bits);
- float trans_val = unit * val + xmin;
- }
- seq_file_writer.write((char *)&i, sizeof(i), tensor_temp, per_line_size);
+ for (int64_t i = 0; i < dict_size; i++) {
+ seq_file_writer.write((char *)&i, sizeof(i), result[i], per_line_size);
}
return 0;
}
int main(int argc, char **argv) {
- if (argc < 3 || argc > 4) {
- std::cout << "Usage: if no compress, please follow:" << std::endl;
- std::cout << "seq_generator PARAMETER_FILE OUTPUT_FILE\n" << std::endl;
+ if (argc < 3 || argc > 5) {
+ std::cout << "Usage:" << std::endl;
+ std::cout << "if no compress, please follow:" << std::endl;
+ std::cout << " seq_generator PARAMETER_FILE OUTPUT_FILE\n" << std::endl;
std::cout << "if compress, please follow: " << std::endl;
- std::cout << "seq_generator PARAMETER_FILE OUTPUT_FILE QUANT_BITS"
+ std::cout << " seq_generator PARAMETER_FILE OUTPUT_FILE QUANT_BITS "
+ "[N_THREADS]"
<< std::endl;
- std::cout << "Now it only support 8 bit." << std::endl;
+ std::cout << " Now it only support 8 bit." << std::endl;
return -1;
}
reg_var_types();
@@ -227,7 +286,13 @@ int main(int argc, char **argv) {
}
if (argc == 4) {
std::cout << "generate compressed sparse param sequence file" << std::endl;
- compress_parameter(argv[1], argv[2], atoi(argv[3]));
+ compress_parameter_parallel(argv[1], argv[2], atoi(argv[3]), 1);
+ return 0;
+ }
+ if (argc == 5) {
+ std::cout << "parallel generate compressed sparse param sequence file"
+ << std::endl;
+ compress_parameter_parallel(argv[1], argv[2], atoi(argv[3]), atoi(argv[4]));
return 0;
}
}
diff --git a/core/sdk-cpp/include/abtest.h b/core/sdk-cpp/include/abtest.h
index 4833325416cfd6418bf33444001917d887f08cc0..47a502745ae8aa6297729a0a3695600402cf5cfe 100644
--- a/core/sdk-cpp/include/abtest.h
+++ b/core/sdk-cpp/include/abtest.h
@@ -50,9 +50,9 @@ class WeightedRandomRender : public EndpointRouterBase {
Factory<WeightedRandomRender, EndpointRouterBase>* factory =
new (std::nothrow) Factory<WeightedRandomRender, EndpointRouterBase>();
if (factory == NULL) {
- RAW_LOG_ERROR(
- "Failed regist factory: WeightedRandomRender->EndpointRouterBase in "
- "macro!");
+ RAW_LOG(ERROR,
+ "Failed regist factory: WeightedRandomRender->EndpointRouterBase "
+ "in macro!");
return -1;
}
@@ -62,9 +62,9 @@ class WeightedRandomRender : public EndpointRouterBase {
// together.
if (FactoryPool<EndpointRouterBase>::instance().register_factory(
"WeightedRandomRender", factory) != 0) {
- RAW_LOG_INFO(
- "Factory has been registed: "
- "WeightedRandomRender->EndpointRouterBase.");
+ RAW_LOG(INFO,
+ "Factory has been registed: "
+ "WeightedRandomRender->EndpointRouterBase.");
}
return 0;
diff --git a/core/sdk-cpp/include/factory.h b/core/sdk-cpp/include/factory.h
index 4a3d988afcd981dd92eca5f65c3f254d5f2255d5..89c8aae3ef6bd7b296a8a953f2db88786b501352 100644
--- a/core/sdk-cpp/include/factory.h
+++ b/core/sdk-cpp/include/factory.h
@@ -18,7 +18,6 @@
#include
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/stub_impl.h"
-#include "glog/raw_logging.h"
namespace baidu {
namespace paddle_serving {
@@ -28,12 +27,20 @@ namespace sdk_cpp {
namespace brpc = baidu::rpc;
#endif
+#define ERROR_STRING_LEN 10240
+
#define INLINE_REGIST_OBJECT(D, B, E) \
do { \
Factory<D, B>* factory = new (std::nothrow) Factory<D, B>(); \
if (factory == NULL || \
FactoryPool<B>::instance().register_factory(#D, factory) != 0) { \
- RAW_LOG_ERROR("Failed regist factory: %s->%s in macro!", #D, #B); \
+ char err_str[ERROR_STRING_LEN]; \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s->%s in macro!", \
+ #D, \
+ #B); \
+ RAW_LOG(ERROR, err_str); \
return E; \
} \
} while (0)
@@ -43,7 +50,12 @@ namespace brpc = baidu::rpc;
Factory<D, B>* factory = new (std::nothrow) Factory<D, B>(); \
if (factory == NULL || \
FactoryPool<B>::instance().register_factory(tag, factory) != 0) { \
- RAW_LOG_ERROR("Failed regist factory: %s in macro!", #D); \
+ char err_str[ERROR_STRING_LEN]; \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s in macro!", \
+ #D); \
+ RAW_LOG(ERROR, err_str); \
return -1; \
} \
return 0; \
@@ -66,7 +78,13 @@ namespace brpc = baidu::rpc;
if (factory == NULL || \
::baidu::paddle_serving::sdk_cpp::FactoryPool::instance() \
.register_factory(#D, factory) != 0) { \
- RAW_LOG_ERROR("Failed regist factory: %s->%s in macro!", #D, #B); \
+ char err_str[ERROR_STRING_LEN]; \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s->%s in macro!", \
+ #D, \
+ #B); \
+ RAW_LOG(ERROR, err_str); \
return; \
} \
return; \
@@ -80,8 +98,14 @@ namespace brpc = baidu::rpc;
if (factory == NULL || \
::baidu::paddle_serving::sdk_cpp::FactoryPool::instance() \
.register_factory(T, factory) != 0) { \
- RAW_LOG_ERROR( \
- "Failed regist factory: %s->%s, tag %s in macro!", #D, #B, T); \
+ char err_str[ERROR_STRING_LEN]; \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s->%s, tag %s in macro!", \
+ #D, \
+ #B, \
+ T); \
+ RAW_LOG(ERROR, err_str); \
return; \
} \
return; \
@@ -108,8 +132,13 @@ namespace brpc = baidu::rpc;
::baidu::paddle_serving::sdk_cpp::FactoryPool< \
::baidu::paddle_serving::sdk_cpp::Stub>::instance() \
.register_factory(T, factory) != 0) { \
- RAW_LOG_ERROR( \
- "Failed regist factory: %s->Stub, tag: %s in macro!", #D, T); \
+ char err_str[ERROR_STRING_LEN]; \
+ snprintf(err_str, \
+ ERROR_STRING_LEN - 1, \
+ "Failed regist factory: %s->Stub, tag: %s in macro!", \
+ #D, \
+ T); \
+ RAW_LOG(ERROR, err_str); \
return; \
} \
return; \
@@ -146,14 +175,24 @@ class FactoryPool {
typename std::map*>::iterator it =
_pool.find(tag);
if (it != _pool.end()) {
- RAW_LOG_ERROR("Insert duplicate with tag: %s", tag.c_str());
+ char err_str[ERROR_STRING_LEN];
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Insert duplicate with tag: %s",
+ tag.c_str());
+ RAW_LOG(ERROR, err_str);
return -1;
}
std::pair*>::iterator, bool>
r = _pool.insert(std::make_pair(tag, factory));
if (!r.second) {
- RAW_LOG_ERROR("Failed insert new factory with: %s", tag.c_str());
+ char err_str[ERROR_STRING_LEN];
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Failed insert new factory with: %s",
+ tag.c_str());
+ RAW_LOG(ERROR, err_str);
return -1;
}
@@ -164,9 +203,13 @@ class FactoryPool {
typename std::map*>::iterator it =
_pool.find(tag);
if (it == _pool.end() || it->second == NULL) {
- RAW_LOG_ERROR("Not found factory pool, tag: %s, pool size: %u",
- tag.c_str(),
- _pool.size());
+ char err_str[ERROR_STRING_LEN];
+ snprintf(err_str,
+ ERROR_STRING_LEN - 1,
+ "Not found factory pool, tag: %s, pool size: %u",
+ tag.c_str(),
+ _pool.size());
+ RAW_LOG(ERROR, err_str);
return NULL;
}
diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto
index 51c0335a9db896e1260e83915de81e51451a904b..9988b298bdd22210fbe3127b9e4b57c89077f3ff 100644
--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -37,6 +37,7 @@ message Request {
repeated FeedInst insts = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
+ required uint64 log_id = 4 [ default = 0 ];
};
message Response {
diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index f4a6639bdb38fac97662084f7d927d24b6179717..cf0bfdf2593ff0274e4bec20d3b1524f2e61241a 100644
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -4,17 +4,27 @@
## Compilation environment requirements
-- OS: CentOS 7
-- GCC: 4.8.2 and later
-- Golang: 1.9.2 and later
-- Git:2.17.1 and later
-- CMake:3.2.2 and later
-- Python:2.7.2 and later / 3.6 and later
-
-It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you:
-
-- CPU: `hub.baidubce.com/paddlepaddle/serving:latest-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
-- GPU: `hub.baidubce.com/paddlepaddle/serving:latest-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+| module | version |
+| :--------------------------: | :-------------------------------: |
+| OS | CentOS 7 |
+| gcc | 4.8.5 and later |
+| gcc-c++ | 4.8.5 and later |
+| make | 3.82 and later |
+| cmake | 3.2.0 and later |
+| Python | 2.7.2 and later / 3.6 and later |
+| Go | 1.9.2 and later |
+| git | 2.17.1 and later |
+| glibc-static | 2.17 |
+| openssl-devel | 1.0.2k |
+| bzip2-devel | 1.0.6 and later |
+| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later |
+| sqlite-devel | 3.7.17 and later |
+| patchelf | 0.9 and later |
+| libXext | 1.3.3 |
+| libSM | 1.2.2 |
+| libXrender | 0.9.10 |
+
+It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you, see [this document](DOCKER_IMAGES.md).
This document will take Python2 as an example to show how to compile Paddle Serving. If you want to compile with Python3, just adjust the Python options of cmake:
@@ -29,6 +39,9 @@ git clone https://github.com/PaddlePaddle/Serving
cd Serving && git submodule update --init --recursive
```
+
+
+
## PYTHONROOT Setting
```shell
@@ -38,13 +51,49 @@ export PYTHONROOT=/usr/
In the default centos7 image we provide, the Python path is `/usr/bin/python`. If you want to use our centos6 image, you need to set it to `export PYTHONROOT=/usr/local/python2.7/`.
+
+
+## Install Python dependencies
+
+```shell
+pip install -r python/requirements.txt
+```
+
+If Python3 is used, replace `pip` with `pip3`.
+
+## GOPATH Setting
+
+
+
+The default GOPATH is `$HOME/go`, which you can set to other values.
+```shell
+export GOPATH=$HOME/go
+export PATH=$PATH:$GOPATH/bin
+```
+
+## Get go packages
+
+```shell
+go env -w GO111MODULE=on
+go env -w GOPROXY=https://goproxy.cn,direct
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
+```
+
+
## Compile Server
### Integrated CPU version paddle inference library
``` shell
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
+mkdir server-build-cpu && cd server-build-cpu
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DSERVER=ON ..
make -j10
```
@@ -53,8 +102,30 @@ you can execute `make install` to put targets under directory `./output`, you ne
### Integrated GPU version paddle inference library
``` shell
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
+mkdir server-build-gpu && cd server-build-gpu
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+ -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+ -DSERVER=ON \
+ -DWITH_GPU=ON ..
+make -j10
+```
+
+### Integrated TRT version paddle inference library
+
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+ -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+ -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+ -DSERVER=ON \
+ -DWITH_GPU=ON \
+ -DWITH_TRT=ON ..
make -j10
```
@@ -62,33 +133,54 @@ execute `make install` to put targets under directory `./output`
**Attention:** After the compilation is successful, you need to set the path of `SERVING_BIN`. See [Note](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md#Note) for details.
+
+
## Compile Client
``` shell
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
+mkdir client-build && cd client-build
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DCLIENT=ON ..
make -j10
```
execute `make install` to put targets under directory `./output`
+
+
## Compile the App
```bash
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
+mkdir app-build && cd app-build
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DAPP=ON ..
make
```
+
+
## Install wheel package
-Regardless of the client, server or App part, after compiling, install the whl package under `python/dist/`.
+Regardless of the client, server or App part, after compiling, install the whl package under `python/dist/` in the corresponding build directory (`server-build-cpu`, `server-build-gpu`, `client-build`, `app-build`).
+
+
## Note
When running the python server, it will check the `SERVING_BIN` environment variable. If you want to use your own compiled binary file, set the environment variable to the path of the corresponding binary file, usually`export SERVING_BIN=${BUILD_DIR}/core/general-server/serving`.
+
+## Verify
+
+Please use the examples under `python/examples` to verify.
+
+
+
## CMake Option Description
| Compile Options | Description | Default |
@@ -96,7 +188,9 @@ When running the python server, it will check the `SERVING_BIN` environment vari
| WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
| WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
-| CUDNN_ROOT | Define CuDNN library and header path | |
+| WITH_TRT | Compile Paddle Serving with TensorRT | OFF |
+| CUDNN_LIBRARY | Define CuDNN library and header path | |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
+| TENSORRT_ROOT | Define TensorRT PATH | |
| CLIENT | Compile Paddle Serving Client | OFF |
| SERVER | Compile Paddle Serving Server | OFF |
| APP | Compile Paddle Serving App package | OFF |
@@ -111,7 +205,8 @@ To compile the Paddle Serving GPU version on bare metal, you need to install the
- CUDA
- CuDNN
-- NCCL2
+
+To compile the TensorRT version, you need to install the TensorRT library.
Note here:
@@ -121,21 +216,12 @@ Note here:
The following is the base library version matching relationship used by the PaddlePaddle release version for reference:
-| | CUDA | CuDNN | NCCL2 |
-| :----: | :-----: | :----------------------: | :----: |
-| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
-| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
+| | CUDA | CuDNN | TensorRT |
+| :----: | :-----: | :----------------------: | :----: |
+| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
+| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
+| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
### How to make the compiler detect the CuDNN library
Download the corresponding CUDNN version from NVIDIA developer official website and decompressing it, add `-DCUDNN_ROOT` to cmake command, to specify the path of CUDNN.
-
-### How to make the compiler detect the nccl library
-
-After downloading the corresponding version of the nccl2 library from the NVIDIA developer official website and decompressing it, add the following environment variables (take nccl2.1.4 as an example):
-
-```shell
-export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
-export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
-```
diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md
index d8fd277131d7d169c1a47689e15556e5d10a0fdb..b3619d9a38e967a139f850e7a605f713b1a57f95 100644
--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -4,17 +4,27 @@
## 编译环境设置
-- OS: CentOS 7
-- GCC: 4.8.2及以上
-- Golang: 1.9.2及以上
-- Git:2.17.1及以上
-- CMake:3.2.2及以上
-- Python:2.7.2及以上 / 3.6及以上
-
-推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境:
-
-- CPU: `hub.baidubce.com/paddlepaddle/serving:latest-devel`,dockerfile: [Dockerfile.devel](../tools/Dockerfile.devel)
-- GPU: `hub.baidubce.com/paddlepaddle/serving:latest-gpu-devel`,dockerfile: [Dockerfile.gpu.devel](../tools/Dockerfile.gpu.devel)
+| 组件 | 版本要求 |
+| :--------------------------: | :-------------------------------: |
+| OS | CentOS 7 |
+| gcc | 4.8.5 and later |
+| gcc-c++ | 4.8.5 and later |
+| make | 3.82 and later |
+| cmake | 3.2.0 and later |
+| Python | 2.7.2 and later / 3.6 and later |
+| Go | 1.9.2 and later |
+| git | 2.17.1 and later |
+| glibc-static | 2.17 |
+| openssl-devel | 1.0.2k |
+| bzip2-devel | 1.0.6 and later |
+| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later |
+| sqlite-devel | 3.7.17 and later |
+| patchelf | 0.9 |
+| libXext | 1.3.3 |
+| libSM | 1.2.2 |
+| libXrender | 0.9.10 |
+
+推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境,详见[该文档](DOCKER_IMAGES_CN.md)。
本文档将以Python2为例介绍如何编译Paddle Serving。如果您想用Python3进行编译,只需要调整cmake的Python相关选项即可:
@@ -29,6 +39,9 @@ git clone https://github.com/PaddlePaddle/Serving
cd Serving && git submodule update --init --recursive
```
+
+
+
## PYTHONROOT设置
```shell
@@ -38,13 +51,46 @@ export PYTHONROOT=/usr/
我们提供默认Centos7的Python路径为`/usr/bin/python`,如果您要使用我们的Centos6镜像,需要将其设置为`export PYTHONROOT=/usr/local/python2.7/`。
+
+
+## 安装Python依赖
+
+```shell
+pip install -r python/requirements.txt
+```
+
+如果使用 Python3,请以 `pip3` 替换 `pip`。
+
+## GOPATH 设置
+
+默认 GOPATH 设置为 `$HOME/go`,您也可以设置为其他值。
+```shell
+export GOPATH=$HOME/go
+export PATH=$PATH:$GOPATH/bin
+```
+
+## 获取 Go packages
+
+```shell
+go env -w GO111MODULE=on
+go env -w GOPROXY=https://goproxy.cn,direct
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
+```
+
+
## 编译Server部分
### 集成CPU版本Paddle Inference Library
``` shell
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
+mkdir server-build-cpu && cd server-build-cpu
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DSERVER=ON ..
make -j10
```
@@ -53,8 +99,30 @@ make -j10
### 集成GPU版本Paddle Inference Library
``` shell
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
+mkdir server-build-gpu && cd server-build-gpu
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+ -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+ -DSERVER=ON \
+ -DWITH_GPU=ON ..
+make -j10
+```
+
+### 集成TensorRT版本Paddle Inference Library
+
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+ -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+ -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+ -DSERVER=ON \
+ -DWITH_GPU=ON \
+ -DWITH_TRT=ON ..
make -j10
```
@@ -65,29 +133,50 @@ make -j10
## 编译Client部分
``` shell
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
+mkdir client-build && cd client-build
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DCLIENT=ON ..
make -j10
```
执行`make install`可以把目标产出放在`./output`目录下。
+
+
## 编译App部分
```bash
-mkdir build && cd build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
+mkdir app-build && cd app-build
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+ -DCMAKE_INSTALL_PREFIX=./output \
+ -DAPP=ON ..
make
```
+
+
## 安装wheel包
-无论是Client端,Server端还是App部分,编译完成后,安装`python/dist/`下的whl包即可。
+无论是Client端,Server端还是App部分,编译完成后,安装编译过程临时目录(`server-build-cpu`、`server-build-gpu`、`client-build`、`app-build`)下的`python/dist/` 中的whl包即可。
+
+
## 注意事项
运行python端Server时,会检查`SERVING_BIN`环境变量,如果想使用自己编译的二进制文件,请将设置该环境变量为对应二进制文件的路径,通常是`export SERVING_BIN=${BUILD_DIR}/core/general-server/serving`。
+
+
+## 如何验证
+
+请使用 `python/examples` 下的例子进行验证。
+
+
+
## CMake选项说明
| 编译选项 | 说明 | 默认 |
@@ -95,7 +184,10 @@ make
| WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
| WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
-| CUDNN_ROOT | Define CuDNN library and header path | |
+| WITH_TRT | Compile Paddle Serving with TensorRT | OFF |
+| CUDNN_LIBRARY | Define CuDNN library and header path | |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
+| TENSORRT_ROOT | Define TensorRT PATH | |
| CLIENT | Compile Paddle Serving Client | OFF |
| SERVER | Compile Paddle Serving Server | OFF |
| APP | Compile Paddle Serving App package | OFF |
@@ -110,7 +202,8 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选
- CUDA
- CuDNN
-- NCCL2
+
+编译TensorRT版本,需要安装TensorRT库。
这里要注意的是:
@@ -119,21 +212,12 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选
以下是PaddlePaddle发布版本所使用的基础库版本匹配关系,供参考:
-| | CUDA | CuDNN | NCCL2 |
-| :----: | :-----: | :----------------------: | :----: |
-| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
-| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
+| | CUDA | CuDNN | TensorRT |
+| :----: | :-----: | :----------------------: | :----: |
+| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
+| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
+| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
### 如何让Paddle Serving编译系统探测到CuDNN库
-从NVIDIA developer官网下载对应版本CuDNN并在本地解压后,在cmake编译命令中增加`-DCUDNN_ROOT`参数,指定CuDNN库所在路径。
-
-### 如何让Paddle Serving编译系统探测到nccl库
-
-从NVIDIA developer官网下载对应版本nccl2库并解压后,增加如下环境变量 (以nccl2.1.4为例):
-
-```shell
-export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
-export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
-```
+从NVIDIA developer官网下载对应版本CuDNN并在本地解压后,在cmake编译命令中增加`-DCUDNN_LIBRARY`参数,指定CuDNN库所在路径。
diff --git a/doc/CONTRIBUTE.md b/doc/CONTRIBUTE.md
index 1d0f473ce0edfa6092ac1fe81440b53510d3f7a9..a3bfd0f274623cca0413e3eccf6c34e72a082031 100644
--- a/doc/CONTRIBUTE.md
+++ b/doc/CONTRIBUTE.md
@@ -68,7 +68,7 @@ Paddle Serving uses this [Git branching model](http://nvie.com/posts/a-successfu
1. Build and test
- Users can build Paddle Serving natively on Linux, see the [BUILD steps](doc/INSTALL.md).
+ Users can build Paddle Serving natively on Linux, see the [BUILD steps](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md).
1. Keep pulling
diff --git a/doc/CUBE_LOCAL.md b/doc/CUBE_LOCAL.md
index 4a8859b2958acfd4af5a3474f88afc48f3645c19..175a7037fe02525f3cc5215f71cdba4c12ec2bbd 100644
--- a/doc/CUBE_LOCAL.md
+++ b/doc/CUBE_LOCAL.md
@@ -6,7 +6,8 @@
There are two examples on CTR under python / examples, they are criteo_ctr, criteo_ctr_with_cube. The former is to save the entire model during training, including sparse parameters. The latter is to cut out the sparse parameters and save them into two parts, one is the sparse parameter and the other is the dense parameter. Because the scale of sparse parameters is very large in industrial cases, reaching the order of 10 ^ 9. Therefore, it is not practical to start large-scale sparse parameter prediction on one machine. Therefore, we introduced Baidu's industrial-grade product Cube to provide the sparse parameter service for many years to provide distributed sparse parameter services.
-The local mode of Cube is different from distributed Cube, which is designed to be convenient for developers to use in experiments and demos. If there is a demand for distributed sparse parameter service, please continue reading [Distributed Cube User Guide](./Distributed_Cube) after reading this document (still developing).
+The local mode of Cube is a simplified version of distributed Cube, designed to be convenient for developers to use in experiments and demos.
+
This document uses the original model without any compression algorithm. If there is a need for a quantitative model to go online, please read the [Quantization Storage on Cube Sparse Parameter Indexing](./CUBE_QUANT.md)
diff --git a/doc/CUBE_LOCAL_CN.md b/doc/CUBE_LOCAL_CN.md
index 2c5b478af1b0fa7eb51d89507431459bb6ed033e..9191fe8f54d3e9695d4da04adb82d3c3d33567b2 100644
--- a/doc/CUBE_LOCAL_CN.md
+++ b/doc/CUBE_LOCAL_CN.md
@@ -6,7 +6,7 @@
在python/examples下有两个关于CTR的示例,他们分别是criteo_ctr, criteo_ctr_with_cube。前者是在训练时保存整个模型,包括稀疏参数。后者是将稀疏参数裁剪出来,保存成两个部分,一个是稀疏参数,另一个是稠密参数。由于在工业级的场景中,稀疏参数的规模非常大,达到10^9数量级。因此在一台机器上启动大规模稀疏参数预测是不实际的,因此我们引入百度多年来在稀疏参数索引领域的工业级产品Cube,提供分布式的稀疏参数服务。
-单机版Cube是分布式Cube的弱化版本,旨在方便开发者做实验和Demo时使用。如果有分布式稀疏参数服务的需求,请在读完此文档之后,继续阅读 [稀疏参数索引服务Cube使用指南](分布式Cube)(正在建设中)。
+
本文档使用的都是未经过任何压缩算法处理的原始模型,如果有量化模型上线需求,请阅读[Cube稀疏参数索引量化存储使用指南](./CUBE_QUANT_CN.md)
diff --git a/doc/CUBE_QUANT.md b/doc/CUBE_QUANT.md
index b191695aed247fcadcf10c4bfe3d72343d6d64d0..870b49fcf0e72b9aba0729fdf762b67e2a7004e1 100644
--- a/doc/CUBE_QUANT.md
+++ b/doc/CUBE_QUANT.md
@@ -42,7 +42,7 @@ cd python/examples/criteo_ctr_with_cube
python local_train.py
cp ../../../build_server/core/predictor/seq_generator seq_generator
cp ../../../build_server/output/bin/cube* ./cube/
-sh cube_prepare_quant.sh &
+sh cube_quant_prepare.sh &
python test_server_quant.py ctr_serving_model_kv &
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
```
diff --git a/doc/CUBE_QUANT_CN.md b/doc/CUBE_QUANT_CN.md
index 023f4d2fe246341688dd69d8978ee42817c7adfd..d8c66968c633708742c636a020ceec905588d20b 100644
--- a/doc/CUBE_QUANT_CN.md
+++ b/doc/CUBE_QUANT_CN.md
@@ -42,7 +42,7 @@ cd python/examples/criteo_ctr_with_cube
python local_train.py
cp ../../../build_server/core/predictor/seq_generator seq_generator
cp ../../../build_server/output/bin/cube* ./cube/
-sh cube_prepare_quant.sh &
+sh cube_quant_prepare.sh &
python test_server_quant.py ctr_serving_model_kv &
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
```
diff --git a/doc/DESIGN_CN.md b/doc/DESIGN_CN.md
index 4059c0ee4814abe2959d02e3a2268ac519951244..e795ad6da36ddd391826b8fa79f5ffd801e82368 100644
--- a/doc/DESIGN_CN.md
+++ b/doc/DESIGN_CN.md
@@ -106,7 +106,7 @@ class FluidFamilyCore {

-关于OP之间的依赖关系,以及通过OP组建workflow,可以参考[从零开始写一个预测服务](CREATING.md)的相关章节
+关于OP之间的依赖关系,以及通过OP组建workflow,可以参考[从零开始写一个预测服务](https://github.com/PaddlePaddle/Serving/blob/develop/doc/deprecated/CREATING.md)的相关章节
服务端实例透视图
diff --git a/doc/DOCKER_IMAGES.md b/doc/DOCKER_IMAGES.md
new file mode 100644
index 0000000000000000000000000000000000000000..47a300eabc85689f9bce7c46c353b35b85db9376
--- /dev/null
+++ b/doc/DOCKER_IMAGES.md
@@ -0,0 +1,42 @@
+# Docker Images
+
+([简体中文](DOCKER_IMAGES_CN.md)|English)
+
+This document maintains a list of docker images provided by Paddle Serving.
+
+## Get docker image
+
+You can get images in two ways:
+
+1. Pull image directly from `hub.baidubce.com` or `docker.io` through TAG:
+
+ ```shell
+ docker pull hub.baidubce.com/paddlepaddle/serving:<TAG> # hub.baidubce.com
+ docker pull paddlepaddle/serving:<TAG> # hub.docker.com
+ ```
+
+2. Building image based on dockerfile
+
+ Create a new folder and copy Dockerfile to this folder, and run the following command:
+
+ ```shell
+ docker build -t <image_name>:<tag> .
+ ```
+
+
+
+
+## Image description
+
+Runtime images cannot be used for compilation.
+
+| Description | OS | TAG | Dockerfile |
+| :----------------------------------------------------------: | :-----: | :--------------------------: | :----------------------------------------------------------: |
+| CPU runtime | CentOS7 | latest | [Dockerfile](../tools/Dockerfile) |
+| CPU development | CentOS7 | latest-devel | [Dockerfile.devel](../tools/Dockerfile.devel) |
+| GPU (cuda9.0-cudnn7) runtime | CentOS7 | latest-cuda9.0-cudnn7 | [Dockerfile.cuda9.0-cudnn7](../tools/Dockerfile.cuda9.0-cudnn7) |
+| GPU (cuda9.0-cudnn7) development | CentOS7 | latest-cuda9.0-cudnn7-devel | [Dockerfile.cuda9.0-cudnn7.devel](../tools/Dockerfile.cuda9.0-cudnn7.devel) |
+| GPU (cuda10.0-cudnn7) runtime | CentOS7 | latest-cuda10.0-cudnn7 | [Dockerfile.cuda10.0-cudnn7](../tools/Dockerfile.cuda10.0-cudnn7) |
+| GPU (cuda10.0-cudnn7) development | CentOS7 | latest-cuda10.0-cudnn7-devel | [Dockerfile.cuda10.0-cudnn7.devel](../tools/Dockerfile.cuda10.0-cudnn7.devel) |
+| CPU development (Used to compile packages on Ubuntu) | CentOS6 | <none> | [Dockerfile.centos6.devel](../tools/Dockerfile.centos6.devel) |
+| GPU (cuda9.0-cudnn7) development (Used to compile packages on Ubuntu) | CentOS6 | <none> | [Dockerfile.centos6.cuda9.0-cudnn7.devel](../tools/Dockerfile.centos6.cuda9.0-cudnn7.devel) |
diff --git a/doc/DOCKER_IMAGES_CN.md b/doc/DOCKER_IMAGES_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..26ef5e8bd8c23a281604e5ff0319416c3e408472
--- /dev/null
+++ b/doc/DOCKER_IMAGES_CN.md
@@ -0,0 +1,42 @@
+# Docker 镜像
+
+(简体中文|[English](DOCKER_IMAGES.md))
+
+该文档维护了 Paddle Serving 提供的镜像列表。
+
+## 获取镜像
+
+您可以通过两种方式获取镜像。
+
+1. 通过 TAG 直接从 `hub.baidubce.com ` 或 `docker.io` 拉取镜像:
+
+ ```shell
+ docker pull hub.baidubce.com/paddlepaddle/serving:<TAG> # hub.baidubce.com
+ docker pull paddlepaddle/serving:<TAG> # hub.docker.com
+ ```
+
+2. 基于 Dockerfile 构建镜像
+
+ 建立新目录,复制对应 Dockerfile 内容到该目录下 Dockerfile 文件。执行
+
+ ```shell
+ docker build -t <image_name>:<tag> .
+ ```
+
+
+
+
+## 镜像说明
+
+运行时镜像不能用于开发编译。
+
+| 镜像说明 | 操作系统 | TAG | Dockerfile |
+| -------------------------------------------------- | -------- | ---------------------------- | ------------------------------------------------------------ |
+| CPU 运行镜像 | CentOS7 | latest | [Dockerfile](../tools/Dockerfile) |
+| CPU 开发镜像 | CentOS7 | latest-devel | [Dockerfile.devel](../tools/Dockerfile.devel) |
+| GPU (cuda9.0-cudnn7) 运行镜像 | CentOS7 | latest-cuda9.0-cudnn7 | [Dockerfile.cuda9.0-cudnn7](../tools/Dockerfile.cuda9.0-cudnn7) |
+| GPU (cuda9.0-cudnn7) 开发镜像 | CentOS7 | latest-cuda9.0-cudnn7-devel | [Dockerfile.cuda9.0-cudnn7.devel](../tools/Dockerfile.cuda9.0-cudnn7.devel) |
+| GPU (cuda10.0-cudnn7) 运行镜像 | CentOS7 | latest-cuda10.0-cudnn7 | [Dockerfile.cuda10.0-cudnn7](../tools/Dockerfile.cuda10.0-cudnn7) |
+| GPU (cuda10.0-cudnn7) 开发镜像 | CentOS7 | latest-cuda10.0-cudnn7-devel | [Dockerfile.cuda10.0-cudnn7.devel](../tools/Dockerfile.cuda10.0-cudnn7.devel) |
+| CPU 开发镜像 (用于编译 Ubuntu 包) | CentOS6 | <无> | [Dockerfile.centos6.devel](../tools/Dockerfile.centos6.devel) |
+| GPU (cuda9.0-cudnn7) 开发镜像 (用于编译 Ubuntu 包) | CentOS6 | <无> | [Dockerfile.centos6.cuda9.0-cudnn7.devel](../tools/Dockerfile.centos6.cuda9.0-cudnn7.devel) |
diff --git a/doc/FAQ.md b/doc/FAQ.md
index 3bdd2dfd4739b54bf39b6b3f561c43bab3edabde..00630bd67baef14cfcda18e47a4d5cf8596b6cd0 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -1,15 +1,168 @@
# FAQ
-- Q:如何调整RPC服务的等待时间,避免超时?
-
- A:使用set_rpc_timeout_ms设置更长的等待时间,单位为毫秒,默认时间为20秒。
-
- 示例:
- ```
- from paddle_serving_client import Client
-
- client = Client()
- client.load_client_config(sys.argv[1])
- client.set_rpc_timeout_ms(100000)
- client.connect(["127.0.0.1:9393"])
- ```
+
+
+## 基础知识
+
+#### Q: Paddle Serving 、Paddle Inference、PaddleHub Serving三者的区别及联系?
+
+**A:** paddle serving是远程服务,即发起预测的设备(手机、浏览器、客户端等)与实际预测的硬件不在一起。 paddle inference是一个library,适合嵌入到一个大系统中保证预测效率,paddle serving调用了paddle inference做远程服务。paddlehub serving可以认为是一个示例,都会使用paddle serving作为统一预测服务入口。如果在web端交互,一般是调用远程服务的形式,可以使用paddle serving的web service搭建。
+
+#### Q: paddle-serving是否支持Int32支持
+
+**A:** 在protobuf定feed_type和fetch_type编号与数据类型对应如下
+
+ 0-int64
+
+ 1-float32
+
+ 2-int32
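+
+As a hedged illustration (not part of the original FAQ; the variable names, config path and port below are placeholders), an int32 tensor can be fed from the Python client as a numpy array with `dtype=np.int32`, matching `feed_type` 2 in the prototxt:
+
+```python
+import numpy as np
+from paddle_serving_client import Client
+
+client = Client()
+client.load_client_config("serving_client_conf.prototxt")  # placeholder path
+client.connect(["127.0.0.1:9393"])
+
+# dtype=np.int32 corresponds to feed_type 2 in the model config
+words = np.array([8, 233, 52, 601], dtype=np.int32)
+fetch_map = client.predict(feed={"words": words}, fetch=["prediction"])
+```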
+
+#### Q: paddle-serving是否支持windows和Linux环境下的多线程调用
+
+**A:** 客户端可以发起多线程访问调用服务端
+
+#### Q: paddle-serving如何修改消息大小限制
+
+**A:** 在server端和client但通过FLAGS_max_body_size来扩大数据量限制,单位为字节,默认为64MB
+
+#### Q: paddle-serving客户端目前支持哪些语言
+
+**A:** java c++ python
+
+#### Q: paddle-serving目前支持哪些协议
+
+**A:** http rpc
+
+
+## 编译问题
+
+#### Q: 如何使用自己编译的Paddle Serving进行预测?
+
+**A:** 通过pip命令安装自己编译出的whl包,并设置SERVING_BIN环境变量为编译出的serving二进制文件路径。
+
+
+
+## 部署问题
+
+#### Q: GPU环境运行Serving报错,GPU count is: 0。
+
+```
+terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
+what():
+--------------------------------------------
+C++ Call Stacks (More useful to developers):
+--------------------------------------------
+0 std::string paddle::platform::GetTraceBackString(std::string const&, char const*, int)
+1 paddle::platform::SetDeviceId(int)
+2 paddle::AnalysisConfig::fraction_of_gpu_memory_for_pool() const
+3 std::unique_ptr > paddle::CreatePaddlePredictor(paddle::AnalysisConfig const&)
+4 std::unique_ptr > paddle::CreatePaddlePredictor(paddle::AnalysisConfig const&)
+----------------------
+Error Message Summary:
+----------------------
+InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
+[Hint: Expected id < GetCUDADeviceCount(), but received id:0 >= GetCUDADeviceCount():0.] at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/platform/gpu_info.cc:211)
+```
+
+**A:** libcuda.so没有链接成功。首先在机器上找到libcuda.so,ldd检查libnvidia版本与nvidia-smi中版本一致(libnvidia-fatbinaryloader.so.418.39,与NVIDIA-SMI 418.39 Driver Version: 418.39),然后用export导出libcuda.so的路径即可(例如libcuda.so在/usr/lib64/,export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/)
+
+#### Q: 遇到 GPU not found, please check your environment or use cpu version by "pip install paddle_serving_server"
+
+**A:** 检查环境中是否有N卡:ls /dev/ | grep nvidia
+
+#### Q: 目前Paddle Serving支持哪些镜像环境?
+
+**A:** 目前(0.4.0)仅支持CentOS,具体列表查阅[这里](https://github.com/PaddlePaddle/Serving/blob/develop/doc/DOCKER_IMAGES.md)
+
+#### Q: python编译的GCC版本与serving的版本不匹配
+
+**A:**:1)使用[GPU docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md#gpunvidia-docker)解决环境问题
+
+ 2)修改anaconda的虚拟环境下安装的python的gcc版本[参考](https://www.jianshu.com/p/c498b3d86f77)
+
+#### Q: paddle-serving是否支持本地离线安装
+
+**A:** 支持离线部署,需要把一些相关的[依赖包](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md)提前准备安装好
+
+## 预测问题
+
+#### Q: 使用GPU第一次预测时特别慢,如何调整RPC服务的等待时间避免超时?
+
+**A:** GPU第一次预测需要初始化。使用set_rpc_timeout_ms设置更长的等待时间,单位为毫秒,默认时间为20秒。
+
+示例:
+
+```
+from paddle_serving_client import Client
+
+client = Client()
+client.load_client_config(sys.argv[1])
+client.set_rpc_timeout_ms(100000)
+client.connect(["127.0.0.1:9393"])
+```
+
+#### Q: 执行GPU预测时遇到InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
+
+**A:** 将显卡驱动对应的libcuda.so的目录添加到LD_LIBRARY_PATH环境变量中
+
+#### Q: 执行GPU预测时遇到ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (../batch_norm_op.cu:198)
+
+**A:** 将cudnn的lib64路径添加到LD_LIBRARY_PATH,安装自pypi的Paddle Serving中post9版使用的是cudnn 7.3,post10使用的是cudnn 7.5。如果是使用自己编译的Paddle Serving,可以在log/serving.INFO日志文件中查看对应的cudnn版本。
+
+#### Q: 执行GPU预测时遇到Error: Failed to find dynamic library: libcublas.so
+
+**A:** 将cuda的lib64路径添加到LD_LIBRARY_PATH, post9版本的Paddle Serving使用的是cuda 9.0,post10版本使用的cuda 10.0。
+
+#### Q: Client端fetch的变量名如何设置
+
+**A:** 可以查看配置文件serving_server_conf.prototxt,获取需要的变量名
+
+#### Q: 如何使用多语言客户端
+
+**A:** 多语言客户端要与多语言服务端配套使用。当前版本下(0.4.0),服务端需要将Server改为MultiLangServer(如果是以命令行启动的话只需要添加--use_multilang参数),Python客户端需要将Client改为MultiLangClient,同时去除load_client_config的过程。[Java客户端参考文档](https://github.com/PaddlePaddle/Serving/blob/develop/doc/JAVA_SDK_CN.md)
+
+#### Q: 如何在Windows下使用Paddle Serving
+
+**A:** 当前版本(0.4.0)在Windows上可以运行多语言RPC客户端,或使用HTTP方式访问。如果使用多语言RPC客户端,需要在Linux环境(比如本机容器,或远程Linux机器)中运行多语言服务端;如果使用HTTP方式,需要在Linux环境中运行普通服务端
+
+#### Q: libnvinfer.so: cannot open shared object file: No such file or directory)
+
+ **A:** 参考该文档安装TensorRT: https://blog.csdn.net/hesongzefairy/article/details/105343525
+
+
+
+## Log Troubleshooting
+
+#### Q: Where can I find the logs produced during deployment and inference?
+
+**A:** The server-side logs have two parts: one is printed to standard output, and the other is written to the log/serving.INFO file under the directory from which the service was started.
+
+Client-side logs are printed directly to standard output.
+
+Running `export GLOG_v=3` before deploying the service produces more detailed logs.
+
+#### Q: After paddle-serving starts successfully, where are the log settings configured?
+
+**A:** 1) The warning is printed by the glog component and indicates that, before glog is initialized, logs go to STDERR;
+
+2) The log level is usually set by starting the service with the GLOG_v environment variable.
+
+For example:
+
+```shell
+GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999
+```
+
+
+
+#### Q: (With GLOG_v=2) The server-side logs look normal, but the client never gets correct prediction results
+
+**A:** The configuration file may be wrong; check it (for example, whether is_load_tensor, fetch_type, etc. are set correctly).
+
+#### Q: How do I pass a log id to the server?
+
+**A:** The log id defaults to 0 (automatic log id generation is planned for a later release; the current version is 0.4.0). The client passes it through the `log_id` parameter of the `predict` function, as sketched below.
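+
+A minimal sketch, assuming the uci_housing example (the log id value 10000 is arbitrary):
+
+```python
+# Minimal sketch: pass a log id through the log_id parameter of predict().
+import sys
+import numpy as np
+from paddle_serving_client import Client
+
+client = Client()
+client.load_client_config(sys.argv[1])
+client.connect(["127.0.0.1:9393"])
+
+x = np.array([0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
+              -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332])
+fetch_map = client.predict(feed={"x": x}, fetch=["price"], log_id=10000)
+print(fetch_map)
+```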
+
+
+
+## Performance Optimization
diff --git a/doc/GRPC_IMPL_CN.md b/doc/GRPC_IMPL_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..7b10907caec98ae5754126a7ec54096cc4cd48af
--- /dev/null
+++ b/doc/GRPC_IMPL_CN.md
@@ -0,0 +1,52 @@
+# gRPC Interface
+
+The gRPC interface is implemented in a form similar to a web service:
+
+
+
+## Comparison with the bRPC Interface
+
+1. The gRPC server-side `load_model_config` function adds a `client_config_path` parameter:
+
+ ```python
+ def load_model_config(self, server_config_paths, client_config_path=None)
+ ```
+
+   In some examples the bRPC server-side and client-side configuration files differ (for example in the cube local example, client-side data is first sent to cube, and only the cube output is passed to the inference library), so the gRPC server side needs the gRPC client-side configuration. To also remove the manual configuration-loading step on the gRPC client side, the gRPC server side is designed to load both configuration files. `client_config_path` defaults to the `serving_server_conf.prototxt` under the server config path.
+
+2. The gRPC client side drops the `load_client_config` step:
+
+   During `connect`, the corresponding prototxt is fetched over RPC (from any one endpoint).
+
+3. The gRPC client sets the timeout through RPC (the calling form stays the same as the bRPC client).
+
+   Because the bRPC client cannot change its timeout after `connect`, when the gRPC server receives a timeout-change request it recreates the bRPC client instance to apply the new bRPC timeout, and the gRPC client also sets the gRPC deadline.
+
+   **Note: the timeout-setting interface and the inference interface must not be called at the same time (not thread-safe); for performance reasons no lock is added for now.**
+
+4. The gRPC client-side `predict` function adds `asyn` and `is_python` parameters:
+
+ ```python
+ def predict(self, feed, fetch, need_variant_tag=False, asyn=False, is_python=True)
+ ```
+
+   `asyn` enables asynchronous calls. With `asyn=True` the call is asynchronous and returns a `MultiLangPredictFuture` object; block on `MultiLangPredictFuture.result()` to obtain the prediction. With `asyn=False` the call is synchronous (a usage sketch is given in the Python examples section below).
+
+   `is_python` selects the proto format. With `is_python=True`, data is transferred as numpy bytes, which currently only works with Python; with `is_python=False`, data is transferred in a plain format that is more general. The numpy bytes format is much faster than the plain format (see [#654](https://github.com/PaddlePaddle/Serving/pull/654)).
+
+5. Exception handling: when the bRPC client inside the gRPC server fails to predict (returns `None`), the gRPC client also returns `None`. Other gRPC exceptions are caught inside the client, and a "status_code" field is added to the returned fetch_map to indicate whether the prediction succeeded (see the timeout example).
+
+6. Because gRPC only supports the pick_first and round_robin load-balancing policies, the A/B test feature is not yet complete.
+
+7. The gRPC version has been verified to work on Windows and macOS.
+
+8. Planned client languages:
+
+ - [x] Python
+ - [ ] Java
+ - [ ] Go
+ - [ ] JavaScript
+
+## Python Examples
+
+See the example files under `python/examples/grpc_impl_example`; a minimal asynchronous-call sketch follows.
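+
+A minimal asynchronous-call sketch (assuming the uci_housing model served with `--use_multilang` on port 9393; the feed/fetch names come from that example):
+
+```python
+# Minimal sketch: asynchronous prediction through the gRPC (multi-language) client.
+import numpy as np
+from paddle_serving_client import MultiLangClient
+
+client = MultiLangClient()
+client.connect(["127.0.0.1:9393"])
+
+x = np.array([0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
+              -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332])
+future = client.predict(feed={"x": x}, fetch=["price"], asyn=True)
+fetch_map = future.result()  # blocks until the prediction returns
+print(fetch_map)
+```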
diff --git a/doc/INFERENCE_TO_SERVING.md b/doc/INFERENCE_TO_SERVING.md
new file mode 100644
index 0000000000000000000000000000000000000000..e10ee976fb455c8cc49a0d5fa44ed4cc1f300ba9
--- /dev/null
+++ b/doc/INFERENCE_TO_SERVING.md
@@ -0,0 +1,36 @@
+# How to Convert Paddle Inference Model To Paddle Serving Format
+
+([简体中文](./INFERENCE_TO_SERVING_CN.md)|English)
+
+You need to know the following before converting an inference model to the Serving format:
+
+**inference_model_dir**: the directory of the Paddle inference model
+
+**serving_server_dir**: the directory for the server-side configuration
+
+**serving_client_dir**: the directory for the client-side configuration
+
+**model_filename**: the model description file, whose default name is `__model__`; if your model uses a different name, set `model_filename` explicitly
+
+**params_filename**: by default, `save_inference_model` saves every Variable as a separate file; if your inference model has all parameters combined into a single file, set `params_filename` explicitly
+
+
+
+## Example
+
+``` python
+from paddle_serving_client.io import inference_model_to_serving
+inference_model_dir = "your_inference_model"
+serving_client_dir = "serving_client_dir"
+serving_server_dir = "serving_server_dir"
+feed_var_names, fetch_var_names = inference_model_to_serving(
+ inference_model_dir, serving_client_dir, serving_server_dir)
+```
+
+If your model is saved with a single model description file and a single combined parameter file (with custom names), use the following API:
+
+```python
+feed_var_names, fetch_var_names = inference_model_to_serving(
+ inference_model_dir, serving_client_dir, serving_server_dir,
+ model_filename="model", params_filename="params")
+```
diff --git a/doc/INFERENCE_TO_SERVING_CN.md b/doc/INFERENCE_TO_SERVING_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..e7e909ac04be3b1a0885b3390d99a153dfbd170e
--- /dev/null
+++ b/doc/INFERENCE_TO_SERVING_CN.md
@@ -0,0 +1,33 @@
+# How to Convert a Saved Paddle Inference Model into a Deployable Paddle Serving Model
+
+([English](./INFERENCE_TO_SERVING.md)|简体中文)
+
+## Example
+
+For the code below, you need to know the following:
+
+**Model directory** (`inference_model_dir`): the directory where the Paddle inference_model is stored
+
+**serving_server_dir**: the directory where the server-side configuration is saved after the inference_model is converted to a Serving model
+
+**serving_client_dir**: the directory where the client-side configuration is saved after the inference_model is converted to a Serving model
+
+**Model description file** (`model_filename`): defaults to `__model__` and is a protobuf (pb2) file; if it has a different name, it must be specified explicitly
+
+**Model parameter file** (`params_filename`): by default, `save_inference_model` saves each Variable as a separate binary file, in which case nothing needs to be specified; if all parameters are combined into a single file, `params_filename` must be specified explicitly
+
+
+``` python
+from paddle_serving_client.io import inference_model_to_serving
+inference_model_dir = "your_inference_model"
+serving_client_dir = "serving_client_dir"
+serving_server_dir = "serving_server_dir"
+feed_var_names, fetch_var_names = inference_model_to_serving(
+ inference_model_dir, serving_client_dir, serving_server_dir)
+```
+If the model has a custom model description file `model_filename` and a combined parameter file `params_filename`, use:
+```python
+feed_var_names, fetch_var_names = inference_model_to_serving(
+ inference_model_dir, serving_client_dir, serving_server_dir,
+ model_filename="model", params_filename="params")
+```
diff --git a/doc/INFERNCE_TO_SERVING.md b/doc/INFERNCE_TO_SERVING.md
deleted file mode 100644
index 8334159ea255ca65241a2b567e43682a148bb775..0000000000000000000000000000000000000000
--- a/doc/INFERNCE_TO_SERVING.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# How to Convert Paddle Inference Model To Paddle Serving Format
-
-([简体中文](./INFERENCE_TO_SERVING_CN.md)|English)
-
-## Example
-
-``` python
-from paddle_serving_client.io import inference_model_to_serving
-inference_model_dir = "your_inference_model"
-serving_client_dir = "serving_client_dir"
-serving_server_dir = "serving_server_dir"
-feed_var_names, fetch_var_names = inference_model_to_serving(
- inference_model_dir, serving_client_dir, serving_server_dir)
-```
diff --git a/doc/INFERNCE_TO_SERVING_CN.md b/doc/INFERNCE_TO_SERVING_CN.md
deleted file mode 100644
index 94d1def424db467e200020c69fbd6d1599a5ffde..0000000000000000000000000000000000000000
--- a/doc/INFERNCE_TO_SERVING_CN.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# 如何从Paddle保存的预测模型转为Paddle Serving格式可部署的模型
-
-([English](./INFERENCE_TO_SERVING.md)|简体中文)
-
-## 示例
-
-``` python
-from paddle_serving_client.io import inference_model_to_serving
-inference_model_dir = "your_inference_model"
-serving_client_dir = "serving_client_dir"
-serving_server_dir = "serving_server_dir"
-feed_var_names, fetch_var_names = inference_model_to_serving(
- inference_model_dir, serving_client_dir, serving_server_dir)
-```
diff --git a/doc/JAVA_SDK.md b/doc/JAVA_SDK.md
new file mode 100644
index 0000000000000000000000000000000000000000..4880e74bfee123b432b6b583a239d2d2ccbb45ac
--- /dev/null
+++ b/doc/JAVA_SDK.md
@@ -0,0 +1,109 @@
+# Paddle Serving Client Java SDK
+
+([简体中文](JAVA_SDK_CN.md)|English)
+
+Paddle Serving provides a Java SDK, which supports prediction on the client side in Java. This document shows how to use the Java SDK.
+
+## Getting started
+
+
+### Prerequisites
+
+```
+- Java 8 or higher
+- Apache Maven
+```
+
+The following table shows the compatibility between the Paddle Serving server and the Java SDK.
+
+| Paddle Serving Server version | Java SDK version |
+| :---------------------------: | :--------------: |
+| 0.3.2 | 0.0.1 |
+
+### Install Java SDK
+
+You can download the jar and install it into your local Maven repository:
+
+```shell
+wget https://paddle-serving.bj.bcebos.com/jar/paddle-serving-sdk-java-0.0.1.jar
+mvn install:install-file -Dfile=$PWD/paddle-serving-sdk-java-0.0.1.jar -DgroupId=io.paddle.serving.client -DartifactId=paddle-serving-sdk-java -Dversion=0.0.1 -Dpackaging=jar
+```
+
+Or compile from the source code and install it to the local Maven repository:
+
+```shell
+cd Serving/java
+mvn compile
+mvn install
+```
+
+### Maven configuration
+
+```xml
+<dependency>
+    <groupId>io.paddle.serving.client</groupId>
+    <artifactId>paddle-serving-sdk-java</artifactId>
+    <version>0.0.1</version>
+</dependency>
+```
+
+
+
+## Example
+
+Here we show how to use the Java SDK for Boston house price prediction. Please refer to the [examples](../java/examples) folder for more examples.
+
+### Get model
+
+```shell
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
+tar -xzf uci_housing.tar.gz
+```
+
+### Start Python Server
+
+```shell
+python -m paddle_serving_server.serve --model uci_housing_model --port 9393 --use_multilang
+```
+
+#### Client side code example
+
+```java
+import io.paddle.serving.client.*;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.factory.Nd4j;
+import java.util.*;
+
+public class PaddleServingClientExample {
+ public static void main( String[] args ) {
+ float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
+ 0.0582f, -0.0727f, -0.1583f, -0.0584f,
+ 0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
+ INDArray npdata = Nd4j.createFromArray(data);
+        HashMap<String, INDArray> feed_data
+            = new HashMap<String, INDArray>() {{
+                put("x", npdata);
+            }};
+        List<String> fetch = Arrays.asList("price");
+
+ Client client = new Client();
+ String target = "localhost:9393";
+ boolean succ = client.connect(target);
+ if (succ != true) {
+ System.out.println("connect failed.");
+ return ;
+ }
+
+ Map