Commit 770b6c26 authored by bjjwwang

Merge branch 'develop' of https://github.com/paddlepaddle/serving into develop

...@@ -188,7 +188,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
| `use_lite` (Only for Intel x86 CPU or ARM CPU) | - | - | Run PaddleLite inference |
| `use_xpu` | - | - | Run PaddleLite inference with Baidu Kunlun XPU |
| `precision` | str | FP32 | Precision mode, supports FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | Enable GPU multi-stream to get larger QPS |
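As a concrete illustration, the sketch below launches the serving process from Python with one of the flags in this table. The model directory and thread count follow the example command above, the port matches the fit_a_line example later in this commit, and the flag spellings are taken from the table itself rather than an exhaustive CLI reference.

```python
import subprocess

# Minimal sketch: start the serving process with an explicit precision mode.
# --precision takes FP32 / FP16 / INT8; the lower precisions are typically
# paired with GPU/TensorRT or Lite deployments as the rows above suggest.
subprocess.run([
    "python3", "-m", "paddle_serving_server.serve",
    "--model", "uci_housing_model",
    "--thread", "10",
    "--port", "9393",
    "--precision", "FP16",
], check=True)
```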
#### Description of asynchronous model
......
...@@ -187,7 +187,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
| `use_lite` (Only for Intel x86 CPU or ARM CPU) | - | - | Run PaddleLite inference |
| `use_xpu` | - | - | Run PaddleLite inference with Baidu Kunlun XPU |
| `precision` | str | FP32 | Precision mode, supports FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | Enable GPU multi-stream to get larger QPS |
#### Description of asynchronous model
......
...@@ -61,8 +61,11 @@ else()
endif()
if(CUDNN_FOUND)
  if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
    file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
  elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h")
    file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
  endif()
  get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)
  string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
......
...@@ -27,52 +27,54 @@ set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/
message("WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
SET(PADDLE_VERSION "2.2.0-rc0")

if (WITH_GPU)
  message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}")
  # cuda 11.0 is not supported, 11.2 would be added.
  if(CUDA_VERSION EQUAL 10.1)
    set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5")
    set(WITH_TRT ON)
  elseif(CUDA_VERSION EQUAL 10.2)
    if(CUDNN_MAJOR_VERSION EQUAL 7)
      set(CUDA_SUFFIX "x86-64_gcc5.4_avx_mkl_cuda10.2_cudnn7.6.5_trt6.0.1.5")
      set(WITH_TRT ON)
    elseif(CUDNN_MAJOR_VERSION EQUAL 8)
      set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.2_cudnn8.1.1_trt7.2.3.4")
      set(WITH_TRT ON)
    endif()
  elseif(CUDA_VERSION EQUAL 11.2)
    set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda11.2_cudnn8.2.1_trt8.0.3.4")
    set(WITH_TRT ON)
  endif()
else()
  set(WITH_TRT OFF)
endif()

if (WITH_GPU)
  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/GPU/${CUDA_SUFFIX}")
elseif (WITH_LITE)
  if (WITH_XPU)
    SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
  else()
    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
  endif()
else()
  if (WITH_AVX)
    if (WITH_MKLML)
      SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_avx_mkl")
    else()
      SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_avx_openblas")
    endif()
  else()
    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_openblas")
  endif()
endif()

if(WITH_LITE)
  SET(PADDLE_LIB_PATH "https://paddle-inference-lib.bj.bcebos.com/2.2.0-rc0/cxx_c/Linux/XPU/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz")
else()
  SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
endif()
......
...@@ -12,41 +12,97 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package baidu.paddle_serving.predictor.general_model;
option java_multiple_files = true;
option cc_generic_services = true;

message Tensor {
  // VarType: INT64
  repeated int64 int64_data = 1;

  // VarType: FP32
  repeated float float_data = 2;

  // VarType: INT32
  repeated int32 int_data = 3;

  // VarType: FP64
  repeated double float64_data = 4;

  // VarType: UINT32
  repeated uint32 uint32_data = 5;

  // VarType: BOOL
  repeated bool bool_data = 6;

  // (Not supported) VarType: COMPLEX64; index 2x holds the real part,
  // index 2x+1 the imaginary part
  repeated float complex64_data = 7;

  // (Not supported) VarType: COMPLEX128; index 2x holds the real part,
  // index 2x+1 the imaginary part
  repeated double complex128_data = 8;

  // VarType: STRING
  repeated string data = 9;

  // Element types:
  //   0 => INT64
  //   1 => FP32
  //   2 => INT32
  //   3 => FP64
  //   4 => INT16
  //   5 => FP16
  //   6 => BF16
  //   7 => UINT8
  //   8 => INT8
  //   9 => BOOL
  //  10 => COMPLEX64
  //  11 => COMPLEX128
  //  20 => STRING
  int32 elem_type = 10;

  // Shape of the tensor, including the batch dimension.
  repeated int32 shape = 11;

  // Level of detail (LoD), supports variable-length data; only used for
  // fetch tensors currently.
  repeated int32 lod = 12;

  // Corresponds to the variable 'name' in the model description prototxt.
  string name = 13;

  // Corresponds to the variable 'alias_name' in the model description prototxt.
  string alias_name = 14;

  // VarType: FP16, INT16, INT8, BF16, UINT8
  bytes tensor_content = 15;
};

message Request {
  repeated Tensor tensor = 1;
  repeated string fetch_var_names = 2;
  bool profile_server = 3;
  uint64 log_id = 4;
};

message Response {
  repeated ModelOutput outputs = 1;
  repeated int64 profile_time = 2;
  // Error code
  int32 err_no = 3;
  // Error message
  string err_msg = 4;
};

message ModelOutput {
  repeated Tensor tensor = 1;
  string engine_name = 2;
}

service GeneralModelService {
  rpc inference(Request) returns (Response);
  rpc debug(Request) returns (Response);
};
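To make the field layout concrete, here is a hedged sketch of how a client could fill one FP32 feed tensor in a Request using Python bindings generated from this proto. The module name `general_model_service_pb2` is an assumption about how the file is compiled; the field names, the elem_type code, and the feed/fetch names follow the message definition above and the fit_a_line example elsewhere in this commit.

```python
# Assumed import: the module protoc generates from general_model_service.proto.
import general_model_service_pb2 as pb

req = pb.Request()
req.log_id = 0
req.fetch_var_names.append("price")

t = req.tensor.add()
t.name = "x"             # variable 'name' from the model prototxt
t.alias_name = "x"       # variable 'alias_name' from the model prototxt
t.elem_type = 1          # 1 => FP32, per the element-type table above
t.shape.extend([1, 13])  # includes the batch dimension
t.float_data.extend([0.0] * 13)
```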
...@@ -22,11 +22,8 @@ message EngineDesc {
  required string reloadable_type = 4;
  required string model_dir = 5;
  repeated int32 gpu_ids = 6;
  optional string version_file = 7;
  optional string version_type = 8;

  /*
   * Sparse Parameter Service type. Valid types are:
...@@ -39,17 +36,34 @@ message EngineDesc {
    LOCAL = 1;
    REMOTE = 2;
  }
  optional SparseParamServiceType sparse_param_service_type = 10;
  optional string sparse_param_service_table_name = 11;
  optional bool enable_memory_optimization = 12;
  optional bool enable_ir_optimization = 13;
  optional bool use_trt = 14;
  optional bool use_lite = 15;
  optional bool use_xpu = 16;
  optional bool use_gpu = 17;
  optional bool combined_model = 18;
  optional bool encrypted_model = 19;
  optional bool gpu_multi_stream = 20;

  /*
   * "runtime_thread_num": n == 0 means the asynchronous task scheduling
   * mode is not used; n > 0 is the number of Predictors this engine has
   * in asynchronous task scheduling mode.
   * "batch_infer_size": the max batch for this engine in asynchronous task
   * scheduling mode.
   * "enable_overrun": always put a whole task into the TaskQueue even if the
   * total batch is bigger than "batch_infer_size".
   * "allow_split_request": allow splitting a task (which corresponds to one
   * request).
   */
  optional int32 runtime_thread_num = 30 [ default = 0 ];
  optional int32 batch_infer_size = 31 [ default = 32 ];
  optional bool enable_overrun = 32 [ default = false ];
  optional bool allow_split_request = 33 [ default = true ];
};

// model_toolkit conf
...@@ -61,11 +75,14 @@ message ResourceConf {
  repeated string model_toolkit_file = 2;
  repeated string general_model_path = 3;
  repeated string general_model_file = 4;
  optional string cube_config_path = 10;
  optional string cube_config_file = 11;
  optional int32 cube_quant_bits = 12;
  optional string cube_cache_path = 13;
  optional string auth_product_name = 20;
  optional string auth_container_id = 21;
};

// DAG node dependency info
......
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
)
func main() {
dict_name := flag.String("n", "test", "cube name")
conf_path := flag.String("c", "./conf/cube.conf", "cube conf path")
input_path := flag.String("i", "./input.json", "keys to seek")
output_path := flag.String("o", "./output.json", "result to save")
flag.Parse()
bytes, err := ioutil.ReadFile(*conf_path)
if err != nil {
fmt.Println("读取配置文件失败", err)
return
}
var meta Meta
err = json.Unmarshal(bytes, &meta.Servers)
if err != nil {
fmt.Println("解析数据失败", err)
return
}
err = meta.Seek(*dict_name, *input_path, *output_path)
if err != nil {
fmt.Println(err)
}
return
}
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
package main
import "fmt"
type Meta struct {
Servers []CubeServer `json:"servers,omitempty"`
}
func (meta *Meta) Seek(dict_name string, input string, output string) (err error) {
var server CubeServer
for _, s := range meta.Servers {
if s.Name == dict_name {
server = s
break
}
}
if server.Name != dict_name {
err = fmt.Errorf("%s server not exist", dict_name)
return err
}
err = server.Seek(input, output)
return err
}
package main
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
)
type Input struct {
Keys []uint64 `json:"keys"`
}
type SingleValue struct {
Status uint32 `json:"status"`
Value string `json:"value"`
}
type Output struct {
Values []SingleValue `json:"values"`
}
type ServerNode struct {
Ip string `json:"ip"`
Port uint64 `json:"port"`
}
type CubeServer struct {
Name string `json:"dict_name"`
Shard uint64 `json:"shard"`
Nodes []ServerNode `json:"nodes"`
}
func (server *CubeServer) SplitKeys(keys []uint64) (splited_keys map[uint64]Input, offset map[uint64][]uint64) {
splited_keys = make(map[uint64]Input)
offset = make(map[uint64][]uint64)
for i, key := range keys {
shard_id := key % server.Shard
temp_split, _ := splited_keys[shard_id]
temp_split.Keys = append(temp_split.Keys, key)
splited_keys[shard_id] = temp_split
temp_offset, _ := offset[shard_id]
temp_offset = append(temp_offset, uint64(i))
offset[shard_id] = temp_offset
}
return splited_keys, offset
}
func (server *CubeServer) Seek(input string, output_path string) (err error) {
file, err := os.Open(input)
if err != nil {
return err
}
defer file.Close()
buf := bufio.NewReader(file)
for {
line, err := buf.ReadBytes('\n')
//line = strings.TrimSpace(line)
if err != nil || io.EOF == err {
break
}
var temp_input Input
json.Unmarshal(line, &temp_input)
key_nums := len(temp_input.Keys)
var output Output
output.Values = make([]SingleValue, key_nums+1)
splited_keys, offset := server.SplitKeys(temp_input.Keys)
for shard_id, keys := range splited_keys {
cur_output, _ := server.Post(shard_id, keys)
for index, single_value := range cur_output.Values {
output.Values[offset[shard_id][index]] = single_value
}
}
json_str, _ := json.Marshal(output)
fp, err := os.OpenFile(output_path, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0755)
if err != nil {
log.Fatal(err)
}
defer fp.Close()
_, err = fp.Write(json_str)
}
return err
}
func (server *CubeServer) Post(shard_id uint64, input Input) (output Output, err error) {
if shard_id >= uint64(len(server.Nodes)) {
err = fmt.Errorf("have no shard:%v", shard_id)
return output, err
}
json_str, _ := json.Marshal(input)
URL := fmt.Sprintf("http://%s:%v/DictService/seek", server.Nodes[shard_id].Ip, server.Nodes[shard_id].Port)
req, err := http.NewRequest("POST", URL, bytes.NewBuffer(json_str))
if err != nil {
return output, err
}
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return output, err
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return output, err
}
err = json.Unmarshal(body, &output)
return output, err
}
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
#coding=utf-8
import requests
import sys
import json

class Meta(object):
    """Holds the routing info of the sharded cube servers."""
    def __init__(self, conf_path):
        """Build the routing table from the config file."""
        self.server_api = "/DictService/seek"
        self.server_meta = {}
        with open(conf_path, "r", encoding="utf8") as fp:
            cube_servers = json.load(fp)
            for server in cube_servers:
                self.server_meta[server["dict_name"]] = server

    def seek(self, dict_name, keys_path, save_path):
        """Query the cube servers for every line of keys in keys_path."""
        save_file = open(save_path, 'w')
        with open(keys_path, "r", encoding="utf8") as fp:
            lines = fp.readlines()
            for line in lines:
                json_line = json.loads(line)
                values = [{} for i in range(len(json_line["keys"]))]
                splited_keys, offset = self.split_keys(dict_name, json_line)
                for shard_id, keys in splited_keys.items():
                    results = self.post(dict_name, shard_id, keys)
                    for i, result in enumerate(results["values"]):
                        values[offset[shard_id][i]] = result
                cur_line_results = {}
                cur_line_results["values"] = values
                json.dump(cur_line_results, save_file)
                save_file.write("\n")
        save_file.close()

    def split_keys(self, dict_name, json_line):
        """Decide, from the key value and shard count, which shard to query."""
        keys_split = {}
        offset = {}
        i = 0
        for key in json_line["keys"]:
            shard_id = key % self.server_meta[dict_name]["shard"]
            if shard_id not in keys_split:
                keys_split[shard_id] = []
            keys_split[shard_id].append(key)
            if shard_id not in offset:
                offset[shard_id] = []
            offset[shard_id].append(i)
            i += 1
        return keys_split, offset

    def post(self, dict_name, shard_id, keys):
        """Send a POST request to the shard server."""
        api = "http://%s:%s%s" % (self.server_meta[dict_name]["nodes"][shard_id]["ip"],
                                  self.server_meta[dict_name]["nodes"][shard_id]["port"],
                                  self.server_api)
        data = {"keys": keys}
        response = requests.post(api, json.dumps(data))
        return response.json()

if __name__ == '__main__':
    if len(sys.argv) != 5:
        print('usage: python demo.py conf_path dict_name keys_path save_path')
        exit(0)
    conf_path = sys.argv[1]
    dict_name = sys.argv[2]
    keys_path = sys.argv[3]
    save_path = sys.argv[4]
    meta = Meta(conf_path)
    meta.seek(dict_name, keys_path, save_path)
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
\ No newline at end of file
# Cube Python API Documentation
Follow [Deployment and Usage of the Cube Large-Scale Sparse Parameter Service](https://github.com/PaddlePaddle/Serving/blob/master/doc/DEPLOY.md#2-大规模稀疏参数服务cube的部署和使用) to deploy cube.
This Python API can replace the deployment and usage of the prediction service described in Section 3 of that document.
## Configuration
conf/cube.conf is a JSON file that sets the ip and port of each cube shard server; `shard` must match the number of shards. Example:
```json
[{
    "dict_name": "test",
    "shard": 2,
    "nodes": [{
        "ip": "127.0.0.1",
        "port": 8731
    },{
        "ip": "127.0.0.1",
        "port": 8730
    }]
}]
```
## Data format
```json
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
```
Batch queries are supported; each line is one query.
## Usage
```bash
cd ./python-api
python3 demo.py conf/cube.conf test input.json result.json
```
\ No newline at end of file
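Besides the command line, the lookup can also be driven programmatically. A minimal sketch using the `Meta` class defined in demo.py above, with the dict name and paths taken from the usage example in this README:

```python
from demo import Meta  # demo.py in this directory

# Route the keys in input.json to the shards listed in conf/cube.conf for the
# "test" dict and append one JSON line of results per input line to result.json.
meta = Meta("conf/cube.conf")
meta.seek("test", "input.json", "result.json")
```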
{"values": [{"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}]}
{"values": [{"status": 4294967295, "value": ""}]}
...@@ -3,3 +3,24 @@ add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
if(CLIENT)
FILE(GLOB client_srcs include/*.h src/client.cpp src/brpc_client.cpp)
add_library(client ${client_srcs})
add_dependencies(client utils sdk-cpp)
target_link_libraries(client utils sdk-cpp)
endif()
if(CLIENT)
include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../../)
add_executable(simple_client example/simple_client.cpp)
add_dependencies(simple_client utils sdk-cpp client)
target_link_libraries(simple_client -Wl,--whole-archive
-Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
target_link_libraries(simple_client utils)
target_link_libraries(simple_client sdk-cpp)
target_link_libraries(simple_client client)
endif()
\ No newline at end of file
# C++ Client for Paddle Serving
(Simplified Chinese | [English](./README.md))
## Requesting a BRPC Server
### Start the server
Taking the fit_a_line model as an example, the server is started with the same command as a regular BRPC server.
```
cd ../../python/examples/fit_a_line
sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
```
### Run the client
The client currently supports BRPC only.
A BRPC wrapper is already implemented; see [brpc_client.cpp](./src/brpc_client.cpp).
```
./simple_client --client_conf="uci_housing_client/serving_client_conf.prototxt" --server_port="127.0.0.1:9393" --test_type="brpc" --sample_type="fit_a_line"
```
For more examples, see [simple_client.cpp](./example/simple_client.cpp).
| Argument | Type | Default | Description |
| ---------------------------------------------- | ---- | ------------------------------------ | ----------------------------------------------------- |
| `client_conf` | str | `"serving_client_conf.prototxt"` | Path of client conf |
| `server_port` | str | `"127.0.0.1:9393"` | Exposed ip:port of server |
| `test_type` | str | `"brpc"` | Request mode; only "brpc" is supported |
| `sample_type` | str | `"fit_a_line"` | Sample type, one of "fit_a_line", "bert" |
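For comparison, the same fit_a_line request can be issued from the Python client. This is a hedged sketch assuming the standard `paddle_serving_client` API; the feed name `x`, fetch name `price`, and the input values are taken from `prepare_fit_a_line()` in simple_client.cpp below.

```python
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

# One 13-dimensional sample, same values as prepare_fit_a_line() in simple_client.cpp.
x = np.array([[0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
               -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]], dtype="float32")
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
print(fetch_map)
```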
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <vector>
#include "core/general-client/include/brpc_client.h"
using baidu::paddle_serving::client::ServingClient;
using baidu::paddle_serving::client::ServingBrpcClient;
using baidu::paddle_serving::client::PredictorInputs;
using baidu::paddle_serving::client::PredictorOutputs;
DEFINE_string(server_port, "127.0.0.1:9292", "ip:port");
DEFINE_string(client_conf, "serving_client_conf.prototxt", "Path of client conf");
DEFINE_string(test_type, "brpc", "brpc");
// fit_a_line, bert
DEFINE_string(sample_type, "fit_a_line", "List: fit_a_line, bert");
namespace {
int prepare_fit_a_line(PredictorInputs& input, std::vector<std::string>& fetch_name) {
std::vector<float> float_feed = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
std::vector<int> float_shape = {1, 13};
std::string feed_name = "x";
fetch_name = {"price"};
std::vector<int> lod;
input.add_float_data(float_feed, feed_name, float_shape, lod);
return 0;
}
int prepare_bert(PredictorInputs& input, std::vector<std::string>& fetch_name) {
{
std::vector<float> float_feed(128, 0.0f);
float_feed[0] = 1.0f;
std::vector<int> float_shape = {1, 128, 1};
std::string feed_name = "input_mask";
std::vector<int> lod;
input.add_float_data(float_feed, feed_name, float_shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "position_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
feed[0] = 101;
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "input_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "segment_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
fetch_name = {"pooled_output"};
return 0;
}
} // namespace
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::string url = FLAGS_server_port;
std::string conf = FLAGS_client_conf;
std::string test_type = FLAGS_test_type;
std::string sample_type = FLAGS_sample_type;
LOG(INFO) << "url = " << url << ";"
<< "client_conf = " << conf << ";"
<< "test_type = " << test_type
<< "sample_type = " << sample_type;
std::unique_ptr<ServingClient> client;
// default type is brpc
// will add grpc&http in the future
if (test_type == "brpc") {
client.reset(new ServingBrpcClient());
} else {
client.reset(new ServingBrpcClient());
}
std::vector<std::string> confs;
confs.push_back(conf);
if (client->init(confs, url) != 0) {
LOG(ERROR) << "Failed to init client!";
return -1;
}
PredictorInputs input;
PredictorOutputs output;
std::vector<std::string> fetch_name;
if (sample_type == "fit_a_line") {
prepare_fit_a_line(input, fetch_name);
}
else if (sample_type == "bert") {
prepare_bert(input, fetch_name);
}
else {
prepare_fit_a_line(input, fetch_name);
}
if (client->predict(input, output, fetch_name, 0) != 0) {
LOG(ERROR) << "Failed to predict!";
}
else {
LOG(INFO) << output.print();
}
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "core/general-client/include/client.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
using baidu::paddle_serving::sdk_cpp::Predictor;
using baidu::paddle_serving::sdk_cpp::PredictorApi;
namespace baidu {
namespace paddle_serving {
namespace client {
class ServingBrpcClient : public ServingClient {
public:
ServingBrpcClient() {};
~ServingBrpcClient() {};
virtual int connect(const std::string server_port);
int predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id);
private:
// generate default SDKConf
std::string gen_desc(const std::string server_port);
private:
PredictorApi _api;
Predictor* _predictor;
};
} // namespace client
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <memory>
namespace baidu {
namespace paddle_serving {
namespace predictor {
namespace general_model {
class Request;
class Response;
}
}
namespace client {
class PredictorInputs;
class PredictorOutputs;
class ServingClient {
public:
ServingClient() {};
virtual ~ServingClient() = default;
int init(const std::vector<std::string>& client_conf,
const std::string server_port);
int load_client_config(const std::vector<std::string>& client_conf);
virtual int connect(const std::string server_port) = 0;
virtual int predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id) = 0;
protected:
std::map<std::string, int> _feed_name_to_idx;
std::vector<std::string> _feed_name;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::map<std::string, int> _fetch_name_to_type;
std::vector<std::vector<int>> _shape;
std::vector<int> _type;
std::vector<int64_t> _last_request_ts;
};
class PredictorData {
public:
PredictorData() {};
virtual ~PredictorData() {};
void add_float_data(const std::vector<float>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 1);
void add_int64_data(const std::vector<int64_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 0);
void add_int32_data(const std::vector<int32_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 2);
void add_string_data(const std::string& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 20);
const std::map<std::string, std::vector<float>>& float_data_map() const {
return _float_data_map;
};
std::map<std::string, std::vector<float>>* mutable_float_data_map() {
return &_float_data_map;
};
const std::map<std::string, std::vector<int64_t>>& int64_data_map() const {
return _int64_data_map;
};
std::map<std::string, std::vector<int64_t>>* mutable_int64_data_map() {
return &_int64_data_map;
};
const std::map<std::string, std::vector<int32_t>>& int_data_map() const {
return _int32_data_map;
};
std::map<std::string, std::vector<int32_t>>* mutable_int_data_map() {
return &_int32_data_map;
};
const std::map<std::string, std::string>& string_data_map() const {
return _string_data_map;
};
std::map<std::string, std::string>* mutable_string_data_map() {
return &_string_data_map;
};
const std::map<std::string, std::vector<int>>& shape_map() const {
return _shape_map;
};
std::map<std::string, std::vector<int>>* mutable_shape_map() {
return &_shape_map;
};
const std::map<std::string, std::vector<int>>& lod_map() const {
return _lod_map;
};
std::map<std::string, std::vector<int>>* mutable_lod_map() {
return &_lod_map;
};
int get_datatype(std::string name) const;
void set_datatype(std::string name, int type);
std::string print();
private:
// used to print vector data map e.g. _float_data_map
template<typename T1, typename T2>
std::string map2string(const std::map<T1, std::vector<T2>>& map) {
std::ostringstream oss;
oss.str("");
oss.precision(6);
oss.setf(std::ios::fixed);
std::string key_seg = ":";
std::string val_seg = ",";
std::string end_seg = "\n";
typename std::map<T1, std::vector<T2>>::const_iterator it = map.begin();
typename std::map<T1, std::vector<T2>>::const_iterator itEnd = map.end();
for (; it != itEnd; it++) {
oss << "{";
oss << it->first << key_seg;
const std::vector<T2>& v = it->second;
oss << v.size() << key_seg;
for (size_t i = 0; i < v.size(); ++i) {
if (i != v.size() - 1) {
oss << v[i] << val_seg;
}
else {
oss << v[i];
}
}
oss << "}";
}
return oss.str();
};
// used to print data map without vector e.g. _string_data_map
template<typename T1, typename T2>
std::string map2string(const std::map<T1, T2>& map) {
std::ostringstream oss;
oss.str("");
std::string key_seg = ":";
std::string val_seg = ",";
std::string end_seg = "\n";
typename std::map<T1, T2>::const_iterator it = map.begin();
typename std::map<T1, T2>::const_iterator itEnd = map.end();
for (; it != itEnd; it++) {
oss << "{";
oss << it->first << key_seg
<< "size=" << it->second.size() << key_seg
<< "type=" << this->get_datatype(it->first);
oss << "}";
}
return oss.str();
};
protected:
std::map<std::string, std::vector<float>> _float_data_map;
std::map<std::string, std::vector<int64_t>> _int64_data_map;
std::map<std::string, std::vector<int32_t>> _int32_data_map;
std::map<std::string, std::string> _string_data_map;
std::map<std::string, std::vector<int>> _shape_map;
std::map<std::string, std::vector<int>> _lod_map;
std::map<std::string, int> _datatype_map;
};
class PredictorInputs : public PredictorData {
public:
PredictorInputs() {};
virtual ~PredictorInputs() {};
// generate proto from inputs
// feed_name_to_idx: mapping alias name to idx
// feed_name: mapping idx to name
static int GenProto(const PredictorInputs& inputs,
const std::map<std::string, int>& feed_name_to_idx,
const std::vector<std::string>& feed_name,
predictor::general_model::Request& req);
};
class PredictorOutputs {
public:
struct PredictorOutput {
std::string engine_name;
PredictorData data;
};
PredictorOutputs() {};
virtual ~PredictorOutputs() {};
const std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>>& datas() {
return _datas;
};
std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>>* mutable_datas() {
return &_datas;
};
void add_data(const std::shared_ptr<PredictorOutputs::PredictorOutput>& data) {
_datas.push_back(data);
};
std::string print();
void clear();
// Parse proto to outputs
// fetch_name: name of data to be output
// fetch_name_to_type: mapping of fetch_name to datatype
static int ParseProto(const predictor::general_model::Response& res,
const std::vector<std::string>& fetch_name,
std::map<std::string, int>& fetch_name_to_type,
PredictorOutputs& outputs);
protected:
std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>> _datas;
};
} // namespace client
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
...@@ -51,8 +51,13 @@ class ModelRes {
                             res._float_value_map.end());
    _int32_value_map.insert(res._int32_value_map.begin(),
                            res._int32_value_map.end());
    _string_value_map.insert(res._string_value_map.begin(),
                             res._string_value_map.end());
    _shape_map.insert(res._shape_map.begin(), res._shape_map.end());
    _lod_map.insert(res._lod_map.begin(), res._lod_map.end());
    _tensor_alias_names.insert(_tensor_alias_names.end(),
                               res._tensor_alias_names.begin(),
                               res._tensor_alias_names.end());
  }
  ModelRes(ModelRes&& res) {
    _engine_name = std::move(res._engine_name);
...@@ -65,10 +70,17 @@ class ModelRes {
    _int32_value_map.insert(
        std::make_move_iterator(std::begin(res._int32_value_map)),
        std::make_move_iterator(std::end(res._int32_value_map)));
    _string_value_map.insert(
        std::make_move_iterator(std::begin(res._string_value_map)),
        std::make_move_iterator(std::end(res._string_value_map)));
    _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
                      std::make_move_iterator(std::end(res._shape_map)));
    _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
                    std::make_move_iterator(std::end(res._lod_map)));
    _tensor_alias_names.insert(
        _tensor_alias_names.end(),
        std::make_move_iterator(std::begin(res._tensor_alias_names)),
        std::make_move_iterator(std::end(res._tensor_alias_names)));
  }
  ~ModelRes() {}
  const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
...@@ -89,6 +101,12 @@ class ModelRes {
  std::vector<int32_t>&& get_int32_by_name_with_rv(const std::string& name) {
    return std::move(_int32_value_map[name]);
  }
  const std::string& get_string_by_name(const std::string& name) {
    return _string_value_map[name];
  }
  std::string&& get_string_by_name_with_rv(const std::string& name) {
    return std::move(_string_value_map[name]);
  }
  const std::vector<int>& get_shape_by_name(const std::string& name) {
    return _shape_map[name];
  }
...@@ -105,6 +123,10 @@ class ModelRes {
    _engine_name = engine_name;
  }
  const std::string& engine_name() { return _engine_name; }
  const std::vector<std::string>& tensor_alias_names() {
    return _tensor_alias_names;
  }
  ModelRes& operator=(ModelRes&& res) {
    if (this != &res) {
      _engine_name = std::move(res._engine_name);
...@@ -117,10 +139,17 @@ class ModelRes {
      _int32_value_map.insert(
          std::make_move_iterator(std::begin(res._int32_value_map)),
          std::make_move_iterator(std::end(res._int32_value_map)));
      _string_value_map.insert(
          std::make_move_iterator(std::begin(res._string_value_map)),
          std::make_move_iterator(std::end(res._string_value_map)));
      _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
                        std::make_move_iterator(std::end(res._shape_map)));
      _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
                      std::make_move_iterator(std::end(res._lod_map)));
      _tensor_alias_names.insert(
          _tensor_alias_names.end(),
          std::make_move_iterator(std::begin(res._tensor_alias_names)),
          std::make_move_iterator(std::end(res._tensor_alias_names)));
    }
    return *this;
  }
...@@ -130,8 +159,10 @@ class ModelRes {
  std::map<std::string, std::vector<int64_t>> _int64_value_map;
  std::map<std::string, std::vector<float>> _float_value_map;
  std::map<std::string, std::vector<int32_t>> _int32_value_map;
  std::map<std::string, std::string> _string_value_map;
  std::map<std::string, std::vector<int>> _shape_map;
  std::map<std::string, std::vector<int>> _lod_map;
  std::vector<std::string> _tensor_alias_names;
};

class PredictorRes {
...@@ -168,6 +199,14 @@ class PredictorRes {
                                              const std::string& name) {
    return std::move(_models[model_idx].get_int32_by_name_with_rv(name));
  }
  const std::string& get_string_by_name(const int model_idx,
                                        const std::string& name) {
    return _models[model_idx].get_string_by_name(name);
  }
  std::string&& get_string_by_name_with_rv(const int model_idx,
                                           const std::string& name) {
    return std::move(_models[model_idx].get_string_by_name_with_rv(name));
  }
  const std::vector<int>& get_shape_by_name(const int model_idx,
                                            const std::string& name) {
    return _models[model_idx].get_shape_by_name(name);
...@@ -193,11 +232,16 @@ class PredictorRes {
  }
  const std::string& variant_tag() { return _variant_tag; }
  const std::vector<std::string>& get_engine_names() { return _engine_names; }
  const std::vector<std::string>& get_tensor_alias_names(const int model_idx) {
    _tensor_alias_names = _models[model_idx].tensor_alias_names();
    return _tensor_alias_names;
  }

 private:
  std::vector<ModelRes> _models;
  std::string _variant_tag;
  std::vector<std::string> _engine_names;
  std::vector<std::string> _tensor_alias_names;
};

class PredictorClient {
...@@ -222,10 +266,14 @@ class PredictorClient {
      const std::vector<std::string>& float_feed_name,
      const std::vector<std::vector<int>>& float_shape,
      const std::vector<std::vector<int>>& float_lod_slot_batch,
      const std::vector<py::array_t<int32_t>>& int32_feed,
      const std::vector<std::string>& int32_feed_name,
      const std::vector<std::vector<int>>& int32_shape,
      const std::vector<std::vector<int>>& int32_lod_slot_batch,
      const std::vector<py::array_t<int64_t>>& int64_feed,
      const std::vector<std::string>& int64_feed_name,
      const std::vector<std::vector<int>>& int64_shape,
      const std::vector<std::vector<int>>& int64_lod_slot_batch,
      const std::vector<std::string>& string_feed,
      const std::vector<std::string>& string_feed_name,
      const std::vector<std::vector<int>>& string_shape,
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-client/include/brpc_client.h"
#include "core/sdk-cpp/include/common.h"
#include "core/util/include/timer.h"
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
DEFINE_bool(profile_client, false, "");
DEFINE_bool(profile_server, false, "");
#define BRPC_MAX_BODY_SIZE 512 * 1024 * 1024
namespace baidu {
namespace paddle_serving {
namespace client {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using configure::SDKConf;
using configure::VariantConf;
using configure::Predictor;
int ServingBrpcClient::connect(const std::string server_port) {
brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE;
if (_api.create(gen_desc(server_port)) != 0) {
LOG(ERROR) << "Predictor Creation Failed";
return -1;
}
// _api.thrd_initialize();
return 0;
}
std::string ServingBrpcClient::gen_desc(const std::string server_port) {
// default config for brpc
SDKConf sdk_conf;
Predictor* predictor = sdk_conf.add_predictors();
predictor->set_name("general_model");
predictor->set_service_name("baidu.paddle_serving.predictor.general_model.GeneralModelService");
predictor->set_endpoint_router("WeightedRandomRender");
predictor->mutable_weighted_random_render_conf()->set_variant_weight_list("100");
VariantConf* predictor_var = predictor->add_variants();
predictor_var->set_tag("default_tag_1");
std::string cluster = "list://" + server_port;
predictor_var->mutable_naming_conf()->set_cluster(cluster);
VariantConf* var = sdk_conf.mutable_default_variant_conf();
var->set_tag("default");
var->mutable_connection_conf()->set_connect_timeout_ms(2000);
var->mutable_connection_conf()->set_rpc_timeout_ms(200000);
var->mutable_connection_conf()->set_connect_retry_count(2);
var->mutable_connection_conf()->set_max_connection_per_host(100);
var->mutable_connection_conf()->set_hedge_request_timeout_ms(-1);
var->mutable_connection_conf()->set_hedge_fetch_retry_count(2);
var->mutable_connection_conf()->set_connection_type("pooled");
var->mutable_connection_conf()->set_connect_timeout_ms(2000);
var->mutable_naming_conf()->set_cluster_filter_strategy("Default");
var->mutable_naming_conf()->set_load_balance_strategy("la");
var->mutable_rpc_parameter()->set_compress_type(0);
var->mutable_rpc_parameter()->set_package_size(20);
var->mutable_rpc_parameter()->set_protocol("baidu_std");
var->mutable_rpc_parameter()->set_max_channel_per_request(3);
return sdk_conf.SerializePartialAsString();
}
int ServingBrpcClient::predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id) {
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
// thread initialize for StubTLS
_api.thrd_initialize();
std::string variant_tag;
// predictor is bound to request with brpc::Controller
_predictor = _api.fetch_predictor("general_model", &variant_tag);
if (_predictor == NULL) {
LOG(ERROR) << "Failed fetch predictor so predict error!";
return -1;
}
// predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "variant_tag:" << variant_tag;
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req;
req.set_log_id(log_id);
for (auto &name : fetch_name) {
req.add_fetch_var_names(name);
}
if (PredictorInputs::GenProto(inputs, _feed_name_to_idx, _feed_name, req) != 0) {
LOG(ERROR) << "Failed to preprocess req!";
return -1;
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
res.Clear();
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
return -1;
}
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
if (PredictorOutputs::ParseProto(res, fetch_name, _fetch_name_to_type, outputs) != 0) {
LOG(ERROR) << "Failed to post_process res!";
return -1;
}
postprocess_end = timeline.TimeStampUS();
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
<< "pid:" << getpid() << "\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
}
}
oss << "postpro_0:" << postprocess_start << " ";
oss << "postpro_1:" << postprocess_end;
fprintf(stderr, "%s\n", oss.str().c_str());
}
// release predictor
_api.thrd_clear();
std::ostringstream oss;
oss << "[client]"
<< "logid=" << log_id <<",";
if (FLAGS_profile_client) {
double pre_cost = (preprocess_end - preprocess_start) / 1000.0;
double infer_cost = (client_infer_end - client_infer_start) / 1000.0;
double post_cost = (postprocess_end - postprocess_start) / 1000.0;
oss << "client_pre_cost=" << pre_cost << "ms,"
<< "client_infer_cost=" << infer_cost << "ms,"
<< "client_post_cost=" << post_cost << "ms,";
}
double client_cost = (postprocess_end - preprocess_start) / 1000.0;
oss << "client_cost=" << client_cost << "ms,";
int op_num = res.profile_time_size() / 2;
if (FLAGS_profile_server) {
for (int i = 0; i < op_num - 1; ++i) {
double t = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "op" << i << "=" << t << "ms,";
}
}
if (op_num > 0) {
int i = op_num - 1;
double server_cost = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "server_cost=" << server_cost << "ms.";
}
LOG(INFO) << oss.str();
return 0;
}
} // namespace client
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-client/include/client.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace client {
using configure::GeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int ServingClient::init(const std::vector<std::string>& client_conf,
const std::string server_port) {
if (load_client_config(client_conf) != 0) {
LOG(ERROR) << "Failed to load client config";
return -1;
}
// pure virtual func, subclass implementation
if (connect(server_port) != 0) {
LOG(ERROR) << "Failed to connect";
return -1;
}
return 0;
}
int ServingClient::load_client_config(const std::vector<std::string> &conf_file) {
try {
GeneralModelConfig model_config;
if (configure::read_proto_conf(conf_file[0].c_str(), &model_config) != 0) {
LOG(ERROR) << "Failed to load general model config"
<< ", file path: " << conf_file[0];
return -1;
}
_feed_name_to_idx.clear();
_fetch_name_to_idx.clear();
_shape.clear();
int feed_var_num = model_config.feed_var_size();
_feed_name.clear();
VLOG(2) << "feed var num: " << feed_var_num;
for (int i = 0; i < feed_var_num; ++i) {
_feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
VLOG(2) << "feed [" << i << "]"
<< " name: " << model_config.feed_var(i).name();
_feed_name.push_back(model_config.feed_var(i).name());
VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
<< " index: " << i;
std::vector<int> tmp_feed_shape;
VLOG(2) << "feed"
<< "[" << i << "] shape:";
for (int j = 0; j < model_config.feed_var(i).shape_size(); ++j) {
tmp_feed_shape.push_back(model_config.feed_var(i).shape(j));
VLOG(2) << "shape[" << j << "]: " << model_config.feed_var(i).shape(j);
}
_type.push_back(model_config.feed_var(i).feed_type());
VLOG(2) << "feed"
<< "[" << i
<< "] feed type: " << model_config.feed_var(i).feed_type();
_shape.push_back(tmp_feed_shape);
}
if (conf_file.size() > 1) {
model_config.Clear();
if (configure::read_proto_conf(conf_file[conf_file.size() - 1].c_str(),
&model_config) != 0) {
LOG(ERROR) << "Failed to load general model config"
<< ", file path: " << conf_file[conf_file.size() - 1];
return -1;
}
}
int fetch_var_num = model_config.fetch_var_size();
VLOG(2) << "fetch_var_num: " << fetch_var_num;
for (int i = 0; i < fetch_var_num; ++i) {
_fetch_name_to_idx[model_config.fetch_var(i).alias_name()] = i;
VLOG(2) << "fetch [" << i << "]"
<< " alias name: " << model_config.fetch_var(i).alias_name();
_fetch_name_to_var_name[model_config.fetch_var(i).alias_name()] =
model_config.fetch_var(i).name();
_fetch_name_to_type[model_config.fetch_var(i).alias_name()] =
model_config.fetch_var(i).fetch_type();
}
} catch (std::exception &e) {
LOG(ERROR) << "Failed load general model config" << e.what();
return -1;
}
return 0;
}
void PredictorData::add_float_data(const std::vector<float>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_float_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_int64_data(const std::vector<int64_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_int64_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_int32_data(const std::vector<int32_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_int32_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_string_data(const std::string& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_string_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
int PredictorData::get_datatype(std::string name) const {
std::map<std::string, int>::const_iterator it = _datatype_map.find(name);
if (it != _datatype_map.end()) {
return it->second;
}
return 0;
}
void PredictorData::set_datatype(std::string name, int type) {
_datatype_map[name] = type;
}
std::string PredictorData::print() {
std::string res;
res.append(map2string<std::string, float>(_float_data_map));
res.append(map2string<std::string, int64_t>(_int64_data_map));
res.append(map2string<std::string, int32_t>(_int32_data_map));
res.append(map2string<std::string, std::string>(_string_data_map));
return res;
}
int PredictorInputs::GenProto(const PredictorInputs& inputs,
const std::map<std::string, int>& feed_name_to_idx,
const std::vector<std::string>& feed_name,
Request& req) {
const std::map<std::string, std::vector<float>>& float_feed_map = inputs.float_data_map();
const std::map<std::string, std::vector<int64_t>>& int64_feed_map = inputs.int64_data_map();
const std::map<std::string, std::vector<int32_t>>& int32_feed_map = inputs.int_data_map();
const std::map<std::string, std::string>& string_feed_map = inputs.string_data_map();
const std::map<std::string, std::vector<int>>& shape_map = inputs.shape_map();
const std::map<std::string, std::vector<int>>& lod_map = inputs.lod_map();
VLOG(2) << "float feed name size: " << float_feed_map.size();
VLOG(2) << "int feed name size: " << int64_feed_map.size();
VLOG(2) << "string feed name size: " << string_feed_map.size();
// batch is already in Tensor.
for (std::map<std::string, std::vector<float>>::const_iterator iter = float_feed_map.begin();
iter != float_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<float>& float_data = iter->second;
const std::vector<int>& float_shape = shape_map.at(name);
const std::vector<int>& float_lod = lod_map.at(name);
// default datatype = P_FLOAT32
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
VLOG(2) << "prepare float feed " << name << " idx " << idx;
int total_number = float_data.size();
Tensor *tensor = req.add_tensor();
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape.size();
for (uint32_t j = 0; j < float_shape.size(); ++j) {
tensor->add_shape(float_shape[j]);
}
for (uint32_t j = 0; j < float_lod.size(); ++j) {
tensor->add_lod(float_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_float_data()->Resize(total_number, 0);
memcpy(tensor->mutable_float_data()->mutable_data(), float_data.data(), total_number * sizeof(float));
}
for (std::map<std::string, std::vector<int64_t>>::const_iterator iter = int64_feed_map.begin();
iter != int64_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<int64_t>& int64_data = iter->second;
const std::vector<int>& int64_shape = shape_map.at(name);
const std::vector<int>& int64_lod = lod_map.at(name);
// default datatype = P_INT64
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
int total_number = int64_data.size();
for (uint32_t j = 0; j < int64_shape.size(); ++j) {
tensor->add_shape(int64_shape[j]);
}
for (uint32_t j = 0; j < int64_lod.size(); ++j) {
tensor->add_lod(int64_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), int64_data.data(), total_number * sizeof(int64_t));
}
for (std::map<std::string, std::vector<int32_t>>::const_iterator iter = int32_feed_map.begin();
iter != int32_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<int32_t>& int32_data = iter->second;
const std::vector<int>& int32_shape = shape_map.at(name);
const std::vector<int>& int32_lod = lod_map.at(name);
// default datatype = P_INT32
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
int total_number = int32_data.size();
for (uint32_t j = 0; j < int32_shape.size(); ++j) {
tensor->add_shape(int32_shape[j]);
}
for (uint32_t j = 0; j < int32_lod.size(); ++j) {
tensor->add_lod(int32_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), int32_data.data(), total_number * sizeof(int32_t));
}
for (std::map<std::string, std::string>::const_iterator iter = string_feed_map.begin();
iter != string_feed_map.end();
++iter) {
std::string name = iter->first;
const std::string& string_data = iter->second;
const std::vector<int>& string_shape = shape_map.at(name);
const std::vector<int>& string_lod = lod_map.at(name);
// default datatype = P_STRING
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
for (uint32_t j = 0; j < string_shape.size(); ++j) {
tensor->add_shape(string_shape[j]);
}
for (uint32_t j = 0; j < string_lod.size(); ++j) {
tensor->add_lod(string_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
if (datatype == P_STRING) {
const int string_shape_size = string_shape.size();
      // string_shape is expected to be [1], because numpy has no string dtype
      // and strings are passed via vector<vector<string> >.
      if (string_shape_size != 1) {
        LOG(ERROR) << "string_shape should be 1-D, but received: "
                   << string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_data);
break;
}
}
} else {
tensor->set_tensor_content(string_data);
}
}
return 0;
}
std::string PredictorOutputs::print() {
std::string res = "";
for (size_t i = 0; i < _datas.size(); ++i) {
res.append(_datas[i]->engine_name);
res.append(":");
res.append(_datas[i]->data.print());
res.append("\n");
}
return res;
}
void PredictorOutputs::clear() {
_datas.clear();
}
int PredictorOutputs::ParseProto(const Response& res,
const std::vector<std::string>& fetch_name,
std::map<std::string, int>& fetch_name_to_type,
PredictorOutputs& outputs) {
VLOG(2) << "get model output num";
uint32_t model_num = res.outputs_size();
VLOG(2) << "model num: " << model_num;
for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
VLOG(2) << "process model output index: " << m_idx;
auto& output = res.outputs(m_idx);
std::shared_ptr<PredictorOutputs::PredictorOutput> predictor_output =
std::make_shared<PredictorOutputs::PredictorOutput>();
predictor_output->engine_name = output.engine_name();
PredictorData& predictor_data = predictor_output->data;
std::map<std::string, std::vector<float>>& float_data_map = *predictor_output->data.mutable_float_data_map();
std::map<std::string, std::vector<int64_t>>& int64_data_map = *predictor_output->data.mutable_int64_data_map();
std::map<std::string, std::vector<int32_t>>& int32_data_map = *predictor_output->data.mutable_int_data_map();
std::map<std::string, std::string>& string_data_map = *predictor_output->data.mutable_string_data_map();
std::map<std::string, std::vector<int>>& shape_map = *predictor_output->data.mutable_shape_map();
std::map<std::string, std::vector<int>>& lod_map = *predictor_output->data.mutable_lod_map();
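    // Two passes over fetch_name follow: the first records shape and lod for
    // each fetched variable, the second copies the typed payload (int64 /
    // float / int32 data, or raw tensor_content bytes for uint8 / int8 / fp16)
    // into the corresponding maps.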
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
shape_map[name][i] = output.tensor(idx).shape(i);
}
int lod_size = output.tensor(idx).lod_size();
if (lod_size > 0) {
lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "fetch var " << name << "type int64";
int size = output.tensor(idx).int64_data_size();
int64_data_map[name] = std::vector<int64_t>(
output.tensor(idx).int64_data().begin(),
output.tensor(idx).int64_data().begin() + size);
} else if (fetch_name_to_type[name] == P_FLOAT32) {
VLOG(2) << "fetch var " << name << "type float";
int size = output.tensor(idx).float_data_size();
float_data_map[name] = std::vector<float>(
output.tensor(idx).float_data().begin(),
output.tensor(idx).float_data().begin() + size);
} else if (fetch_name_to_type[name] == P_INT32) {
VLOG(2) << "fetch var " << name << "type int32";
int size = output.tensor(idx).int_data_size();
int32_data_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (fetch_name_to_type[name] == P_UINT8
|| fetch_name_to_type[name] == P_INT8
|| fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var [" << name << "]type="
<< fetch_name_to_type[name];
string_data_map[name] = output.tensor(idx).tensor_content();
}
predictor_data.set_datatype(name, output.tensor(idx).elem_type());
idx += 1;
}
outputs.add_data(predictor_output);
}
return 0;
}
} // namespace client
} // namespace paddle_serving
} // namespace baidu
...@@ -25,7 +25,22 @@ using baidu::paddle_serving::Timer; ...@@ -25,7 +25,22 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; // support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
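// These codes mirror the elem_type values defined for the general_model proto
// Tensor message; the two listings must stay consistent.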
std::once_flag gflags_init_flag; std::once_flag gflags_init_flag;
namespace py = pybind11; namespace py = pybind11;
...@@ -152,10 +167,14 @@ int PredictorClient::numpy_predict( ...@@ -152,10 +167,14 @@ int PredictorClient::numpy_predict(
const std::vector<std::string> &float_feed_name, const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape, const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch, const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int_feed, const std::vector<py::array_t<int32_t>> &int32_feed,
const std::vector<std::string> &int_feed_name, const std::vector<std::string> &int32_feed_name,
const std::vector<std::vector<int>> &int_shape, const std::vector<std::vector<int>> &int32_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch, const std::vector<std::vector<int>> &int32_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int64_feed,
const std::vector<std::string> &int64_feed_name,
const std::vector<std::vector<int>> &int64_shape,
const std::vector<std::vector<int>> &int64_lod_slot_batch,
const std::vector<std::string> &string_feed, const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name, const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape, const std::vector<std::vector<int>> &string_shape,
...@@ -168,15 +187,14 @@ int PredictorClient::numpy_predict( ...@@ -168,15 +187,14 @@ int PredictorClient::numpy_predict(
Timer timeline; Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS(); int64_t preprocess_start = timeline.TimeStampUS();
int fetch_name_num = fetch_name.size();
_api.thrd_initialize(); _api.thrd_initialize();
std::string variant_tag; std::string variant_tag;
_predictor = _api.fetch_predictor("general_model", &variant_tag); _predictor = _api.fetch_predictor("general_model", &variant_tag);
predict_res_batch.set_variant_tag(variant_tag); predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done."; VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size(); VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size(); VLOG(2) << "int feed name size: " << int32_feed_name.size();
VLOG(2) << "int feed name size: " << int64_feed_name.size();
VLOG(2) << "string feed name size: " << string_feed_name.size(); VLOG(2) << "string feed name size: " << string_feed_name.size();
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size; VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req; Request req;
...@@ -193,7 +211,11 @@ int PredictorClient::numpy_predict( ...@@ -193,7 +211,11 @@ int PredictorClient::numpy_predict(
tensor_vec.push_back(req.add_tensor()); tensor_vec.push_back(req.add_tensor());
} }
for (auto &name : int_feed_name) { for (auto &name : int32_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
for (auto &name : int64_feed_name) {
tensor_vec.push_back(req.add_tensor()); tensor_vec.push_back(req.add_tensor());
} }
...@@ -233,37 +255,63 @@ int PredictorClient::numpy_predict( ...@@ -233,37 +255,63 @@ int PredictorClient::numpy_predict(
} }
vec_idx = 0; vec_idx = 0;
for (auto &name : int_feed_name) { for (auto &name : int32_feed_name) {
int idx = _feed_name_to_idx[name]; int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) { if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()"; LOG(ERROR) << "idx > tensor_vec.size()";
return -1; return -1;
} }
Tensor *tensor = tensor_vec[idx]; Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes(); int nbytes = int32_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0)); void *rawdata_ptr = (void *)(int32_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size(); int total_number = int32_feed[vec_idx].size();
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) { for (uint32_t j = 0; j < int32_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]); tensor->add_shape(int32_shape[vec_idx][j]);
} }
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) { for (uint32_t j = 0; j < int32_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]); tensor->add_lod(int32_lod_slot_batch[vec_idx][j]);
} }
tensor->set_elem_type(_type[idx]); tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]); tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name); tensor->set_alias_name(name);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0); tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes); memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++;
} }
  // Feed INT64 data from int64_feed into the tensor's int64_data field.
vec_idx = 0;
for (auto &name : int64_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int64_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int64_feed[vec_idx].data(0));
int total_number = int64_feed[vec_idx].size();
for (uint32_t j = 0; j < int64_shape[vec_idx].size(); ++j) {
tensor->add_shape(int64_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int64_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int64_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++; vec_idx++;
} }
  // Feed data whose type is not P_STRING (UINT8 / INT8 / FLOAT16) is carried
  // in string_feed and copied into tensor_content below.
vec_idx = 0; vec_idx = 0;
for (auto &name : string_feed_name) { for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name]; int idx = _feed_name_to_idx[name];
...@@ -279,10 +327,14 @@ int PredictorClient::numpy_predict( ...@@ -279,10 +327,14 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) { for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]); tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
} }
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]); tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name); tensor->set_alias_name(name);
if (_type[idx] != P_STRING) {
tensor->set_elem_type(_type[idx]);
tensor->set_tensor_content(string_feed[vec_idx]);
} else {
tensor->set_elem_type(P_STRING);
const int string_shape_size = string_shape[vec_idx].size(); const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1];cause numpy has no datatype of string. // string_shape[vec_idx] = [1];cause numpy has no datatype of string.
// we pass string via vector<vector<string> >. // we pass string via vector<vector<string> >.
...@@ -297,6 +349,7 @@ int PredictorClient::numpy_predict( ...@@ -297,6 +349,7 @@ int PredictorClient::numpy_predict(
break; break;
} }
} }
}
vec_idx++; vec_idx++;
} }
...@@ -308,11 +361,9 @@ int PredictorClient::numpy_predict( ...@@ -308,11 +361,9 @@ int PredictorClient::numpy_predict(
int64_t postprocess_start = 0; int64_t postprocess_start = 0;
int64_t postprocess_end = 0; int64_t postprocess_end = 0;
if (FLAGS_profile_client) {
if (FLAGS_profile_server) { if (FLAGS_profile_server) {
req.set_profile_server(true); req.set_profile_server(true);
} }
}
res.Clear(); res.Clear();
if (_predictor->inference(&req, &res) != 0) { if (_predictor->inference(&req, &res) != 0) {
...@@ -329,10 +380,12 @@ int PredictorClient::numpy_predict( ...@@ -329,10 +380,12 @@ int PredictorClient::numpy_predict(
auto output = res.outputs(m_idx); auto output = res.outputs(m_idx);
ModelRes model; ModelRes model;
model.set_engine_name(output.engine_name()); model.set_engine_name(output.engine_name());
    // At the ResponseOp, the outputs have already been arranged by fetch_name,
    int idx = 0;     // so the output tensors correspond to fetch_name strictly and can be processed in order.
for (auto &name : fetch_name) { for (int idx = 0; idx < output.tensor_size(); ++idx) {
// int idx = _fetch_name_to_idx[name]; // int idx = _fetch_name_to_idx[name];
const std::string name = output.tensor(idx).alias_name();
model._tensor_alias_names.push_back(name);
int shape_size = output.tensor(idx).shape_size(); int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size " VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size; << shape_size;
...@@ -347,13 +400,7 @@ int PredictorClient::numpy_predict( ...@@ -347,13 +400,7 @@ int PredictorClient::numpy_predict(
model._lod_map[name][i] = output.tensor(idx).lod(i); model._lod_map[name][i] = output.tensor(idx).lod(i);
} }
} }
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == P_INT64) { if (_fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "ferch var " << name << "type int64"; VLOG(2) << "ferch var " << name << "type int64";
int size = output.tensor(idx).int64_data_size(); int size = output.tensor(idx).int64_data_size();
...@@ -372,8 +419,16 @@ int PredictorClient::numpy_predict( ...@@ -372,8 +419,16 @@ int PredictorClient::numpy_predict(
model._int32_value_map[name] = std::vector<int32_t>( model._int32_value_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(), output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size); output.tensor(idx).int_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_UINT8) {
VLOG(2) << "fetch var " << name << "type uint8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_INT8) {
VLOG(2) << "fetch var " << name << "type int8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var " << name << "type float16";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} }
idx += 1;
} }
predict_res_batch.add_model_res(std::move(model)); predict_res_batch.add_model_res(std::move(model));
} }
...@@ -403,6 +458,36 @@ int PredictorClient::numpy_predict( ...@@ -403,6 +458,36 @@ int PredictorClient::numpy_predict(
} }
_api.thrd_clear(); _api.thrd_clear();
std::ostringstream oss;
oss << "[client]"
<< "logid=" << log_id <<",";
if (FLAGS_profile_client) {
double pre_cost = (preprocess_end - preprocess_start) / 1000.0;
double infer_cost = (client_infer_end - client_infer_start) / 1000.0;
double post_cost = (postprocess_end - postprocess_start) / 1000.0;
oss << "client_pre_cost=" << pre_cost << "ms,"
<< "client_infer_cost=" << infer_cost << "ms,"
<< "client_post_cost=" << post_cost << "ms,";
}
double client_cost = (postprocess_end - preprocess_start) / 1000.0;
oss << "client_cost=" << client_cost << "ms,";
int op_num = res.profile_time_size() / 2;
if (FLAGS_profile_server) {
for (int i = 0; i < op_num - 1; ++i) {
double t = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "op" << i << "=" << t << "ms,";
}
}
if (op_num > 0) {
int i = op_num - 1;
double server_cost = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "server_cost=" << server_cost << "ms.";
}
LOG(INFO) << oss.str();
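  // With both client and server profiling enabled, the resulting line looks
  // roughly like (values are hypothetical):
  // [client]logid=0,client_pre_cost=0.8ms,client_infer_cost=15.2ms,client_post_cost=0.3ms,client_cost=16.5ms,op0=1.1ms,server_cost=14.0ms.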
return 0; return 0;
} }
} // namespace general_model } // namespace general_model
......
...@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) {
}); });
return py::array(ptr->size(), ptr->data(), capsule); return py::array(ptr->size(), ptr->data(), capsule);
}) })
.def("get_int32_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int32_t> *ptr = new std::vector<int32_t>(
std::move(self.get_int32_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int32_t> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
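      // The py::capsule takes ownership of the heap-allocated vector and frees
      // it when the returned numpy array is garbage collected, so the data is
      // exposed to Python without an extra copy.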
.def("get_string_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_string_by_name_with_rv(model_idx, name);
})
.def("get_shape", .def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) { [](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int> *ptr = new std::vector<int>( std::vector<int> *ptr = new std::vector<int>(
...@@ -69,7 +82,10 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -69,7 +82,10 @@ PYBIND11_MODULE(serving_client, m) {
}) })
.def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); }) .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
.def("get_engine_names", .def("get_engine_names",
[](PredictorRes &self) { return self.get_engine_names(); }); [](PredictorRes &self) { return self.get_engine_names(); })
.def("get_tensor_alias_names", [](PredictorRes &self, int model_idx) {
return self.get_tensor_alias_names(model_idx);
});
py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol()) py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
.def(py::init()) .def(py::init())
...@@ -101,10 +117,14 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -101,10 +117,14 @@ PYBIND11_MODULE(serving_client, m) {
const std::vector<std::string> &float_feed_name, const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape, const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch, const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int_feed, const std::vector<py::array_t<int32_t>> &int32_feed,
const std::vector<std::string> &int_feed_name, const std::vector<std::string> &int32_feed_name,
const std::vector<std::vector<int>> &int_shape, const std::vector<std::vector<int>> &int32_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch, const std::vector<std::vector<int>> &int32_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int64_feed,
const std::vector<std::string> &int64_feed_name,
const std::vector<std::vector<int>> &int64_shape,
const std::vector<std::vector<int>> &int64_lod_slot_batch,
const std::vector<std::string> &string_feed, const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name, const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape, const std::vector<std::vector<int>> &string_shape,
...@@ -117,10 +137,14 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -117,10 +137,14 @@ PYBIND11_MODULE(serving_client, m) {
float_feed_name, float_feed_name,
float_shape, float_shape,
float_lod_slot_batch, float_lod_slot_batch,
int_feed, int32_feed,
int_feed_name, int32_feed_name,
int_shape, int32_shape,
int_lod_slot_batch, int32_lod_slot_batch,
int64_feed,
int64_feed_name,
int64_shape,
int64_lod_slot_batch,
string_feed, string_feed,
string_feed_name, string_feed_name,
string_shape, string_shape,
......
...@@ -191,24 +191,44 @@ int GeneralDetectionOp::inference() { ...@@ -191,24 +191,44 @@ int GeneralDetectionOp::inference() {
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
for (int i = boxes.size() - 1; i >= 0; i--) { float max_wh_ratio = 0.0f;
crop_img = GetRotateCropImage(img, boxes[i]); std::vector<cv::Mat> crop_imgs;
std::vector<cv::Mat> resize_imgs;
float wh_ratio = float(crop_img.cols) / float(crop_img.rows); int max_resize_w = 0;
int max_resize_h = 0;
int box_num = boxes.size();
std::vector<std::vector<float>> output_rec;
for (int i = 0; i < box_num; ++i) {
cv::Mat line_img = GetRotateCropImage(img, boxes[i]);
float wh_ratio = float(line_img.cols) / float(line_img.rows);
max_wh_ratio = max_wh_ratio > wh_ratio ? max_wh_ratio : wh_ratio;
crop_imgs.push_back(line_img);
}
for (int i = 0; i < box_num; ++i) {
cv::Mat resize_img;
crop_img = crop_imgs[i];
this->resize_op_rec.Run( this->resize_op_rec.Run(
crop_img, resize_img_rec, wh_ratio, this->use_tensorrt_); crop_img, resize_img, max_wh_ratio, this->use_tensorrt_);
this->normalize_op_.Run( this->normalize_op_.Run(
&resize_img_rec, this->mean_rec, this->scale_rec, this->is_scale_); &resize_img, this->mean_rec, this->scale_rec, this->is_scale_);
std::vector<float> output_rec( max_resize_w = std::max(max_resize_w, resize_img.cols);
1 * 3 * resize_img_rec.rows * resize_img_rec.cols, 0.0f); max_resize_h = std::max(max_resize_h, resize_img.rows);
resize_imgs.push_back(resize_img);
}
int buf_size = 3 * max_resize_h * max_resize_w;
output_rec = std::vector<std::vector<float>>(box_num,
std::vector<float>(buf_size, 0.0f));
for (int i = 0; i < box_num; ++i) {
resize_img_rec = resize_imgs[i];
this->permute_op_.Run(&resize_img_rec, output_rec.data()); this->permute_op_.Run(&resize_img_rec, output_rec[i].data());
}
// Inference. // Inference.
output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols}; output_shape = {box_num, 3, max_resize_h, max_resize_w};
out_num = std::accumulate( out_num = std::accumulate(
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>()); output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
databuf_size_out = out_num * sizeof(float); databuf_size_out = out_num * sizeof(float);
...@@ -217,17 +237,19 @@ int GeneralDetectionOp::inference() { ...@@ -217,17 +237,19 @@ int GeneralDetectionOp::inference() {
LOG(ERROR) << "Malloc failed, size: " << databuf_size_out; LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
return -1; return -1;
} }
memcpy(databuf_data_out, output_rec.data(), databuf_size_out); int offset = buf_size * sizeof(float);
for (int i = 0; i < box_num; ++i) {
memcpy(databuf_data_out + i * offset, output_rec[i].data(), offset);
}
databuf_char_out = reinterpret_cast<char*>(databuf_data_out); databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
paddle::PaddleTensor tensor_out; paddle::PaddleTensor tensor_out;
tensor_out.name = "image"; tensor_out.name = "image";
tensor_out.dtype = paddle::PaddleDType::FLOAT32; tensor_out.dtype = paddle::PaddleDType::FLOAT32;
tensor_out.shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols}; tensor_out.shape = output_shape;
tensor_out.data = paddleBuf; tensor_out.data = paddleBuf;
out->push_back(tensor_out); out->push_back(tensor_out);
} }
}
out->erase(out->begin(), out->begin() + infer_outnum); out->erase(out->begin(), out->begin() + infer_outnum);
int64_t end = timeline.TimeStampUS(); int64_t end = timeline.TimeStampUS();
......
...@@ -63,7 +63,7 @@ class GeneralDetectionOp ...@@ -63,7 +63,7 @@ class GeneralDetectionOp
double det_db_thresh_ = 0.3; double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5; double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0; double det_db_unclip_ratio_ = 1.5;
std::vector<float> mean_det = {0.485f, 0.456f, 0.406f}; std::vector<float> mean_det = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f}; std::vector<float> scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include "core/cube/cube-api/include/cube_api.h" #include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/framework/cache.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h" #include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h" #include "core/predictor/framework/resource.h"
...@@ -36,6 +37,7 @@ using baidu::paddle_serving::predictor::general_model::Response; ...@@ -36,6 +37,7 @@ using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
using baidu::paddle_serving::predictor::CubeCache;
// DistKV Infer Op: seek cube and then call paddle inference // DistKV Infer Op: seek cube and then call paddle inference
// op seq: general_reader-> dist_kv_infer -> general_response // op seq: general_reader-> dist_kv_infer -> general_response
...@@ -70,21 +72,30 @@ int GeneralDistKVInferOp::inference() { ...@@ -70,21 +72,30 @@ int GeneralDistKVInferOp::inference() {
<< ") Failed mutable depended argument, op:" << pre_name; << ") Failed mutable depended argument, op:" << pre_name;
return -1; return -1;
} }
Timer timeline;
timeline.Start();
const TensorVector *in = &input_blob->tensor_vector; const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector; TensorVector *out = &output_blob->tensor_vector;
std::vector<uint64_t> keys; std::vector<uint64_t> keys;
std::vector<uint64_t> unique_keys;
std::unordered_map<uint64_t, rec::mcube::CubeValue *> key_map;
std::vector<rec::mcube::CubeValue> values; std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0; // sparse inputs counts, sparse would seek cube // sparse inputs counts, sparse would seek cube
int dense_count = 0; // dense inputs counts, dense would directly call paddle infer int sparse_count = 0;
// dense inputs counts, dense would directly call paddle infer
int dense_count = 0;
std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs; std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
size_t key_len = 0; size_t key_len = 0;
for (size_t i = 0; i < in->size(); ++i) { for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) { if (in->at(i).dtype != paddle::PaddleDType::INT64) {
// dense input type is not int64
++dense_count; ++dense_count;
continue; continue;
} }
// sparse input type is int64
++sparse_count; ++sparse_count;
size_t elem_num = 1; size_t elem_num = 1;
for (size_t s = 0; s < in->at(i).shape.size(); ++s) { for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
elem_num *= in->at(i).shape[s]; elem_num *= in->at(i).shape[s];
...@@ -94,7 +105,8 @@ int GeneralDistKVInferOp::inference() { ...@@ -94,7 +105,8 @@ int GeneralDistKVInferOp::inference() {
dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num)); dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
} }
keys.resize(key_len); keys.resize(key_len);
VLOG(3) << "(logid=" << log_id << ") cube number of keys to look up: " << key_len; unique_keys.resize(key_len);
int key_idx = 0; int key_idx = 0;
for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) { for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
std::copy(dataptr_size_pairs[i].first, std::copy(dataptr_size_pairs[i].first,
...@@ -102,20 +114,81 @@ int GeneralDistKVInferOp::inference() { ...@@ -102,20 +114,81 @@ int GeneralDistKVInferOp::inference() {
keys.begin() + key_idx); keys.begin() + key_idx);
key_idx += dataptr_size_pairs[i].second; key_idx += dataptr_size_pairs[i].second;
} }
  // filter duplicate keys
int unique_keys_count = 0;
for (size_t i = 0; i < keys.size(); ++i) {
if (key_map.find(keys[i]) == key_map.end()) {
key_map[keys[i]] = nullptr;
unique_keys[unique_keys_count++] = keys[i];
}
}
unique_keys.resize(unique_keys_count);
VLOG(1) << "(logid=" << log_id
<< ") cube number of keys to look up: " << key_len
<< " uniq keys: " << unique_keys_count;
  // filter keys already present in the cube cache
size_t hit_counts = 0;
int64_t seek_cache_start = timeline.TimeStampUS();
CubeCache *p_cube_cache =
InferManager::instance().get_cube_cache(engine_name().c_str());
if (p_cube_cache != nullptr) {
for (size_t i = 0; i < unique_keys_count; ++i) {
rec::mcube::CubeValue *hit_val = p_cube_cache->get_data(unique_keys[i]);
if (hit_val) {
// LOG(WARNING) << "Hit one cache. key:" << unique_keys[i];
key_map[unique_keys[i]] = hit_val;
if (hit_counts % 100 == 0) {
LOG(WARNING) << "hit cache! key:" << unique_keys[i]
<< " value:" << hit_val->buff;
}
unique_keys[i] = 0;
++hit_counts;
}
}
} else {
LOG(WARNING) << "get cube cache fail. model: " << engine_name();
}
  // clear unique keys which hit the cache
if (hit_counts > 0) {
for (auto it = unique_keys.begin(); it < unique_keys.end();) {
if (*it == 0) {
it = unique_keys.erase(it);
--unique_keys_count;
} else {
++it;
}
}
}
int64_t seek_cache_end = timeline.TimeStampUS();
VLOG(2) << "cache hit " << hit_counts
<< " keys in cube cache, last unique_keys:" << unique_keys.size()
<< " , seek_time:" << seek_cache_end - seek_cache_start;
// seek sparse params
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance(); rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names(); std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) { if (table_names.size() == 0) {
LOG(ERROR) << "cube init error or cube config not given."; LOG(ERROR) << "cube init error or cube config not given.";
return -1; return -1;
} }
// gather keys and seek cube servers, put results in values int64_t seek_start = timeline.TimeStampUS();
int ret = cube->seek(table_names[0], keys, &values); int ret = cube->seek(table_names[0], unique_keys, &values);
VLOG(3) << "(logid=" << log_id << ") cube seek status: " << ret; int64_t seek_end = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id << ") cube seek status: " << ret
<< " , unique_key: " << unique_keys.size()
<< " , seek_time: " << seek_end - seek_start;
for (size_t i = 0; i < unique_keys.size(); ++i) {
key_map[unique_keys[i]] = &values[i];
}
if (values.size() != keys.size() || values[0].buff.size() == 0) { if (values.size() != keys.size() || values[0].buff.size() == 0) {
LOG(ERROR) << "cube value return null"; LOG(ERROR) << "cube value return null";
} }
  // EMBEDDING_SIZE is the length of the sparse embedding vector; users can override it here.
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float); size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
// size_t EMBEDDING_SIZE = (values[0].buff.size() - 10) / sizeof(float);
//size_t EMBEDDING_SIZE = 9;
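  // e.g. a 36-byte cube value gives EMBEDDING_SIZE = 36 / sizeof(float) = 9 floats per key.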
TensorVector sparse_out; TensorVector sparse_out;
sparse_out.resize(sparse_count); sparse_out.resize(sparse_count);
TensorVector dense_out; TensorVector dense_out;
...@@ -126,8 +199,10 @@ int GeneralDistKVInferOp::inference() { ...@@ -126,8 +199,10 @@ int GeneralDistKVInferOp::inference() {
std::unordered_map<int, int> in_out_map; std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
std::shared_ptr<PaddleGeneralModelConfig> model_config = resource.get_general_model_config().front(); std::shared_ptr<PaddleGeneralModelConfig> model_config =
//copy data to tnsor resource.get_general_model_config().front();
int cube_key_found = 0;
int cube_key_miss = 0;
for (size_t i = 0; i < in->size(); ++i) { for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) { if (in->at(i).dtype != paddle::PaddleDType::INT64) {
dense_out[dense_idx] = in->at(i); dense_out[dense_idx] = in->at(i);
...@@ -142,43 +217,75 @@ int GeneralDistKVInferOp::inference() { ...@@ -142,43 +217,75 @@ int GeneralDistKVInferOp::inference() {
sparse_out[sparse_idx].lod[x].begin()); sparse_out[sparse_idx].lod[x].begin());
} }
sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32; sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
sparse_out[sparse_idx].shape.push_back(sparse_out[sparse_idx].lod[0].back()); sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE); sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i]; sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() * sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float)); EMBEDDING_SIZE * sizeof(float));
float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data()); float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
if (!dst_ptr) {
VLOG(2) << "dst_ptr is null. sparse_idx:" << sparse_idx;
continue;
}
for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) { for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
float *data_ptr = dst_ptr + x * EMBEDDING_SIZE; float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
memcpy(data_ptr, uint64_t cur_key = keys[cube_val_idx];
values[cube_val_idx].buff.data(), rec::mcube::CubeValue *cur_val = key_map[cur_key];
values[cube_val_idx].buff.size()); if (cur_val->buff.size() == 0) {
cube_val_idx++; memset(data_ptr, (float)0.0, sizeof(float) * EMBEDDING_SIZE);
++cube_key_miss;
++cube_val_idx;
continue;
}
// The data generated by pslib has 10 bytes of information to be filtered
// out
memcpy(data_ptr, cur_val->buff.data(), cur_val->buff.size() );
// VLOG(3) << keys[cube_val_idx] << ":" << data_ptr[0] << ", " <<
// data_ptr[1] << ", " <<data_ptr[2] << ", " <<data_ptr[3] << ", "
// <<data_ptr[4] << ", " <<data_ptr[5] << ", " <<data_ptr[6] << ", "
// <<data_ptr[7] << ", " <<data_ptr[8];
++cube_key_found;
++cube_val_idx;
} }
++sparse_idx; ++sparse_idx;
} }
VLOG(3) << "(logid=" << log_id << ") sparse tensor load success."; bool cube_fail = (cube_key_found == 0);
if (cube_fail) {
LOG(WARNING) << "(logid=" << log_id << ") cube seek fail";
}
VLOG(2) << "(logid=" << log_id << ") cube key found: " << cube_key_found
<< " , cube key miss: " << cube_key_miss;
VLOG(2) << "(logid=" << log_id << ") sparse tensor load success.";
timeline.Pause();
VLOG(2) << "dist kv, cube and datacopy time: " << timeline.ElapsedUS();
TensorVector infer_in; TensorVector infer_in;
infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end()); infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end()); infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
int batch_size = input_blob->_batch_size; int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size; output_blob->_batch_size = batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS(); int64_t start = timeline.TimeStampUS();
timeline.Start(); timeline.Start();
// call paddle inference here // call paddle inference here
if (InferManager::instance().infer( if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) { engine_name().c_str(), &infer_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id << ") Failed do infer in fluid model: " << engine_name(); LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name();
return -1; return -1;
} }
int64_t end = timeline.TimeStampUS(); int64_t end = timeline.TimeStampUS();
if (cube_fail) {
float *out_ptr = static_cast<float *>(out->at(0).data.data());
out_ptr[0] = 0.0;
}
timeline.Pause();
VLOG(2) << "dist kv, pure paddle infer time: " << timeline.ElapsedUS();
CopyBlobInfo(input_blob, output_blob); CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start); AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end); AddBlobInfo(output_blob, end);
return 0; return 0;
} }
DEFINE_OP(GeneralDistKVInferOp); DEFINE_OP(GeneralDistKVInferOp);
......
...@@ -31,7 +31,22 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -31,7 +31,22 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; // support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int GeneralReaderOp::inference() { int GeneralReaderOp::inference() {
// read request from client // read request from client
...@@ -78,6 +93,7 @@ int GeneralReaderOp::inference() { ...@@ -78,6 +93,7 @@ int GeneralReaderOp::inference() {
int64_t elem_type = 0; int64_t elem_type = 0;
int64_t elem_size = 0; int64_t elem_size = 0;
int64_t databuf_size = 0; int64_t databuf_size = 0;
const void* src_ptr = nullptr;
for (int i = 0; i < var_num; ++i) { for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor; paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->tensor(i); const Tensor &tensor = req->tensor(i);
...@@ -86,19 +102,38 @@ int GeneralReaderOp::inference() { ...@@ -86,19 +102,38 @@ int GeneralReaderOp::inference() {
elem_size = 0; elem_size = 0;
databuf_size = 0; databuf_size = 0;
elem_type = tensor.elem_type(); elem_type = tensor.elem_type();
VLOG(2) << "var[" << i << "] has elem type: " << elem_type; src_ptr = nullptr ;
if (elem_type == P_INT64) { // int64 if (elem_type == P_INT64) { // int64
elem_size = sizeof(int64_t); elem_size = sizeof(int64_t);
paddleTensor.dtype = paddle::PaddleDType::INT64; paddleTensor.dtype = paddle::PaddleDType::INT64;
data_len = tensor.int64_data_size(); data_len = tensor.int64_data_size();
src_ptr = tensor.int64_data().data();
} else if (elem_type == P_FLOAT32) { } else if (elem_type == P_FLOAT32) {
elem_size = sizeof(float); elem_size = sizeof(float);
paddleTensor.dtype = paddle::PaddleDType::FLOAT32; paddleTensor.dtype = paddle::PaddleDType::FLOAT32;
data_len = tensor.float_data_size(); data_len = tensor.float_data_size();
src_ptr = tensor.float_data().data();
} else if (elem_type == P_INT32) { } else if (elem_type == P_INT32) {
elem_size = sizeof(int32_t); elem_size = sizeof(int32_t);
paddleTensor.dtype = paddle::PaddleDType::INT32; paddleTensor.dtype = paddle::PaddleDType::INT32;
data_len = tensor.int_data_size(); data_len = tensor.int_data_size();
src_ptr = tensor.int_data().data();
} else if (elem_type == P_UINT8) {
elem_size = sizeof(uint8_t);
paddleTensor.dtype = paddle::PaddleDType::UINT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_INT8) {
elem_size = sizeof(int8_t);
paddleTensor.dtype = paddle::PaddleDType::INT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_FP16) {
// copy bytes from tensor content to TensorVector
elem_size = 1;
paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
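      // elem_size is 1 here because data_len already counts the raw bytes of
      // the fp16 buffer carried in tensor_content.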
} else if (elem_type == P_STRING) { } else if (elem_type == P_STRING) {
// use paddle::PaddleDType::UINT8 as for String. // use paddle::PaddleDType::UINT8 as for String.
elem_size = sizeof(char); elem_size = sizeof(char);
...@@ -109,7 +144,17 @@ int GeneralReaderOp::inference() { ...@@ -109,7 +144,17 @@ int GeneralReaderOp::inference() {
// now only support single string // now only support single string
for (int idx = 0; idx < tensor.data_size(); idx++) { for (int idx = 0; idx < tensor.data_size(); idx++) {
data_len += tensor.data()[idx].length() + 1; data_len += tensor.data()[idx].length() + 1;
src_ptr = tensor.data()[idx].data();
}
} }
VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";"
<< "elem_size=" << elem_size << ";"
<< "dtype=" << paddleTensor.dtype << ";"
<< "data_len=" << data_len;
if (src_ptr == nullptr) {
LOG(ERROR) << "Not support var[" << i << "] with elem_type["
<< elem_type << "]";
continue;
} }
// implement lod tensor here // implement lod tensor here
// only support 1-D lod // only support 1-D lod
...@@ -141,44 +186,17 @@ int GeneralReaderOp::inference() { ...@@ -141,44 +186,17 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") var[" << i VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has lod_tensor and len=" << out->at(i).lod[0].back(); << "] has lod_tensor and len=" << out->at(i).lod[0].back();
} }
if (elem_type == P_INT64) { void* dst_ptr = out->at(i).data.data();
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int64_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int64_data().data(), databuf_size);
/*
int elem_num = tensor.int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.int64_data(k);
}
*/
} else if (elem_type == P_FLOAT32) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.float_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.float_data().data(), databuf_size);
/*int elem_num = tensor.float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.float_data(k);
}*/
} else if (elem_type == P_INT32) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int_data(0);
if (!dst_ptr) { if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr"; LOG(ERROR) << "dst_ptr is nullptr";
return -1; return -1;
} }
memcpy(dst_ptr, tensor.int_data().data(), databuf_size);
} else if (elem_type == P_STRING) { // For common data, we just copy from src to dst
// For string data, we need to iterate through all str
if (elem_type != P_STRING) {
memcpy(dst_ptr, src_ptr, databuf_size);
} else {
char *dst_ptr = static_cast<char *>(out->at(i).data.data()); char *dst_ptr = static_cast<char *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.data(0); << "] is " << tensor.data(0);
......
...@@ -74,11 +74,20 @@ int GeneralResponseOp::inference() { ...@@ -74,11 +74,20 @@ int GeneralResponseOp::inference() {
// and the order of Output is the same as the prototxt FetchVar. // and the order of Output is the same as the prototxt FetchVar.
// otherwise, you can only get the Output by the corresponding of // otherwise, you can only get the Output by the corresponding of
// Name -- Alias_name. // Name -- Alias_name.
if (req->fetch_var_names_size() > 0) {
fetch_index.resize(req->fetch_var_names_size()); fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) { for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] = fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)]; model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
} }
} else {
fetch_index.resize(model_config->_fetch_alias_name.size());
for (int i = 0; i < model_config->_fetch_alias_name.size(); ++i) {
fetch_index[i] =
model_config
->_fetch_alias_name_to_index[model_config->_fetch_alias_name[i]];
}
}
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) { for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi]; const std::string &pre_name = pre_node_names[pi];
...@@ -159,6 +168,21 @@ int GeneralResponseOp::inference() { ...@@ -159,6 +168,21 @@ int GeneralResponseOp::inference() {
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr, google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data); output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
} else if (dtype == paddle::PaddleDType::UINT8) {
tensor->set_elem_type(7);
VLOG(2) << "(logid=" << log_id << ")Prepare uint8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::INT8) {
tensor->set_elem_type(8);
VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::FLOAT16) {
tensor->set_elem_type(5);
VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} }
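      // The elem_type literals used above (7, 8, 5) correspond to P_UINT8,
      // P_INT8 and P_FP16 in the ProtoDataType enum.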
VLOG(2) << "(logid=" << log_id << ") fetch var [" VLOG(2) << "(logid=" << log_id << ") fetch var ["
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
syntax = "proto2"; syntax = "proto3";
import "pds_option.proto"; import "pds_option.proto";
import "builtin_format.proto"; import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model; package baidu.paddle_serving.predictor.general_model;
...@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model; ...@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true; option cc_generic_services = true;
message Tensor { message Tensor {
repeated string data = 1; // VarType: INT64
repeated int32 int_data = 2; repeated int64 int64_data = 1;
repeated int64 int64_data = 3;
repeated float float_data = 4; // VarType: FP32
optional int32 elem_type = repeated float float_data = 2;
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch // VarType: INT32
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 int_data = 3;
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt // VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
  // (Not supported) VarType: COMPLEX64; element 2x holds the real part,
  // element 2x+1 the imaginary part.
  repeated float complex64_data = 7;
  // (Not supported) VarType: COMPLEX128; element 2x holds the real part,
  // element 2x+1 the imaginary part.
  repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
  // Level of Details (LoD); supports variable-length data. Currently only used
  // for fetch tensors.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14; // get from the Model prototxt
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
}; };
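// A sketch of how elem_type pairs with the payload fields above (illustrative,
// not part of the wire format definition): FP32 data goes into float_data with
// elem_type = 1, INT64 into int64_data with elem_type = 0, INT32 into int_data
// with elem_type = 2, while FP16 / UINT8 / INT8 payloads are carried as raw
// bytes in tensor_content with elem_type = 5 / 7 / 8 respectively.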
message Request { message Request {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; bool profile_server = 3;
required uint64 log_id = 4 [ default = 0 ]; uint64 log_id = 4;
}; };
message Response { message Response {
repeated ModelOutput outputs = 1; repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2; repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
}; };
message ModelOutput { message ModelOutput {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; string engine_name = 2;
} }
service GeneralModelService { service GeneralModelService {
......
...@@ -276,43 +276,65 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -276,43 +276,65 @@ class PdsCodeGenerator : public CodeGenerator {
"output_name", "output_name",
google::protobuf::dots_to_colons(m->output_type()->full_name())); google::protobuf::dots_to_colons(m->output_type()->full_name()));
if (m->name() == "inference") { if (m->name() == "inference") {
std::string inference_body = "";
inference_body += " brpc::ClosureGuard done_guard(done);\n";
inference_body += " brpc::Controller* cntl = \n";
inference_body += " static_cast<brpc::Controller*>(cntl_base);\n";
inference_body += " cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n";
inference_body += " uint64_t log_id = request->log_id();\n";
inference_body += " cntl->set_log_id(log_id);\n";
inference_body += " ::baidu::paddle_serving::predictor::InferService* svr = \n";
inference_body += " ";
inference_body += "::baidu::paddle_serving::predictor::InferServiceManager::instance(";
inference_body += ").item(\"$service$\");\n";
inference_body += " if (svr == NULL) {\n";
inference_body += " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: ";
inference_body += "$service$\";\n";
inference_body += " cntl->SetFailed(404, \"Not found service: $service$\");\n";
inference_body += " return ;\n";
inference_body += " }\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "remote_side=\[\" << cntl->remote_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "local_side=\[\" << cntl->local_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
inference_body += " int err_code = svr->inference(request, response, log_id);\n";
inference_body += " if (err_code != 0) {\n";
inference_body += " LOG(WARNING)\n";
inference_body += " << \"(logid=\" << log_id << \") Failed call ";
inference_body += "inferservice[$name$], name[$service$]\"\n";
inference_body += " << \", error_code: \" << err_code;\n";
inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
inference_body += "failed!\");\n";
inference_body += " }\n";
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
inference_body += " std::ostringstream oss;\n";
inference_body += " oss << \"[serving]\"\n";
inference_body += " << \"logid=\" << log_id << \",\";\n";
inference_body += " int op_num = response->profile_time_size() / 2;\n";
inference_body += " for (int i = 0; i < op_num; ++i) {\n";
inference_body += " double t = (response->profile_time(i * 2 + 1)\n";
inference_body += " - response->profile_time(i * 2)) / 1000.0;\n";
inference_body += " oss << \"op\" << i << \"=\" << t << \"ms,\";\n";
inference_body += " }\n";
inference_body += " double total_time = (end - start) / 1000.0;\n";
inference_body += " oss << \"cost=\" << total_time << \"ms.\";\n";
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << oss.str();\n";
inference_body += " response->add_profile_time(start);\n";
inference_body += " response->add_profile_time(end);\n";
} else {
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - "; // NOLINT
inference_body += "start) << \"\]\";\n";
}
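      // For GeneralModelService the generated handler also logs a per-op
      // profiling summary and appends the start/end timestamps to the
      // response; other services keep the original single "tc=[...]" log.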
printer->Print( printer->Print(
" baidu::rpc::ClosureGuard done_guard(done);\n" inference_body.c_str(),
" baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"<< cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
"<< \"$name$\" << \"\]\";\n"
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
" }\n"
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name", "name",
class_name, class_name,
"service", "service",
...@@ -1021,45 +1043,65 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1021,45 +1043,65 @@ class PdsCodeGenerator : public CodeGenerator {
"output_name", "output_name",
google::protobuf::dots_to_colons(m->output_type()->full_name())); google::protobuf::dots_to_colons(m->output_type()->full_name()));
if (m->name() == "inference") { if (m->name() == "inference") {
std::string inference_body = "";
inference_body += " brpc::ClosureGuard done_guard(done);\n";
inference_body += " brpc::Controller* cntl = \n";
inference_body += " static_cast<brpc::Controller*>(cntl_base);\n";
inference_body += " cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n";
inference_body += " uint64_t log_id = request->log_id();\n";
inference_body += " cntl->set_log_id(log_id);\n";
inference_body += " ::baidu::paddle_serving::predictor::InferService* svr = \n";
inference_body += " ";
inference_body += "::baidu::paddle_serving::predictor::InferServiceManager::instance(";
inference_body += ").item(\"$service$\");\n";
inference_body += " if (svr == NULL) {\n";
inference_body += " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: ";
inference_body += "$service$\";\n";
inference_body += " cntl->SetFailed(404, \"Not found service: $service$\");\n";
inference_body += " return ;\n";
inference_body += " }\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "remote_side=\[\" << cntl->remote_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "local_side=\[\" << cntl->local_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
inference_body += " int err_code = svr->inference(request, response, log_id);\n";
inference_body += " if (err_code != 0) {\n";
inference_body += " LOG(WARNING)\n";
inference_body += " << \"(logid=\" << log_id << \") Failed call ";
inference_body += "inferservice[$name$], name[$service$]\"\n";
inference_body += " << \", error_code: \" << err_code;\n";
inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
inference_body += "failed!\");\n";
inference_body += " }\n";
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
inference_body += " std::ostringstream oss;\n";
inference_body += " oss << \"[serving]\"\n";
inference_body += " << \"logid=\" << log_id << \",\";\n";
inference_body += " int op_num = response->profile_time_size() / 2;\n";
inference_body += " for (int i = 0; i < op_num; ++i) {\n";
inference_body += " double t = (response->profile_time(i * 2 + 1)\n";
inference_body += " - response->profile_time(i * 2)) / 1000.0;\n";
inference_body += " oss << \"op\" << i << \"=\" << t << \"ms,\";\n";
inference_body += " }\n";
inference_body += " double total_time = (end - start) / 1000.0;\n";
inference_body += " oss << \"cost=\" << total_time << \"ms.\";\n";
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << oss.str();\n";
inference_body += " response->add_profile_time(start);\n";
inference_body += " response->add_profile_time(end);\n";
} else {
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - "; // NOLINT
inference_body += "start) << \"\]\";\n";
}
printer->Print( printer->Print(
" brpc::ClosureGuard done_guard(done);\n" inference_body.c_str(),
" brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
" }\n"
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name", "name",
class_name, class_name,
"service", "service",
...@@ -1492,11 +1534,6 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1492,11 +1534,6 @@ class PdsCodeGenerator : public CodeGenerator {
const FieldDescriptor* fd = in_shared_fields[si]; const FieldDescriptor* fd = in_shared_fields[si];
std::string field_name = fd->name(); std::string field_name = fd->name();
printer->Print("\n/////$field_name$\n", "field_name", field_name); printer->Print("\n/////$field_name$\n", "field_name", field_name);
if (fd->is_optional()) {
printer->Print(
"if (req->has_$field_name$()) {\n", "field_name", field_name);
printer->Indent();
}
if (fd->cpp_type() == if (fd->cpp_type() ==
google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE || google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE ||
fd->is_repeated()) { fd->is_repeated()) {
...@@ -1509,10 +1546,6 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1509,10 +1546,6 @@ class PdsCodeGenerator : public CodeGenerator {
"field_name", "field_name",
field_name); field_name);
} }
if (fd->is_optional()) {
printer->Outdent();
printer->Print("}\n");
}
} }
printer->Print( printer->Print(
......
...@@ -25,7 +25,7 @@ DEFINE_int32(port, 8010, ""); ...@@ -25,7 +25,7 @@ DEFINE_int32(port, 8010, "");
DEFINE_string(workflow_path, "./conf", ""); DEFINE_string(workflow_path, "./conf", "");
DEFINE_string(workflow_file, "workflow.prototxt", ""); DEFINE_string(workflow_file, "workflow.prototxt", "");
DEFINE_string(inferservice_path, "./conf", ""); DEFINE_string(inferservice_path, "./conf", "");
DEFINE_string(inferservice_file, "service.prototxt", ""); DEFINE_string(inferservice_file, "infer_service.prototxt", "");
DEFINE_string(logger_path, "./conf", ""); DEFINE_string(logger_path, "./conf", "");
DEFINE_string(logger_file, "log.conf", ""); DEFINE_string(logger_file, "log.conf", "");
DEFINE_string(resource_path, "./conf", ""); DEFINE_string(resource_path, "./conf", "");
......
FILE(GLOB framework_srcs ${CMAKE_CURRENT_LIST_DIR}/*.cpp) FILE(GLOB framework_srcs ${CMAKE_CURRENT_LIST_DIR}/*.cpp ${CMAKE_CURRENT_LIST_DIR}/../../cube/cube-builder/src/seqfile_reader.cpp)
LIST(APPEND pdserving_srcs ${framework_srcs}) LIST(APPEND pdserving_srcs ${framework_srcs})
LIST(APPEND pclient_srcs ${framework_srcs}) LIST(APPEND pclient_srcs ${framework_srcs})
...@@ -26,9 +26,90 @@ ...@@ -26,9 +26,90 @@
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/memory.h" #include "core/predictor/framework/memory.h"
// this file is included by bsf.h
namespace im { namespace im {
namespace bsf { namespace bsf {
template <typename InItemT, typename OutItemT>
bool Task<InItemT, OutItemT>::task_fetch_init(BatchTasks<TaskT>& batchTask) {
// Double-checked locking to reduce the scope of locking.
if (!fetch_init) {
if (taskmeta_num > 1) {
// When the task has been split into multiple taskmetas, locking is required.
AutoMutex lock(task_mut);
task_fetch_create(batchTask);
} else {
// When the task has only one taskmeta, no lock is needed.
task_fetch_create(batchTask);
}
}
return true;
}
template <typename InItemT, typename OutItemT>
bool Task<InItemT, OutItemT>::task_fetch_create(BatchTasks<TaskT>& batchTask) {
if (!fetch_init) {
vector_fetch_lod_index = batchTask.vector_fetch_lod_index;
set_fetch_nobatch_index = batchTask.set_fetch_nobatch_index;
OutVectorT taskMetaOutLodTensor;
size_t fetchvar_num = batchTask._batch_out.size();
for (size_t fetchvar_index = 0; fetchvar_index < fetchvar_num;
++fetchvar_index) {
size_t fetchvar_bytesize_index =
batchTask.fetchvar_bytesize(fetchvar_index);
size_t fetchvar_batch = 0;
// 1. nobatch fetchvar case
if (set_fetch_nobatch_index.size() > 0 &&
set_fetch_nobatch_index.find(fetchvar_index) !=
set_fetch_nobatch_index.end()) {
fetchvar_batch = 1;
} else if (vector_fetch_lod_index.size() > 0 &&
std::find(vector_fetch_lod_index.begin(),
vector_fetch_lod_index.end(),
fetchvar_index) != vector_fetch_lod_index.end()) {
// lod fetchvar case: the total shape[0] cannot be determined here.
// Allocate task_num temporary buffers according to the number of
// taskmetas in this task; each lod fetchvar is copied into its own
// temporary buffer. Finally the total size of the temporary buffers
// is computed and the fetchvar data and lod are merged.
fetchvar_batch = 0;
} else {
// Ordinary fetchvar case: the total fetchvar_batch of this Task
// equals the total input batch_size().
fetchvar_batch = batch_size();
}
paddle::PaddleTensor tensor_out;
tensor_out.name = batchTask._batch_out[fetchvar_index].name;
tensor_out.dtype =
paddle::PaddleDType(batchTask._batch_out[fetchvar_index].dtype);
tensor_out.shape = batchTask._batch_out[fetchvar_index].shape;
tensor_out.shape[0] = fetchvar_batch;
if (fetchvar_batch != 0) {
// The lod is empty at this point.
tensor_out.lod = batchTask._batch_out[fetchvar_index].lod;
// resize all batch memory at one time
size_t databuf_size = fetchvar_batch * fetchvar_bytesize_index;
tensor_out.data.Resize(databuf_size);
} else {
// When taskmeta_num = 1, only one taskMeta operates on the task at a
// time, so there is no thread-safety issue and taskMeta->task can
// resize and copy directly.
// When the task is split into multiple taskMetas, a temporary object
// is needed to record the results, which are merged once all of them
// have been collected.
if (taskmeta_num > 1) {
taskMetaOutLodTensor.push_back(tensor_out);
}
}
outVectorT_ptr->push_back(tensor_out);
}
// outLodTensorVector is actually a two-level vector whose shape is
// taskmeta_num * vector_fetch_lod_index.size();
outLodTensorVector.resize(taskmeta_num, taskMetaOutLodTensor);
fetch_init = true;
}
return true;
}
template <typename TaskT> template <typename TaskT>
void* TaskExecutor<TaskT>::thread_entry(void* args) { void* TaskExecutor<TaskT>::thread_entry(void* args) {
ThreadContext<TaskT>* context = static_cast<ThreadContext<TaskT>*>(args); ThreadContext<TaskT>* context = static_cast<ThreadContext<TaskT>*>(args);
...@@ -134,9 +215,10 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule( ...@@ -134,9 +215,10 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
LOG(ERROR) << "Failed get TaskT from object pool"; LOG(ERROR) << "Failed get TaskT from object pool";
return TaskHandler<TaskT>::valid_handle(); return TaskHandler<TaskT>::valid_handle();
} }
task->clear();
/* /*
if (!BatchTasks<TaskT>::check_valid(in, out, _batch_align)) { if (!BatchTasks<TaskT>::check_valid(in, out, _overrun)) {
LOG(ERROR) << "Invalid input & output"; LOG(ERROR) << "Invalid input & output";
return TaskHandler<TaskT>::valid_handle(); return TaskHandler<TaskT>::valid_handle();
} }
...@@ -156,9 +238,11 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule( ...@@ -156,9 +238,11 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
task->inVectorT_ptr = (const InVectorT*)inVectorT_ptr; task->inVectorT_ptr = (const InVectorT*)inVectorT_ptr;
task->outVectorT_ptr = (OutVectorT*)outVectorT_ptr; task->outVectorT_ptr = (OutVectorT*)outVectorT_ptr;
if (!task->task_init()) {
LOG(ERROR) << "task->init() failed";
}
task->rem = task->batch_size(); task->rem = task->batch_size();
task->index.store(0, butil::memory_order_relaxed); task->index.store(0, butil::memory_order_relaxed);
AutoMutex lock(_mut); AutoMutex lock(_mut);
_task_queue.push_back(task); _task_queue.push_back(task);
THREAD_COND_SIGNAL(&_cond); THREAD_COND_SIGNAL(&_cond);
...@@ -168,11 +252,12 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule( ...@@ -168,11 +252,12 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
// this function is accessed by multi thread. // this function is accessed by multi thread.
// so AutoMutex at first. // so AutoMutex at first.
// so batch.append_task is thread safe. // so batchTask.append_task is thread safe.
// you dont need to add extra lock in append_task() // you dont need to add extra lock in append_task()
// task is already init.
template <typename TaskT> template <typename TaskT>
bool TaskExecutor<TaskT>::move_task_to_batch( bool TaskExecutor<TaskT>::move_task_to_batch(
BatchTasks<TaskT>& batch) { // NOLINT BatchTasks<TaskT>& batchTask) { // NOLINT
AutoMutex lock(_mut); AutoMutex lock(_mut);
while (_task_queue.empty()) { while (_task_queue.empty()) {
THREAD_COND_WAIT(&_cond, &_mut); THREAD_COND_WAIT(&_cond, &_mut);
...@@ -183,15 +268,65 @@ bool TaskExecutor<TaskT>::move_task_to_batch( ...@@ -183,15 +268,65 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
return false; return false;
} }
TaskT* previous_task = nullptr;
while (!_task_queue.empty()) { while (!_task_queue.empty()) {
TaskT* task = _task_queue.front(); TaskT* task = _task_queue.front();
size_t rem = batch.append_task(task);
// Since we cannot know in advance whether a fetchVar is lod (even if the
// input is not lod, the output may still be lod), there are two options.
// The simple approach: never split a task, i.e. user requests may be
// merged and predicted together, but a single request is never split into
// two smaller parts. This only requires setting the engine attribute
// allow_split_request = false.
// The complex approach: allow a Task to be split, whether or not it
// contains lod. The difficulty is that before prediction we only know how
// many taskmetas the task was split into; only after prediction do we know
// how many fetchvars there are and how many of them are lod.
// Therefore the task must first create taskmeta_num * fetchvar
// num (lod-typed) temporary PaddleTensors (holding data and lod).
// Since the scheduling unit of the worker threads is the taskmeta, the
// creation can only be done in notify_task via taskmeta->task. Because
// multiple taskmetas map to one task, there is a multi-thread race, so a
// lock inside the task is required. An atomic operation is not sufficient,
// because every thread must wait until the PaddleTensors above have been
// created before it can continue. For ordinary fetchvars a lock is also
// needed to create the PaddleTensor before data can be copied into it.

// _overrun indicates whether the asynchronous BatchTasks may temporarily
// exceed its limit for a single request.
// When _overrun is true, even if BatchTasks has only 1 batch slot left,
// a complete Task is still put in, temporarily exceeding the limit.
// When _overrun is false, this is not allowed.
// If the model itself has a maximum batch limit, set it to false (the
// default). If the model has no maximum batch limit but you set a maximum
// batch for BatchTasks yourself, consider setting it to true.

// _allow_split_request == true: tasks may be split; if BatchTasks has
// 1 batch slot left, 1 batch is split off from the next Task.
// _allow_split_request == false: tasks are never split, and the remaining
// 1 batch slot in BatchTasks is wasted.
// The default is true, splitting tasks to maximize space utilization.
if (!batchTask.get_allow_split_request()) {
if (task->batch_size() > batchTask.get_rem_size() &&
!batchTask.get_overrun()) {
break;
}
}
// combine_task_valid decides whether two tasks can be merged.
// Apart from the outermost dim, the inner shapes must be identical for a
// merge; otherwise break out of the loop and put the task into the next
// batchTask. This guarantees that the tasks passed to
// batch.append_task(task) share the same inner shape.
// For the case where Shape[0] == 1 but != batch, only one of the values
// is kept when merging, so that feedvar must be identical to be merged;
// otherwise break out of the loop and put it into the next batchTask.
// PaddleTensor and PaddleBuf currently do not overload ==, so only a raw
// memory comparison is possible.
// TODO(HexToString): AutoPadding could be supported later on.
if (previous_task != nullptr) {
if (!task->combine_task_valid(previous_task)) {
break;
}
}
size_t rem = batchTask.append_task(task);
previous_task = task;
if (task->rem <= 0) { if (task->rem <= 0) {
_task_queue.pop_front(); _task_queue.pop_front();
} }
if (rem <= 0) break; if (rem <= 0) break;
} }
LOG(INFO) << "Number of tasks remaining in _task_queue is"
<< _task_queue.size();
return true; return true;
} }
...@@ -201,11 +336,12 @@ bool TaskExecutor<TaskT>::move_task_to_batch( ...@@ -201,11 +336,12 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
// TaskT is from the SingleTon TaskExecutor`s _task_queue // TaskT is from the SingleTon TaskExecutor`s _task_queue
// although TaskMeta is a local variable, but several TaskMeta may points to // although TaskMeta is a local variable, but several TaskMeta may points to
// the same TaskT which is get from the SingleTon TaskExecutor`s _task_queue. // the same TaskT which is get from the SingleTon TaskExecutor`s _task_queue.
// put TaskMeta to the local variable BatchTasks<TaskT> batch. // put TaskMeta to the local variable BatchTasks<TaskT> batchTask.
// batch.merge_tasks() and batch.notify_tasks() has no lock. // batchTask.merge_tasks() and batchTask.notify_tasks() has no lock.
// BatchTasks<TaskT> batch itself is a local variable, it`s thread safe. // BatchTasks<TaskT> batchTask itself is a local variable, it`s thread safe.
// If batch.merge_tasks() and batch.notify_tasks() do something to TaskMeta // If batchTask.merge_tasks() and batchTask.notify_tasks() do something to
// TaskMeta
// you need to pay attention to that. // you need to pay attention to that.
// Multi-Thread deal with different TaskMeta(cause it`s created as local // Multi-Thread deal with different TaskMeta(cause it`s created as local
// variable) // variable)
...@@ -242,11 +378,23 @@ int TaskExecutor<TaskT>::work(ThreadContext<TaskT>* context) { ...@@ -242,11 +378,23 @@ int TaskExecutor<TaskT>::work(ThreadContext<TaskT>* context) {
return -1; return -1;
} }
BatchTasks<TaskT> batch(_batch_size, _batch_align); // move_task_to_batch() takes the original task from the `_task_queue`
if (move_task_to_batch(batch)) { // put the original task into its own Vector<taskmeta>
batch.merge_tasks(); // the capacity of its own Vector<taskmeta> is decided by `_batch_size` or
_fn(&batch.in(), &batch.out()); // `_overrun`
batch.notify_tasks();
// merge_tasks() moves the input data into `_batch_in` from its own
// Vector<taskmeta>.
// because the predictor`s input is the `_batch_in`
// notify_tasks() moves the output data into every single taskmeta from
// `_batch_out`.
// because the predictor`s output is the `_batch_out`
BatchTasks<TaskT> batchTask(_batch_size, _overrun, _allow_split_request);
if (move_task_to_batch(batchTask)) {
batchTask.merge_tasks();
_fn(&batchTask.in(), &batchTask.out());
batchTask.notify_tasks();
} }
} }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#include "core/predictor/framework/cache.h"
#include <dirent.h>
#include <sys/stat.h>
#include <fstream>
#include <string>
#include <utility>
#include "core/cube/cube-builder/include/cube-builder/seqfile_reader.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
int CubeCache::clear() {
for (auto it = _map_cache.begin(); it != _map_cache.end(); ++it) {
if (it->second) {
delete (it->second);
it->second = nullptr;
}
}
_map_cache.clear();
return 0;
}
rec::mcube::CubeValue* CubeCache::get_data(uint64_t key) {
auto it = _map_cache.find(key);
if (it != _map_cache.end()) {
return it->second;
}
return nullptr;
}
int CubeCache::reload_data(const std::string& cache_path) {
LOG(INFO) << "cube cache is loading data, path: " << cache_path;
DIR* dp = nullptr;
struct dirent* dirp = nullptr;
struct stat st;
// clear cache data
clear();
// loading data from cache files
if (stat(cache_path.c_str(), &st) < 0 || !S_ISDIR(st.st_mode)) {
LOG(ERROR) << "invalid cache path " << cache_path;
return -1;
}
if ((dp = opendir(cache_path.c_str())) == nullptr) {
LOG(ERROR) << "opendir " << cache_path << " fail.";
return -1;
}
while ((dirp = readdir(dp)) != nullptr) {
// filtering by file type.
if (dirp->d_type != DT_REG) {
continue;
}
// Filter upper-level directories and hidden files
if ((!strncmp(dirp->d_name, ".", 1)) || (!strncmp(dirp->d_name, "..", 2))) {
continue;
}
// Match the file whose name prefix is 'part-'
if (std::string(dirp->d_name).find("part-") != std::string::npos) {
SequenceFileRecordReader reader(cache_path + "/" + dirp->d_name);
if (reader.open() != 0) {
LOG(ERROR) << "open file failed! " << dirp->d_name;
continue;
}
if (reader.read_header() != 0) {
LOG(ERROR) << "read header error! " << dirp->d_name;
reader.close();
continue;
}
Record record(reader.get_header());
while (reader.next(&record) == 0) {
uint64_t key =
*reinterpret_cast<uint64_t*>(const_cast<char*>(record.key.data()));
auto it_find = _map_cache.find(key);
if (it_find != _map_cache.end()) {
// duplicate key loaded
LOG(WARNING) << "Load duplicate key:" << key
<< " from file:" << dirp->d_name;
continue;
}
rec::mcube::CubeValue* new_value = new rec::mcube::CubeValue();
new_value->error = 0;
new_value->buff.swap(record.value);
_map_cache.insert(std::make_pair(key, new_value));
}
LOG(WARNING) << "Load cube cache file " << dirp->d_name << " done.";
}
LOG(WARNING) << "Load all cube cache files done";
}
return 0;
}
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sys/types.h>
#include <numeric>
#include <string>
#include <unordered_map>
#include "core/cube/cube-api/include/cube_api.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
// Large models that use sparse parameters may use cube cache.
// When the cube cache exists, the model is required to be
// consistent with the version of the cube cache. Therefore,
// when the model is updated, the model and the cube cache are
// required to be reloaded at the same time.
// All cached data is loaded at once and is not updated in place;
// switching between two cube caches is lock free.
class CubeCache {
public:
CubeCache() {}
~CubeCache() { clear(); }
// clear cache data.
int clear();
// get cache data by key
rec::mcube::CubeValue* get_data(uint64_t key);
// reload all cache files from cache_path
int reload_data(const std::string& cache_path);
private:
// lock-free switching; key type is uint64_t, value type is CubeValue*
std::unordered_map<uint64_t, rec::mcube::CubeValue*> _map_cache;
};
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
...@@ -21,6 +21,15 @@ ...@@ -21,6 +21,15 @@
#include <string> #include <string>
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/op_repository.h" #include "core/predictor/framework/op_repository.h"
#ifdef BCLOUD
#include <base/atomicops.h>
#else
#include <butil/atomicops.h>
#endif
#include <errno.h>
#include "core/predictor/framework/resource.h"
using baidu::paddle_serving::predictor::Resource;
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
...@@ -238,6 +247,77 @@ const Channel* DagView::get_response_channel(const uint64_t log_id) const { ...@@ -238,6 +247,77 @@ const Channel* DagView::get_response_channel(const uint64_t log_id) const {
return last_op->mutable_channel(); return last_op->mutable_channel();
} }
void* call_back(void* ori_args) {
Resource::instance().thread_initialize();
Args* args = (Args*)ori_args;
Op* op = static_cast<Op*>(args->_op);
uint64_t log_id = static_cast<uint64_t>(args->_log_id);
bool debug = static_cast<bool>(args->_debug);
args->errcode = op->process(log_id, debug);
return nullptr;
}
int ParallelDagView::execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
butil::Timer stage_time(butil::Timer::STARTED);
uint32_t node_size = vstage->nodes.size();
std::vector<THREAD_T> tids(node_size);
Args* args = new Args[node_size];
VLOG(2) << "(logid=" << log_id << ") vstage->nodes.size(): " << node_size;
for (uint32_t ni = 0; ni < node_size; ni++) {
ViewNode* vnode = vstage->nodes[ni];
DagNode* conf = vnode->conf;
Op* op = vnode->op;
TRACEPRINTF(
"(logid=%" PRIu64 ") start to execute op[%s]", log_id, op->name());
args[ni]._op = op;
args[ni]._log_id = log_id;
args[ni]._debug = (debug_os != NULL);
int rc = THREAD_CREATE(&tids[ni], NULL, call_back, (void*)(args + ni));
if (rc != 0) {
LOG(ERROR) << "failed to create ParallelDagView worker thread: index="
<< ni << ", rc=" << rc << ", errno=" << errno << ":"
<< strerror(errno);
delete[] args;
return -1;
}
}
for (uint32_t ni = 0; ni < node_size; ni++) {
THREAD_JOIN(tids[ni], NULL);
int errcode = args[ni].errcode;
Op* op = args[ni]._op;
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute op[%s]", log_id, op->name());
if (errcode < 0) {
LOG(ERROR) << "(logid=" << log_id
<< ") Execute failed, Op:" << op->debug_string();
delete[] args;
return errcode;
}
if (errcode > 0) {
LOG(INFO) << "(logid=" << log_id
<< ") Execute ignore, Op:" << op->debug_string();
continue;
}
if (debug_os) {
(*debug_os) << "(logid=" << log_id << ") {\"op_name\": \"" << op->name()
<< "\", \"debug_str:\": \"" << op->debug_string()
<< "\", \"time_info\": \"" << op->time_info() << "\"}";
}
// LOG(DEBUG) << "Execute succ, Op:" << op->debug_string();
}
stage_time.stop();
PredictorMetric::GetInstance()->update_latency_metric(
STAGE_METRIC_PREFIX + vstage->full_name, stage_time.u_elapsed());
delete[] args;
return ERR_OK;
}
} // namespace predictor } // namespace predictor
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
...@@ -24,7 +24,7 @@ namespace baidu { ...@@ -24,7 +24,7 @@ namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
class Op; // class Op;
struct ViewNode { struct ViewNode {
Op* op; // op->full_name == service_workflow_stageindex_opname Op* op; // op->full_name == service_workflow_stageindex_opname
...@@ -75,11 +75,20 @@ class DagView { ...@@ -75,11 +75,20 @@ class DagView {
Bus* _bus; Bus* _bus;
}; };
struct Args {
Op* _op;
uint64_t _log_id;
bool _debug;
int errcode;
};
// The derived DagView supports parallel execution // The derived DagView supports parallel execution
// strategy, by implementing the execute_one_stage(). // strategy, by implementing the execute_one_stage().
class ParallelDagView : public DagView { class ParallelDagView : public DagView {
public: public:
int execute_one_stage(ViewStage* vstage, butil::IOBufBuilder*) { return 0; } virtual int execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os);
}; };
} // namespace predictor } // namespace predictor
......
...@@ -25,7 +25,8 @@ int ReloadableInferEngine::proc_initialize_impl( ...@@ -25,7 +25,8 @@ int ReloadableInferEngine::proc_initialize_impl(
_model_dir = conf.model_dir(); _model_dir = conf.model_dir();
_infer_thread_num = conf.runtime_thread_num(); _infer_thread_num = conf.runtime_thread_num();
_infer_batch_size = conf.batch_infer_size(); _infer_batch_size = conf.batch_infer_size();
_infer_batch_align = conf.enable_batch_align(); _infer_overrun = conf.enable_overrun();
_allow_split_request = conf.allow_split_request();
_conf = conf; _conf = conf;
...@@ -56,9 +57,6 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf, ...@@ -56,9 +57,6 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
} }
// init bsf framework // init bsf framework
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this));
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index] im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_thread_init_fn( .set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this)); boost::bind(&InferEngine::thrd_initialize_impl, this));
...@@ -69,8 +67,10 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf, ...@@ -69,8 +67,10 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
boost::bind(&InferEngine::task_infer_impl, this, _1, _2)); boost::bind(&InferEngine::task_infer_impl, this, _1, _2));
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_size( im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_size(
_infer_batch_size); _infer_batch_size);
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_align( im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_overrun(
_infer_batch_align); _infer_overrun);
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_allow_split_request(_allow_split_request);
if (im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].start( if (im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].start(
_infer_thread_num) != 0) { _infer_thread_num) != 0) {
LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num; LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num;
...@@ -79,7 +79,8 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf, ...@@ -79,7 +79,8 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
LOG(WARNING) << "Enable batch schedule framework, thread_num:" LOG(WARNING) << "Enable batch schedule framework, thread_num:"
<< _infer_thread_num << ", batch_size:" << _infer_batch_size << _infer_thread_num << ", batch_size:" << _infer_batch_size
<< ", enable_batch_align:" << _infer_batch_align; << ", enable_overrun:" << _infer_overrun
<< ", allow_split_request:" << _allow_split_request;
return 0; return 0;
} }
...@@ -348,7 +349,7 @@ T* VersionedInferEngine::get_core() { ...@@ -348,7 +349,7 @@ T* VersionedInferEngine::get_core() {
} }
template <typename T> template <typename T>
T* VersionedInferEngine::get_core(uint64_t version) { T* VersionedInferEngine::get_core(const uint64_t version) {
auto iter = _versions.find(version); auto iter = _versions.find(version);
if (iter == _versions.end()) { if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version; LOG(ERROR) << "Not found version engine: " << version;
...@@ -363,6 +364,15 @@ T* VersionedInferEngine::get_core(uint64_t version) { ...@@ -363,6 +364,15 @@ T* VersionedInferEngine::get_core(uint64_t version) {
return NULL; return NULL;
} }
CubeCache* VersionedInferEngine::get_cube_cache() {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
return nullptr;
}
return engine->get_cube_cache();
}
int VersionedInferEngine::proc_initialize_impl( int VersionedInferEngine::proc_initialize_impl(
const configure::EngineDesc& conf, bool) { const configure::EngineDesc& conf, bool) {
return -1; return -1;
...@@ -382,6 +392,11 @@ int VersionedInferEngine::task_infer_impl(const void* in, ...@@ -382,6 +392,11 @@ int VersionedInferEngine::task_infer_impl(const void* in,
return -1; return -1;
} }
int InferManager::set_taskexecutor_num(size_t total_engine_num) {
im::bsf::TaskExecutorVector<TaskT>::instance().resize(total_engine_num);
return 0;
}
int InferManager::proc_initialize(const char* path, int InferManager::proc_initialize(const char* path,
const char* file, const char* file,
std::shared_ptr<int> engine_index_ptr) { std::shared_ptr<int> engine_index_ptr) {
...@@ -391,8 +406,6 @@ int InferManager::proc_initialize(const char* path, ...@@ -391,8 +406,6 @@ int InferManager::proc_initialize(const char* path,
return -1; return -1;
} }
uint32_t engine_num = model_toolkit_conf.engines_size(); uint32_t engine_num = model_toolkit_conf.engines_size();
im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr +
engine_num);
for (uint32_t ei = 0; ei < engine_num; ++ei) { for (uint32_t ei = 0; ei < engine_num; ++ei) {
LOG(INFO) << "model_toolkit_conf.engines(" << ei LOG(INFO) << "model_toolkit_conf.engines(" << ei
<< ").name: " << model_toolkit_conf.engines(ei).name(); << ").name: " << model_toolkit_conf.engines(ei).name();
...@@ -502,6 +515,15 @@ T* InferManager::get_core(const char* model_name) { ...@@ -502,6 +515,15 @@ T* InferManager::get_core(const char* model_name) {
return NULL; return NULL;
} }
CubeCache* InferManager::get_cube_cache(const char* model_name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
return nullptr;
}
return it->second->get_cube_cache();
}
// Versioned inference interface // Versioned inference interface
int InferManager::infer(const char* model_name, int InferManager::infer(const char* model_name,
const void* in, const void* in,
...@@ -517,7 +539,7 @@ int InferManager::infer(const char* model_name, ...@@ -517,7 +539,7 @@ int InferManager::infer(const char* model_name,
} }
template <typename T> template <typename T>
T* InferManager::get_core(const char* model_name, uint64_t version) { T* InferManager::get_core(const char* model_name, const uint64_t version) {
auto it = _map.find(model_name); auto it = _map.find(model_name);
if (it == _map.end()) { if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
......
...@@ -135,6 +135,17 @@ int Resource::initialize(const std::string& path, const std::string& file) { ...@@ -135,6 +135,17 @@ int Resource::initialize(const std::string& path, const std::string& file) {
if (FLAGS_enable_model_toolkit) { if (FLAGS_enable_model_toolkit) {
size_t model_toolkit_num = resource_conf.model_toolkit_path_size(); size_t model_toolkit_num = resource_conf.model_toolkit_path_size();
// For now we assume each model_toolkit contains exactly one engine,
// so model_toolkit_num == total number of engines.
// If a model_toolkit ever contains multiple engines, first count the
// total number of engines in a for loop, then call set_taskexecutor_num.
// Never resize im::bsf::TaskExecutorVector<TaskT>::instance() dynamically:
// TaskExecutor is a thread pool that contains a mutex, and its work loop
// is already running with that lock by the time the engine process is
// initialized. Resizing later relocates the memory, so work keeps using
// the original lock while the lock memory of the relocated TaskExecutor
// has already changed.
if (InferManager::instance().set_taskexecutor_num(model_toolkit_num) != 0) {
LOG(ERROR) << "failed set_taskexecutor_num";
return -1;
}
std::shared_ptr<int> engine_index_ptr(new int(0)); std::shared_ptr<int> engine_index_ptr(new int(0));
for (size_t mi = 0; mi < model_toolkit_num; ++mi) { for (size_t mi = 0; mi < model_toolkit_num; ++mi) {
std::string model_toolkit_path = resource_conf.model_toolkit_path(mi); std::string model_toolkit_path = resource_conf.model_toolkit_path(mi);
...@@ -165,18 +176,18 @@ int Resource::initialize(const std::string& path, const std::string& file) { ...@@ -165,18 +176,18 @@ int Resource::initialize(const std::string& path, const std::string& file) {
rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance(); rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance();
std::string cube_config_fullpath = "./" + resource_conf.cube_config_path() + std::string cube_config_fullpath = "./" + resource_conf.cube_config_path() +
"/" + resource_conf.cube_config_file(); "/" + resource_conf.cube_config_file();
this->cube_config_fullpath = cube_config_fullpath; this->_cube_config_fullpath = cube_config_fullpath;
this->cube_quant_bits = resource_conf.has_cube_quant_bits() this->_cube_quant_bits = resource_conf.has_cube_quant_bits()
? resource_conf.cube_quant_bits() ? resource_conf.cube_quant_bits()
: 0; : 0;
if (this->cube_quant_bits != 0 && this->cube_quant_bits != 8) { if (this->_cube_quant_bits != 0 && this->_cube_quant_bits != 8) {
LOG(ERROR) << "Cube quant bits illegal! should be 0 or 8."; LOG(ERROR) << "Cube quant bits illegal! should be 0 or 8.";
return -1; return -1;
} }
if (this->cube_quant_bits == 0) { if (this->_cube_quant_bits == 0) {
LOG(INFO) << "cube quant mode OFF"; LOG(INFO) << "cube quant mode OFF";
} else { } else {
LOG(INFO) << "cube quant mode ON, quant bits: " << this->cube_quant_bits; LOG(INFO) << "cube quant mode ON, quant bits: " << this->_cube_quant_bits;
} }
} }
...@@ -187,10 +198,10 @@ int Resource::initialize(const std::string& path, const std::string& file) { ...@@ -187,10 +198,10 @@ int Resource::initialize(const std::string& path, const std::string& file) {
// model config // model config
int Resource::general_model_initialize(const std::string& path, int Resource::general_model_initialize(const std::string& path,
const std::string& file) { const std::string& file) {
if (this->cube_config_fullpath.size() != 0) { if (this->_cube_config_fullpath.size() != 0) {
LOG(INFO) << "init cube by config file : " << this->cube_config_fullpath; LOG(INFO) << "init cube by config file : " << this->_cube_config_fullpath;
rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance(); rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance();
int ret = cube->init(this->cube_config_fullpath.c_str()); int ret = cube->init(this->_cube_config_fullpath.c_str());
if (ret != 0) { if (ret != 0) {
LOG(ERROR) << "cube init error"; LOG(ERROR) << "cube init error";
return -1; return -1;
...@@ -315,7 +326,7 @@ int Resource::thread_clear() { ...@@ -315,7 +326,7 @@ int Resource::thread_clear() {
} }
return 0; return 0;
} }
size_t Resource::get_cube_quant_bits() { return this->cube_quant_bits; } size_t Resource::get_cube_quant_bits() { return this->_cube_quant_bits; }
int Resource::reload() { int Resource::reload() {
if (FLAGS_enable_model_toolkit && InferManager::instance().reload() != 0) { if (FLAGS_enable_model_toolkit && InferManager::instance().reload() != 0) {
......
...@@ -16,8 +16,10 @@ ...@@ -16,8 +16,10 @@
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "core/cube/cube-api/include/cube_api.h" #include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
...@@ -27,6 +29,8 @@ namespace baidu { ...@@ -27,6 +29,8 @@ namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
// Paddle general model configuration, read the model configuration information
// from the general_model_config.proto file
class PaddleGeneralModelConfig { class PaddleGeneralModelConfig {
public: public:
PaddleGeneralModelConfig() {} PaddleGeneralModelConfig() {}
...@@ -34,23 +38,47 @@ class PaddleGeneralModelConfig { ...@@ -34,23 +38,47 @@ class PaddleGeneralModelConfig {
~PaddleGeneralModelConfig() {} ~PaddleGeneralModelConfig() {}
public: public:
// feed/fetch name and alias_name
std::vector<std::string> _feed_name; std::vector<std::string> _feed_name;
std::vector<std::string> _feed_alias_name; std::vector<std::string> _feed_alias_name;
std::vector<int> _feed_type; // 0 int64, 1 float
std::vector<bool> _is_lod_feed; // true lod tensor
std::vector<bool> _is_lod_fetch; // whether a fetch var is lod_tensor
std::vector<int> _capacity; // capacity for each tensor
/*
feed_shape_ for feeded variable
feed_shape_[i][j] represents the jth dim for ith input Tensor
if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
*/
std::vector<std::vector<int>> _feed_shape;
std::vector<std::string> _fetch_name; std::vector<std::string> _fetch_name;
std::vector<std::string> _fetch_alias_name; std::vector<std::string> _fetch_alias_name;
// Be consistent with model saving interface var type conversion
// (python/paddle serving client/io/__init__)
// int64 => 0;
// float32 => 1;
// int32 => 2;
// float64 => 3;
// int16 => 4;
// float16 => 5;
// bfloat16 => 6;
// uint8 => 7;
// int8 => 8;
// bool => 9;
// complex64 => 10,
// complex128 => 11;
std::vector<int> _feed_type;
// whether a feed or fetch var is lod_tensor.
std::vector<bool> _is_lod_feed;
std::vector<bool> _is_lod_fetch;
// capacity for each tensor
std::vector<int> _capacity;
// _feed_shape and _fetch_shape are used to represent the dimensional
// information of tensor.
// for example, feed_shape_[i][j] represents the j(th) dim of the i(th) input
// tensor.
// if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
std::vector<std::vector<int>> _feed_shape;
std::vector<std::vector<int>> _fetch_shape; std::vector<std::vector<int>> _fetch_shape;
// fetch name -> index of fetch_name vector.
std::map<std::string, int> _fetch_name_to_index; std::map<std::string, int> _fetch_name_to_index;
// fetch alias name -> index of fetch_alias_name vector.
std::map<std::string, int> _fetch_alias_name_to_index; std::map<std::string, int> _fetch_alias_name_to_index;
}; };
...@@ -73,33 +101,50 @@ class Resource { ...@@ -73,33 +101,50 @@ class Resource {
return ins; return ins;
} }
// initialize resource
int initialize(const std::string& path, const std::string& file); int initialize(const std::string& path, const std::string& file);
// load all model configurations from prototxt
int general_model_initialize(const std::string& path, int general_model_initialize(const std::string& path,
const std::string& file); const std::string& file);
// initialize thread local data
int thread_initialize(); int thread_initialize();
// clear thread local data
int thread_clear(); int thread_clear();
// reload resources
int reload(); int reload();
// finalize
int finalize(); int finalize();
// get all model configs
std::vector<std::shared_ptr<PaddleGeneralModelConfig>> std::vector<std::shared_ptr<PaddleGeneralModelConfig>>
get_general_model_config(); get_general_model_config();
// print all configurations of all models
void print_general_model_config( void print_general_model_config(
const std::shared_ptr<PaddleGeneralModelConfig>& config); const std::shared_ptr<PaddleGeneralModelConfig>& config);
// get cube quantization bit size
size_t get_cube_quant_bits(); size_t get_cube_quant_bits();
private: private:
int thread_finalize() { return 0; } int thread_finalize() { return 0; }
private:
// configuration information of all models, loaded from prototxt files
std::vector<std::shared_ptr<PaddleGeneralModelConfig>> _configs; std::vector<std::shared_ptr<PaddleGeneralModelConfig>> _configs;
std::string cube_config_fullpath;
int cube_quant_bits; // 0 if no empty
// full path of cube configuration file.
std::string _cube_config_fullpath;
// cube quantization bit size; supports 0 or 8, set 0 for no quantization.
size_t _cube_quant_bits;
// bthread local key
THREAD_KEY_T _tls_bspec_key; THREAD_KEY_T _tls_bspec_key;
}; };
......
...@@ -82,14 +82,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -82,14 +82,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
else if (resize_h / 32 < 1 + 1e-5) else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32; resize_h = 32;
else else
resize_h = (resize_h / 32) * 32; resize_h = (resize_h / 32 - 1) * 32;
if (resize_w % 32 == 0) if (resize_w % 32 == 0)
resize_w = resize_w; resize_w = resize_w;
else if (resize_w / 32 < 1 + 1e-5) else if (resize_w / 32 < 1 + 1e-5)
resize_w = 32; resize_w = 32;
else else
resize_w = (resize_w / 32) * 32; resize_w = (resize_w / 32 - 1) * 32;
if (!use_tensorrt) { if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_h = float(resize_h) / float(h); ratio_h = float(resize_h) / float(h);
......
...@@ -12,7 +12,7 @@ BRPC-Server会尝试去JSON字符串中再去反序列化出Proto格式的数据 ...@@ -12,7 +12,7 @@ BRPC-Server会尝试去JSON字符串中再去反序列化出Proto格式的数据
### Http+protobuf ### Http+protobuf
ProtoBuf is supported in every major language. If you are familiar with it, you can serialize the data with ProtoBuf first, put the serialized bytes into the HTTP request body, and set Content-Type: application/proto, so that the service is accessed as an http/h2+protobuf binary stream. ProtoBuf is supported in every major language. If you are familiar with it, you can serialize the data with ProtoBuf first, put the serialized bytes into the HTTP request body, and set Content-Type: application/proto, so that the service is accessed as an http/h2+protobuf binary stream.
In our tests, as the data volume grows, the payload size and deserialization time of JSON-based HTTP increase sharply, so Http+protobuf is recommended for large payloads. We will add this capability to the framework's HttpClient later; it is not supported yet. In our tests, as the data volume grows, the payload size and deserialization time of JSON-based HTTP increase sharply, so Http+protobuf is recommended for large payloads. It is already supported by the Java and Python clients.
**In theory, serialization/deserialization performance ranks, from highest to lowest: protobuf > http/h2+protobuf > http** **In theory, serialization/deserialization performance ranks, from highest to lowest: protobuf > http/h2+protobuf > http**
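A minimal sketch of the Http+protobuf approach in Python, assuming a module compiled from `core/general-server/proto/general_model_service.proto` with `Request`/`Response` messages, a server on 127.0.0.1:9393, and brpc's default `/ServiceName/MethodName` URL mapping; the module name, port, and field filling below are illustrative assumptions only:

```
# hedged sketch: module name, port and request fields are assumptions
import requests
import general_model_service_pb2 as pb  # hypothetical generated module

req = pb.Request()              # fill tensors / fetch_var_names as needed
body = req.SerializeToString()  # serialize with ProtoBuf

resp = requests.post(
    "http://127.0.0.1:9393/GeneralModelService/inference",
    data=body,
    headers={"Content-Type": "application/proto"})

result = pb.Response()          # deserialize the binary response body
result.ParseFromString(resp.content)
```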
...@@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 - ...@@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 -
To make it easy to request the server-side prediction service over HTTP, we have wrapped the common HTTP request features (request body construction, compression, request encryption, etc.) into an HttpClient class for users. To make it easy to request the server-side prediction service over HTTP, we have wrapped the common HTTP request features (request body construction, compression, request encryption, etc.) into an HttpClient class for users.
In the simplest case, using HttpClient takes only three steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the Predict function to request the prediction service over HTTP. In the simplest case, using HttpClient takes only four steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the connect function; 4. call the Predict function to request the prediction service over HTTP.
In addition, you can configure the server IP, port, and service name as needed (the service name must match the Service name and rpc method name in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), i.e. the `GeneralModelService` and `inference` fields), enable request body compression, enable compressed responses, use encrypted-model prediction (the server must be configured with model encryption), set the response timeout, and so on. In addition, you can configure the server IP, port, and service name as needed (the service name must match the Service name and rpc method name in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), i.e. the `GeneralModelService` and `inference` fields), enable request body compression, enable compressed responses, use encrypted-model prediction (the server must be configured with model encryption), set the response timeout, and so on.
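The four steps above look roughly like the following in Python. This is a hedged sketch: the exact import path, method signatures, and the sample feed data are illustrative assumptions and may differ between Serving versions.

```
# hedged sketch of the four-step HttpClient flow; names are illustrative
from paddle_serving_client.httpclient import HttpClient

client = HttpClient()                                    # 1. create an HttpClient object
client.load_client_config(
    "uci_housing_client/serving_client_conf.prototxt")   # 2. load the client prototxt
client.connect(["127.0.0.1:9393"])                       # 3. connect to the server
fetch_map = client.predict(                              # 4. Predict over HTTP
    feed={"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
                -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]},
    fetch=["price"])
print(fetch_map)
```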
...@@ -52,7 +52,9 @@ Java的HttpClient使用示例见[`java/examples/src/main/java/PaddleServingClien ...@@ -52,7 +52,9 @@ Java的HttpClient使用示例见[`java/examples/src/main/java/PaddleServingClien
If this does not meet your needs, you can also add features on top of it. If this does not meet your needs, you can also add features on top of it.
To support https or custom response status codes, some secondary development of the C++ brpc server is required; see https://github.com/apache/incubator-brpc/blob/master/docs/cn/http_service.md. If there is strong demand, we will also add these features to the Server later; stay tuned. To support https or custom response status codes, some secondary development of the C++ brpc server is required; see https://github.com/apache/incubator-brpc/blob/master/docs/cn/http_service.md
If there is strong demand, we will also add these features to the Server later; stay tuned.
### Sending HTTP requests with curl (basic principle) ### Sending HTTP requests with curl (basic principle)
...@@ -101,7 +103,7 @@ repeated int32 numbers = 1; ...@@ -101,7 +103,7 @@ repeated int32 numbers = 1;
``` ```
#### elem_type #### elem_type
Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string) Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 20 means bytes(string)
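For illustration only (not part of the Serving API), a small helper that picks the elem_type code described above from the data to be sent, following the updated mapping (0/1/2/20):

```
# illustrative helper, assuming numpy arrays or raw bytes/str as inputs
import numpy as np

ELEM_TYPE = {np.int64: 0, np.float32: 1, np.int32: 2, bytes: 20}

def elem_type_of(value):
    """Return the elem_type code for a numpy array or a bytes payload."""
    if isinstance(value, (bytes, str)):
        return ELEM_TYPE[bytes]
    return ELEM_TYPE[value.dtype.type]

print(elem_type_of(np.zeros((1, 13), dtype=np.float32)))  # -> 1
```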
#### fetch_var_names #### fetch_var_names
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model blazeface python3 -m paddle_serving_app.package --get_model blazeface
tar -xf blazeface.tar.gz tar -xf blazeface.tar.gz
``` ```
...@@ -11,13 +11,13 @@ tar -xf blazeface.tar.gz ...@@ -11,13 +11,13 @@ tar -xf blazeface.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model serving_server --port 9494 python3 -m paddle_serving_server.serve --model serving_server --port 9494
``` ```
### Client Prediction ### Client Prediction
``` ```
python test_client.py serving_client/serving_client_conf.prototxt test.jpg python3 test_client.py serving_client/serving_client_conf.prototxt test.jpg
``` ```
the result is in `output` folder, including a json file and image file with bounding boxes. the result is in `output` folder, including a json file and image file with bounding boxes.
...@@ -10,12 +10,12 @@ If you want to have more detection models, please refer to [Paddle Detection Mod ...@@ -10,12 +10,12 @@ If you want to have more detection models, please refer to [Paddle Detection Mod
### Start the service ### Start the service
``` ```
python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 python3 -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
``` ```
### Perform prediction ### Perform prediction
``` ```
python test_client.py python3 test_client.py 000000570688.jpg
``` ```
Image with bounding boxes and json result would be saved in `output` folder. Image with bounding boxes and json result would be saved in `output` folder.
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz tar xf cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
background
person person
bicycle bicycle
car car
......