Commit 770b6c26 authored by bjjwwang

Merge branch 'develop' of https://github.com/paddlepaddle/serving into develop

......@@ -188,7 +188,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
| `use_lite` (Only for Intel x86 CPU or ARM CPU) | - | - | Run PaddleLite inference |
| `use_xpu` | - | - | Run PaddleLite inference with Baidu Kunlun XPU |
| `precision` | str | FP32 | Precision mode; supports FP32, FP16, INT8 |
| `use_calib` | bool | False | Only for deployment with TensorRT |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | Enable GPU multi-stream to achieve higher QPS |
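As a quick illustration of how these flags combine with the `paddle_serving_server.serve` command shown above, here is a minimal Python sketch that launches the server via `subprocess`; the flag values and the spelling of the boolean switch are assumptions and should be checked against `--help`.

```python
# Minimal sketch: start the serving process with some of the flags documented
# in the table above. Values are illustrative only; boolean flags are assumed
# to be bare switches (verify with: python3 -m paddle_serving_server.serve --help).
import subprocess

cmd = [
    "python3", "-m", "paddle_serving_server.serve",
    "--model", "uci_housing_model",
    "--thread", "10",
    "--port", "9393",
    "--precision", "FP16",   # one of FP32 / FP16 / INT8 (see table)
    "--gpu_multi_stream",    # assumption: passed as a bare switch
]
subprocess.run(cmd, check=True)
```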
#### Description of asynchronous model
......
......@@ -187,7 +187,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
| `use_lite` (Only for Intel x86 CPU or ARM CPU) | - | - | Run PaddleLite inference |
| `use_xpu` | - | - | Run PaddleLite inference with Baidu Kunlun XPU |
| `precision` | str | FP32 | Precision mode; supports FP32, FP16, INT8 |
| `use_calib` | bool | False | Only for deployment with TensorRT |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | Enable GPU multi-stream to achieve higher QPS |
#### Description of asynchronous model
......
......@@ -61,8 +61,11 @@ else()
endif()
if(CUDNN_FOUND)
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h")
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
endif()
get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)
string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
......
......@@ -27,52 +27,54 @@ set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/
message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
SET(PADDLE_VERSION "2.1.0")
SET(PADDLE_VERSION "2.2.0-rc0")
if (WITH_GPU)
if(CUDA_VERSION EQUAL 11.0)
set(CUDA_SUFFIX "cuda11.0-cudnn8-mkl-gcc8.2")
message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}")
# CUDA 11.0 is not supported; CUDA 11.2 is added below.
if(CUDA_VERSION EQUAL 10.1)
set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5")
set(WITH_TRT ON)
elseif(CUDA_VERSION EQUAL 10.2)
set(CUDA_SUFFIX "cuda10.2-cudnn8-mkl-gcc8.2")
set(WITH_TRT ON)
elseif(CUDA_VERSION EQUAL 10.1)
set(CUDA_SUFFIX "cuda10.1-cudnn7-mkl-gcc8.2")
if(CUDNN_MAJOR_VERSION EQUAL 7)
set(CUDA_SUFFIX "x86-64_gcc5.4_avx_mkl_cuda10.2_cudnn7.6.5_trt6.0.1.5")
set(WITH_TRT ON)
elseif(CUDNN_MAJOR_VERSION EQUAL 8)
set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.2_cudnn8.1.1_trt7.2.3.4")
set(WITH_TRT ON)
endif()
elseif(CUDA_VERSION EQUAL 11.2)
set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda11.2_cudnn8.2.1_trt8.0.3.4")
set(WITH_TRT ON)
elseif(CUDA_VERSION EQUAL 10.0)
set(CUDA_SUFFIX "cuda10-cudnn7-avx-mkl")
elseif(CUDA_VERSION EQUAL 9.0)
set(CUDA_SUFFIX "cuda9-cudnn7-avx-mkl")
endif()
else()
set(WITH_TRT OFF)
endif()
if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-${CUDA_SUFFIX}")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/GPU/${CUDA_SUFFIX}")
elseif (WITH_LITE)
if (WITH_XPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}-xpu")
SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
endif()
else()
if (WITH_AVX)
if (WITH_MKLML)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-avx-mkl")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_avx_mkl")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-avx-openblas")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_avx_openblas")
endif()
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-noavx-openblas")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_openblas")
endif()
endif()
if(WITH_LITE)
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
SET(PADDLE_LIB_PATH "https://paddle-inference-lib.bj.bcebos.com/2.2.0-rc0/cxx_c/Linux/XPU/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz ")
else()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
endif()
......
......@@ -12,41 +12,97 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
package baidu.paddle_serving.predictor.general_model;
option java_multiple_files = true;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1
// represents the imaginary part
repeated float complex64_data = 7;
// (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1
// represents the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of data(LOD), support variable length data, only for fetch tensor
// currently.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14; // get from the Model prototxt
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
rpc inference(Request) returns (Response) {}
rpc debug(Request) returns (Response) {}
rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response);
};
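The proto3 messages above can be filled through the generated protobuf bindings. Below is a minimal Python sketch that builds a Request carrying a single FP32 tensor; the module name `general_model_service_pb2` is an assumption — the actual generated module path in this repo may differ.

```python
# Minimal sketch: build a Request for the messages defined above.
# Assumption: the generated Python module is importable as
# general_model_service_pb2; the real module path may differ.
import general_model_service_pb2 as pb

req = pb.Request()
req.log_id = 1
req.fetch_var_names.append("price")

tensor = req.tensor.add()
tensor.name = "x"             # 'name' from the model description prototxt
tensor.alias_name = "x"       # 'alias_name' from the model description prototxt
tensor.elem_type = 1          # 1 => FP32, so the payload goes in float_data
tensor.shape.extend([1, 13])  # shape includes the batch dimension
tensor.float_data.extend([0.0] * 13)

payload = req.SerializeToString()  # ready to send over the wire
```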
......@@ -22,11 +22,8 @@ message EngineDesc {
required string reloadable_type = 4;
required string model_dir = 5;
repeated int32 gpu_ids = 6;
required int32 runtime_thread_num = 7;
required int32 batch_infer_size = 8;
required int32 enable_batch_align = 9;
optional string version_file = 10;
optional string version_type = 11;
optional string version_file = 7;
optional string version_type = 8;
/*
* Sparse Parameter Service type. Valid types are:
......@@ -39,17 +36,34 @@ message EngineDesc {
LOCAL = 1;
REMOTE = 2;
}
optional SparseParamServiceType sparse_param_service_type = 12;
optional string sparse_param_service_table_name = 13;
optional bool enable_memory_optimization = 14;
optional bool enable_ir_optimization = 15;
optional bool use_trt = 16;
optional bool use_lite = 17;
optional bool use_xpu = 18;
optional bool use_gpu = 19;
optional bool combined_model = 20;
optional bool encrypted_model = 21;
optional bool gpu_multi_stream = 22;
optional SparseParamServiceType sparse_param_service_type = 10;
optional string sparse_param_service_table_name = 11;
optional bool enable_memory_optimization = 12;
optional bool enable_ir_optimization = 13;
optional bool use_trt = 14;
optional bool use_lite = 15;
optional bool use_xpu = 16;
optional bool use_gpu = 17;
optional bool combined_model = 18;
optional bool encrypted_model = 19;
optional bool gpu_multi_stream = 20;
/*
* "runtime_thread_num": n == 0 means do not use the asynchronous task
* scheduling mode; n > 0 means the number of Predictors for this engine
* in asynchronous task scheduling mode.
* "batch_infer_size": the max batch size for this engine in asynchronous
* task scheduling mode.
* "enable_overrun": always put a whole task into the TaskQueue even if the
* total batch is bigger than "batch_infer_size".
* "allow_split_request": allow a task (which corresponds to a request) to
* be split.
*/
optional int32 runtime_thread_num = 30 [ default = 0 ];
optional int32 batch_infer_size = 31 [ default = 32 ];
optional bool enable_overrun = 32 [ default = false ];
optional bool allow_split_request = 33 [ default = true ];
};
// model_toolkit conf
......@@ -61,11 +75,14 @@ message ResourceConf {
repeated string model_toolkit_file = 2;
repeated string general_model_path = 3;
repeated string general_model_file = 4;
optional string cube_config_path = 5;
optional string cube_config_file = 6;
optional int32 cube_quant_bits = 7; // set 0 if no quant.
optional string auth_product_name = 8;
optional string auth_container_id = 9;
optional string cube_config_path = 10;
optional string cube_config_file = 11;
optional int32 cube_quant_bits = 12;
optional string cube_cache_path = 13;
optional string auth_product_name = 20;
optional string auth_container_id = 21;
};
// DAG node dependency info
......
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
)
func main() {
dict_name := flag.String("n", "test", "cube name")
conf_path := flag.String("c", "./conf/cube.conf", "cube conf path")
input_path := flag.String("i", "./input.json", "keys to seek")
output_path := flag.String("o", "./output.json", "result to save")
flag.Parse()
bytes, err := ioutil.ReadFile(*conf_path)
if err != nil {
fmt.Println("读取配置文件失败", err)
return
}
var meta Meta
err = json.Unmarshal(bytes, &meta.Servers)
if err != nil {
fmt.Println("解析数据失败", err)
return
}
err = meta.Seek(*dict_name, *input_path, *output_path)
if err != nil {
fmt.Println(err)
}
return
}
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
package main
import "fmt"
type Meta struct {
Servers []CubeServer `json:"servers,omitempty"`
}
func (meta *Meta) Seek(dict_name string, input string, output string) (err error) {
var server CubeServer
for _, s := range meta.Servers {
if s.Name == dict_name {
server = s
break
}
}
if server.Name != dict_name {
err = fmt.Errorf("%s server not exist", dict_name)
return err
}
err = server.Seek(input, output)
return err
}
package main
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
)
type Input struct {
Keys []uint64 `json:"keys"`
}
type SingleValue struct {
Status uint32 `json:"status"`
Value string `json:"value"`
}
type Output struct {
Values []SingleValue `json:"values"`
}
type ServerNode struct {
Ip string `json:"ip"`
Port uint64 `json:"port"`
}
type CubeServer struct {
Name string `json:"dict_name"`
Shard uint64 `json:"shard"`
Nodes []ServerNode `json:"nodes"`
}
func (server *CubeServer) SplitKeys(keys []uint64) (splited_keys map[uint64]Input, offset map[uint64][]uint64) {
splited_keys = make(map[uint64]Input)
offset = make(map[uint64][]uint64)
for i, key := range keys {
shard_id := key % server.Shard
temp_split, _ := splited_keys[shard_id]
temp_split.Keys = append(temp_split.Keys, key)
splited_keys[shard_id] = temp_split
temp_offset, _ := offset[shard_id]
temp_offset = append(temp_offset, uint64(i))
offset[shard_id] = temp_offset
}
return splited_keys, offset
}
func (server *CubeServer) Seek(input string, output_path string) (err error) {
file, err := os.Open(input)
if err != nil {
return err
}
defer file.Close()
buf := bufio.NewReader(file)
for {
line, err := buf.ReadBytes('\n')
//line = strings.TrimSpace(line)
if err != nil || io.EOF == err {
break
}
var temp_input Input
json.Unmarshal(line, &temp_input)
key_nums := len(temp_input.Keys)
var output Output
output.Values = make([]SingleValue, key_nums) // one result slot per key
splited_keys, offset := server.SplitKeys(temp_input.Keys)
for shard_id, keys := range splited_keys {
cur_output, _ := server.Post(shard_id, keys)
for index, single_value := range cur_output.Values {
output.Values[offset[shard_id][index]] = single_value
}
}
json_str, _ := json.Marshal(output)
fp, err := os.OpenFile(output_path, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0755)
if err != nil {
log.Fatal(err)
}
// close the file inside the loop instead of deferring, so handles do not
// accumulate until Seek returns
_, err = fp.Write(json_str)
fp.Close()
}
return err
}
func (server *CubeServer) Post(shard_id uint64, input Input) (output Output, err error) {
if shard_id >= uint64(len(server.Nodes)) {
err = fmt.Errorf("have no shard:%v", shard_id)
return output, err
}
json_str, _ := json.Marshal(input)
URL := fmt.Sprintf("http://%s:%v/DictService/seek", server.Nodes[shard_id].Ip, server.Nodes[shard_id].Port)
req, err := http.NewRequest("POST", URL, bytes.NewBuffer(json_str))
if err != nil {
return output, err
}
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return output, err
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return output, err
}
err = json.Unmarshal(body, &output)
return output, err
}
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
#coding=utf-8
import requests
import sys
import json
class Meta(object):
"""Routing info of the cube shard servers."""
def __init__(self, conf_path):
"""Initialize the routing table from the config file."""
self.server_api = "/DictService/seek"
self.server_meta = {}
with open(conf_path, "r", encoding="utf8") as fp:
cube_servers = json.load(fp)
for server in cube_servers:
self.server_meta[server["dict_name"]] = server
def seek(self, dict_name, keys_path, save_path):
"""查询"""
save_file = open(save_path, 'w')
with open(keys_path, "r", encoding="utf8") as fp:
lines = fp.readlines()
for line in lines:
json_line = json.loads(line)
values = [{} for i in range(len(json_line["keys"]))]
splited_keys, offset = self.split_keys(json_line, dict_name)
for shard_id, keys in splited_keys.items():
results = self.post(dict_name, shard_id, keys)
for i, result in enumerate(results["values"]):
values[offset[shard_id][i]] = result
cur_line_results = {}
cur_line_results["values"] = values
json.dump(cur_line_results, save_file)
save_file.write("\n")
save_file.close()
def split_keys(self, json_line, dict_name):
"""Decide which shard each key goes to, based on the key value and the shard count."""
keys_split = {}
offset = {}
i = 0
for key in json_line["keys"]:
shard_id = key % self.server_meta[dict_name]["shard"]
if shard_id not in keys_split:
keys_split[shard_id] = []
keys_split[shard_id].append(key)
if shard_id not in offset:
offset[shard_id] = []
offset[shard_id].append(i)
i += 1
return keys_split, offset
def post(self, dict_name, shard_id, keys):
"""向分片server发送post请求"""
api = "http://%s:%s%s" % (self.server_meta[dict_name]["nodes"][shard_id]["ip"],
self.server_meta[dict_name]["nodes"][shard_id]["port"],
self.server_api)
data = {"keys": keys}
response = requests.post(api, json.dumps(data))
return response.json()
if __name__ == '__main__':
if len(sys.argv) != 5:
print('usage: python demo.py conf_path dict_name keys_path save_path')
exit(0)
conf_path = sys.argv[1]
dict_name = sys.argv[2]
keys_path = sys.argv[3]
save_path = sys.argv[4]
meta = Meta(conf_path)
meta.seek(dict_name, keys_path, save_path)
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
\ No newline at end of file
# Cube Python API documentation
Deploy cube by following [the deployment and usage of the large-scale sparse parameter service Cube](https://github.com/PaddlePaddle/Serving/blob/master/doc/DEPLOY.md#2-大规模稀疏参数服务cube的部署和使用).
The Python API can replace the deployment and usage of the prediction service described in Section 3 of that document.
## Configuration
conf/cube.conf sets, in JSON format, the ip and port of each shard's cube server; `shard` must equal the number of shards. Example:
```bash
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
```
## Data format
```bash
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
```
Batch queries are supported; each line is one query.
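The input file can also be produced programmatically; a minimal Python sketch that writes the two sample lines above (the file name `input.json` just follows the usage example below):

```python
# Minimal sketch: write query keys in the line-delimited JSON format shown above.
import json

with open("input.json", "w") as f:
    f.write(json.dumps({"keys": [0, 1, 2, 3, 4, 5, 6, 7]}) + "\n")
    f.write(json.dumps({"keys": [1]}) + "\n")
```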
## Usage
```bash
cd ./python-api
python3 demo.py conf/cube.conf test input.json result.json
```
\ No newline at end of file
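Besides the command line above, the `Meta` class from demo.py can be called directly; a minimal sketch, assuming demo.py is importable from the working directory:

```python
# Minimal sketch: use the Python API programmatically instead of via the CLI.
# Assumes demo.py (shown above) is importable from the current directory.
from demo import Meta

meta = Meta("conf/cube.conf")
# dict_name, input keys file, output file -- same arguments as the CLI usage above
meta.seek("test", "input.json", "result.json")
```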
{"values": [{"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}]}
{"values": [{"status": 4294967295, "value": ""}]}
......@@ -3,3 +3,24 @@ add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
if(CLIENT)
FILE(GLOB client_srcs include/*.h src/client.cpp src/brpc_client.cpp)
add_library(client ${client_srcs})
add_dependencies(client utils sdk-cpp)
target_link_libraries(client utils sdk-cpp)
endif()
if(CLIENT)
include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../../)
add_executable(simple_client example/simple_client.cpp)
add_dependencies(simple_client utils sdk-cpp client)
target_link_libraries(simple_client -Wl,--whole-archive
-Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
target_link_libraries(simple_client utils)
target_link_libraries(simple_client sdk-cpp)
target_link_libraries(simple_client client)
endif()
\ No newline at end of file
# C++ client for Paddle Serving
(简体中文|[English](./README.md))
## Sending requests to the BRPC-Server
### Starting the server
Taking the fit_a_line model as an example, the server is started with the same command as a regular BRPC-Server.
```
cd ../../python/examples/fit_a_line
sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
```
### Client prediction
The client currently supports BRPC.
Wrapper functions for BRPC are already implemented; see [brpc_client.cpp](./src/brpc_client.cpp) for details.
```
./simple_client --client_conf="uci_housing_client/serving_client_conf.prototxt" --server_port="127.0.0.1:9393" --test_type="brpc" --sample_type="fit_a_line"
```
See [simple_client.cpp](./example/simple_client.cpp) for more examples.
| Argument | Type | Default | Description |
| ---------------------------------------------- | ---- | ------------------------------------ | ----------------------------------------------------- |
| `client_conf` | str | `"serving_client_conf.prototxt"` | Path of client conf |
| `server_port` | str | `"127.0.0.1:9393"` | Exposed ip:port of server |
| `test_type` | str | `"brpc"` | Request mode; currently only "brpc" is supported |
| `sample_type` | str | `"fit_a_line"` | Sample type, one of "fit_a_line", "bert" |
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <vector>
#include "core/general-client/include/brpc_client.h"
using baidu::paddle_serving::client::ServingClient;
using baidu::paddle_serving::client::ServingBrpcClient;
using baidu::paddle_serving::client::PredictorInputs;
using baidu::paddle_serving::client::PredictorOutputs;
DEFINE_string(server_port, "127.0.0.1:9292", "ip:port");
DEFINE_string(client_conf, "serving_client_conf.prototxt", "Path of client conf");
DEFINE_string(test_type, "brpc", "brpc");
// fit_a_line, bert
DEFINE_string(sample_type, "fit_a_line", "List: fit_a_line, bert");
namespace {
int prepare_fit_a_line(PredictorInputs& input, std::vector<std::string>& fetch_name) {
std::vector<float> float_feed = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
std::vector<int> float_shape = {1, 13};
std::string feed_name = "x";
fetch_name = {"price"};
std::vector<int> lod;
input.add_float_data(float_feed, feed_name, float_shape, lod);
return 0;
}
int prepare_bert(PredictorInputs& input, std::vector<std::string>& fetch_name) {
{
std::vector<float> float_feed(128, 0.0f);
float_feed[0] = 1.0f;
std::vector<int> float_shape = {1, 128, 1};
std::string feed_name = "input_mask";
std::vector<int> lod;
input.add_float_data(float_feed, feed_name, float_shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "position_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
feed[0] = 101;
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "input_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "segment_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
fetch_name = {"pooled_output"};
return 0;
}
} // namespace
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::string url = FLAGS_server_port;
std::string conf = FLAGS_client_conf;
std::string test_type = FLAGS_test_type;
std::string sample_type = FLAGS_sample_type;
LOG(INFO) << "url = " << url << ";"
<< "client_conf = " << conf << ";"
<< "test_type = " << test_type
<< "sample_type = " << sample_type;
std::unique_ptr<ServingClient> client;
// default type is brpc
// will add grpc&http in the future
if (test_type == "brpc") {
client.reset(new ServingBrpcClient());
} else {
client.reset(new ServingBrpcClient());
}
std::vector<std::string> confs;
confs.push_back(conf);
if (client->init(confs, url) != 0) {
LOG(ERROR) << "Failed to init client!";
return 0;
}
PredictorInputs input;
PredictorOutputs output;
std::vector<std::string> fetch_name;
if (sample_type == "fit_a_line") {
prepare_fit_a_line(input, fetch_name);
}
else if (sample_type == "bert") {
prepare_bert(input, fetch_name);
}
else {
prepare_fit_a_line(input, fetch_name);
}
if (client->predict(input, output, fetch_name, 0) != 0) {
LOG(ERROR) << "Failed to predict!";
}
else {
LOG(INFO) << output.print();
}
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "core/general-client/include/client.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
using baidu::paddle_serving::sdk_cpp::Predictor;
using baidu::paddle_serving::sdk_cpp::PredictorApi;
namespace baidu {
namespace paddle_serving {
namespace client {
class ServingBrpcClient : public ServingClient {
public:
ServingBrpcClient() {};
~ServingBrpcClient() {};
virtual int connect(const std::string server_port);
int predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id);
private:
// generate default SDKConf
std::string gen_desc(const std::string server_port);
private:
PredictorApi _api;
Predictor* _predictor;
};
} // namespace client
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <memory>
namespace baidu {
namespace paddle_serving {
namespace predictor {
namespace general_model {
class Request;
class Response;
}
}
namespace client {
class PredictorInputs;
class PredictorOutputs;
class ServingClient {
public:
ServingClient() {};
virtual ~ServingClient() = default;
int init(const std::vector<std::string>& client_conf,
const std::string server_port);
int load_client_config(const std::vector<std::string>& client_conf);
virtual int connect(const std::string server_port) = 0;
virtual int predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id) = 0;
protected:
std::map<std::string, int> _feed_name_to_idx;
std::vector<std::string> _feed_name;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::map<std::string, int> _fetch_name_to_type;
std::vector<std::vector<int>> _shape;
std::vector<int> _type;
std::vector<int64_t> _last_request_ts;
};
class PredictorData {
public:
PredictorData() {};
virtual ~PredictorData() {};
void add_float_data(const std::vector<float>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 1);
void add_int64_data(const std::vector<int64_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 0);
void add_int32_data(const std::vector<int32_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 2);
void add_string_data(const std::string& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 20);
const std::map<std::string, std::vector<float>>& float_data_map() const {
return _float_data_map;
};
std::map<std::string, std::vector<float>>* mutable_float_data_map() {
return &_float_data_map;
};
const std::map<std::string, std::vector<int64_t>>& int64_data_map() const {
return _int64_data_map;
};
std::map<std::string, std::vector<int64_t>>* mutable_int64_data_map() {
return &_int64_data_map;
};
const std::map<std::string, std::vector<int32_t>>& int_data_map() const {
return _int32_data_map;
};
std::map<std::string, std::vector<int32_t>>* mutable_int_data_map() {
return &_int32_data_map;
};
const std::map<std::string, std::string>& string_data_map() const {
return _string_data_map;
};
std::map<std::string, std::string>* mutable_string_data_map() {
return &_string_data_map;
};
const std::map<std::string, std::vector<int>>& shape_map() const {
return _shape_map;
};
std::map<std::string, std::vector<int>>* mutable_shape_map() {
return &_shape_map;
};
const std::map<std::string, std::vector<int>>& lod_map() const {
return _lod_map;
};
std::map<std::string, std::vector<int>>* mutable_lod_map() {
return &_lod_map;
};
int get_datatype(std::string name) const;
void set_datatype(std::string name, int type);
std::string print();
private:
// used to print vector data map e.g. _float_data_map
template<typename T1, typename T2>
std::string map2string(const std::map<T1, std::vector<T2>>& map) {
std::ostringstream oss;
oss.str("");
oss.precision(6);
oss.setf(std::ios::fixed);
std::string key_seg = ":";
std::string val_seg = ",";
std::string end_seg = "\n";
typename std::map<T1, std::vector<T2>>::const_iterator it = map.begin();
typename std::map<T1, std::vector<T2>>::const_iterator itEnd = map.end();
for (; it != itEnd; it++) {
oss << "{";
oss << it->first << key_seg;
const std::vector<T2>& v = it->second;
oss << v.size() << key_seg;
for (size_t i = 0; i < v.size(); ++i) {
if (i != v.size() - 1) {
oss << v[i] << val_seg;
}
else {
oss << v[i];
}
}
oss << "}";
}
return oss.str();
};
// used to print data map without vector e.g. _string_data_map
template<typename T1, typename T2>
std::string map2string(const std::map<T1, T2>& map) {
std::ostringstream oss;
oss.str("");
std::string key_seg = ":";
std::string val_seg = ",";
std::string end_seg = "\n";
typename std::map<T1, T2>::const_iterator it = map.begin();
typename std::map<T1, T2>::const_iterator itEnd = map.end();
for (; it != itEnd; it++) {
oss << "{";
oss << it->first << key_seg
<< "size=" << it->second.size() << key_seg
<< "type=" << this->get_datatype(it->first);
oss << "}";
}
return oss.str();
};
protected:
std::map<std::string, std::vector<float>> _float_data_map;
std::map<std::string, std::vector<int64_t>> _int64_data_map;
std::map<std::string, std::vector<int32_t>> _int32_data_map;
std::map<std::string, std::string> _string_data_map;
std::map<std::string, std::vector<int>> _shape_map;
std::map<std::string, std::vector<int>> _lod_map;
std::map<std::string, int> _datatype_map;
};
class PredictorInputs : public PredictorData {
public:
PredictorInputs() {};
virtual ~PredictorInputs() {};
// generate proto from inputs
// feed_name_to_idx: mapping alias name to idx
// feed_name: mapping idx to name
static int GenProto(const PredictorInputs& inputs,
const std::map<std::string, int>& feed_name_to_idx,
const std::vector<std::string>& feed_name,
predictor::general_model::Request& req);
};
class PredictorOutputs {
public:
struct PredictorOutput {
std::string engine_name;
PredictorData data;
};
PredictorOutputs() {};
virtual ~PredictorOutputs() {};
const std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>>& datas() {
return _datas;
};
std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>>* mutable_datas() {
return &_datas;
};
void add_data(const std::shared_ptr<PredictorOutputs::PredictorOutput>& data) {
_datas.push_back(data);
};
std::string print();
void clear();
// Parse proto to outputs
// fetch_name: name of data to be output
// fetch_name_to_type: mapping of fetch_name to datatype
static int ParseProto(const predictor::general_model::Response& res,
const std::vector<std::string>& fetch_name,
std::map<std::string, int>& fetch_name_to_type,
PredictorOutputs& outputs);
protected:
std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>> _datas;
};
} // namespace client
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
......@@ -51,8 +51,13 @@ class ModelRes {
res._float_value_map.end());
_int32_value_map.insert(res._int32_value_map.begin(),
res._int32_value_map.end());
_string_value_map.insert(res._string_value_map.begin(),
res._string_value_map.end());
_shape_map.insert(res._shape_map.begin(), res._shape_map.end());
_lod_map.insert(res._lod_map.begin(), res._lod_map.end());
_tensor_alias_names.insert(_tensor_alias_names.end(),
res._tensor_alias_names.begin(),
res._tensor_alias_names.end());
}
ModelRes(ModelRes&& res) {
_engine_name = std::move(res._engine_name);
......@@ -65,10 +70,17 @@ class ModelRes {
_int32_value_map.insert(
std::make_move_iterator(std::begin(res._int32_value_map)),
std::make_move_iterator(std::end(res._int32_value_map)));
_string_value_map.insert(
std::make_move_iterator(std::begin(res._string_value_map)),
std::make_move_iterator(std::end(res._string_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
std::make_move_iterator(std::end(res._lod_map)));
_tensor_alias_names.insert(
_tensor_alias_names.end(),
std::make_move_iterator(std::begin(res._tensor_alias_names)),
std::make_move_iterator(std::end(res._tensor_alias_names)));
}
~ModelRes() {}
const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
......@@ -89,6 +101,12 @@ class ModelRes {
std::vector<int32_t>&& get_int32_by_name_with_rv(const std::string& name) {
return std::move(_int32_value_map[name]);
}
const std::string& get_string_by_name(const std::string& name) {
return _string_value_map[name];
}
std::string&& get_string_by_name_with_rv(const std::string& name) {
return std::move(_string_value_map[name]);
}
const std::vector<int>& get_shape_by_name(const std::string& name) {
return _shape_map[name];
}
......@@ -105,6 +123,10 @@ class ModelRes {
_engine_name = engine_name;
}
const std::string& engine_name() { return _engine_name; }
const std::vector<std::string>& tensor_alias_names() {
return _tensor_alias_names;
}
ModelRes& operator=(ModelRes&& res) {
if (this != &res) {
_engine_name = std::move(res._engine_name);
......@@ -117,10 +139,17 @@ class ModelRes {
_int32_value_map.insert(
std::make_move_iterator(std::begin(res._int32_value_map)),
std::make_move_iterator(std::end(res._int32_value_map)));
_string_value_map.insert(
std::make_move_iterator(std::begin(res._string_value_map)),
std::make_move_iterator(std::end(res._string_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
std::make_move_iterator(std::end(res._lod_map)));
_tensor_alias_names.insert(
_tensor_alias_names.end(),
std::make_move_iterator(std::begin(res._tensor_alias_names)),
std::make_move_iterator(std::end(res._tensor_alias_names)));
}
return *this;
}
......@@ -130,8 +159,10 @@ class ModelRes {
std::map<std::string, std::vector<int64_t>> _int64_value_map;
std::map<std::string, std::vector<float>> _float_value_map;
std::map<std::string, std::vector<int32_t>> _int32_value_map;
std::map<std::string, std::string> _string_value_map;
std::map<std::string, std::vector<int>> _shape_map;
std::map<std::string, std::vector<int>> _lod_map;
std::vector<std::string> _tensor_alias_names;
};
class PredictorRes {
......@@ -168,6 +199,14 @@ class PredictorRes {
const std::string& name) {
return std::move(_models[model_idx].get_int32_by_name_with_rv(name));
}
const std::string& get_string_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_string_by_name(name);
}
std::string&& get_string_by_name_with_rv(const int model_idx,
const std::string& name) {
return std::move(_models[model_idx].get_string_by_name_with_rv(name));
}
const std::vector<int>& get_shape_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_shape_by_name(name);
......@@ -193,11 +232,16 @@ class PredictorRes {
}
const std::string& variant_tag() { return _variant_tag; }
const std::vector<std::string>& get_engine_names() { return _engine_names; }
const std::vector<std::string>& get_tensor_alias_names(const int model_idx) {
_tensor_alias_names = _models[model_idx].tensor_alias_names();
return _tensor_alias_names;
}
private:
std::vector<ModelRes> _models;
std::string _variant_tag;
std::vector<std::string> _engine_names;
std::vector<std::string> _tensor_alias_names;
};
class PredictorClient {
......@@ -222,10 +266,14 @@ class PredictorClient {
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<py::array_t<int64_t>>& int_feed,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<py::array_t<int32_t>> &int32_feed,
const std::vector<std::string> &int32_feed_name,
const std::vector<std::vector<int>> &int32_shape,
const std::vector<std::vector<int>> &int32_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int64_feed,
const std::vector<std::string> &int64_feed_name,
const std::vector<std::vector<int>> &int64_shape,
const std::vector<std::vector<int>> &int64_lod_slot_batch,
const std::vector<std::string>& string_feed,
const std::vector<std::string>& string_feed_name,
const std::vector<std::vector<int>>& string_shape,
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-client/include/brpc_client.h"
#include "core/sdk-cpp/include/common.h"
#include "core/util/include/timer.h"
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
DEFINE_bool(profile_client, false, "");
DEFINE_bool(profile_server, false, "");
#define BRPC_MAX_BODY_SIZE 512 * 1024 * 1024
namespace baidu {
namespace paddle_serving {
namespace client {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using configure::SDKConf;
using configure::VariantConf;
using configure::Predictor;
using configure::VariantConf;
int ServingBrpcClient::connect(const std::string server_port) {
brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE;
if (_api.create(gen_desc(server_port)) != 0) {
LOG(ERROR) << "Predictor Creation Failed";
return -1;
}
// _api.thrd_initialize();
return 0;
}
std::string ServingBrpcClient::gen_desc(const std::string server_port) {
// default config for brpc
SDKConf sdk_conf;
Predictor* predictor = sdk_conf.add_predictors();
predictor->set_name("general_model");
predictor->set_service_name("baidu.paddle_serving.predictor.general_model.GeneralModelService");
predictor->set_endpoint_router("WeightedRandomRender");
predictor->mutable_weighted_random_render_conf()->set_variant_weight_list("100");
VariantConf* predictor_var = predictor->add_variants();
predictor_var->set_tag("default_tag_1");
std::string cluster = "list://" + server_port;
predictor_var->mutable_naming_conf()->set_cluster(cluster);
VariantConf* var = sdk_conf.mutable_default_variant_conf();
var->set_tag("default");
var->mutable_connection_conf()->set_connect_timeout_ms(2000);
var->mutable_connection_conf()->set_rpc_timeout_ms(200000);
var->mutable_connection_conf()->set_connect_retry_count(2);
var->mutable_connection_conf()->set_max_connection_per_host(100);
var->mutable_connection_conf()->set_hedge_request_timeout_ms(-1);
var->mutable_connection_conf()->set_hedge_fetch_retry_count(2);
var->mutable_connection_conf()->set_connection_type("pooled");
var->mutable_connection_conf()->set_connect_timeout_ms(2000);
var->mutable_naming_conf()->set_cluster_filter_strategy("Default");
var->mutable_naming_conf()->set_load_balance_strategy("la");
var->mutable_rpc_parameter()->set_compress_type(0);
var->mutable_rpc_parameter()->set_package_size(20);
var->mutable_rpc_parameter()->set_protocol("baidu_std");
var->mutable_rpc_parameter()->set_max_channel_per_request(3);
return sdk_conf.SerializePartialAsString();
}
int ServingBrpcClient::predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id) {
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
// thread initialize for StubTLS
_api.thrd_initialize();
std::string variant_tag;
// predictor is bound to request with brpc::Controller
_predictor = _api.fetch_predictor("general_model", &variant_tag);
if (_predictor == NULL) {
LOG(ERROR) << "Failed fetch predictor so predict error!";
return -1;
}
// predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "variant_tag:" << variant_tag;
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req;
req.set_log_id(log_id);
for (auto &name : fetch_name) {
req.add_fetch_var_names(name);
}
if (PredictorInputs::GenProto(inputs, _feed_name_to_idx, _feed_name, req) != 0) {
LOG(ERROR) << "Failed to preprocess req!";
return -1;
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
res.Clear();
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
return -1;
}
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
if (PredictorOutputs::ParseProto(res, fetch_name, _fetch_name_to_type, outputs) != 0) {
LOG(ERROR) << "Failed to post_process res!";
return -1;
}
postprocess_end = timeline.TimeStampUS();
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
<< "pid:" << getpid() << "\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
}
}
oss << "postpro_0:" << postprocess_start << " ";
oss << "postpro_1:" << postprocess_end;
fprintf(stderr, "%s\n", oss.str().c_str());
}
// release predictor
_api.thrd_clear();
std::ostringstream oss;
oss << "[client]"
<< "logid=" << log_id <<",";
if (FLAGS_profile_client) {
double pre_cost = (preprocess_end - preprocess_start) / 1000.0;
double infer_cost = (client_infer_end - client_infer_start) / 1000.0;
double post_cost = (postprocess_end - postprocess_start) / 1000.0;
oss << "client_pre_cost=" << pre_cost << "ms,"
<< "client_infer_cost=" << infer_cost << "ms,"
<< "client_post_cost=" << post_cost << "ms,";
}
double client_cost = (postprocess_end - preprocess_start) / 1000.0;
oss << "client_cost=" << client_cost << "ms,";
int op_num = res.profile_time_size() / 2;
if (FLAGS_profile_server) {
for (int i = 0; i < op_num - 1; ++i) {
double t = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "op" << i << "=" << t << "ms,";
}
}
if (op_num > 0) {
int i = op_num - 1;
double server_cost = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "server_cost=" << server_cost << "ms.";
}
LOG(INFO) << oss.str();
return 0;
}
} // namespace client
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-client/include/client.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace client {
using configure::GeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int ServingClient::init(const std::vector<std::string>& client_conf,
const std::string server_port) {
if (load_client_config(client_conf) != 0) {
LOG(ERROR) << "Failed to load client config";
return -1;
}
// pure virtual func, subclass implementation
if (connect(server_port) != 0) {
LOG(ERROR) << "Failed to connect";
return -1;
}
return 0;
}
int ServingClient::load_client_config(const std::vector<std::string> &conf_file) {
try {
GeneralModelConfig model_config;
if (configure::read_proto_conf(conf_file[0].c_str(), &model_config) != 0) {
LOG(ERROR) << "Failed to load general model config"
<< ", file path: " << conf_file[0];
return -1;
}
_feed_name_to_idx.clear();
_fetch_name_to_idx.clear();
_shape.clear();
int feed_var_num = model_config.feed_var_size();
_feed_name.clear();
VLOG(2) << "feed var num: " << feed_var_num;
for (int i = 0; i < feed_var_num; ++i) {
_feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
VLOG(2) << "feed [" << i << "]"
<< " name: " << model_config.feed_var(i).name();
_feed_name.push_back(model_config.feed_var(i).name());
VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
<< " index: " << i;
std::vector<int> tmp_feed_shape;
VLOG(2) << "feed"
<< "[" << i << "] shape:";
for (int j = 0; j < model_config.feed_var(i).shape_size(); ++j) {
tmp_feed_shape.push_back(model_config.feed_var(i).shape(j));
VLOG(2) << "shape[" << j << "]: " << model_config.feed_var(i).shape(j);
}
_type.push_back(model_config.feed_var(i).feed_type());
VLOG(2) << "feed"
<< "[" << i
<< "] feed type: " << model_config.feed_var(i).feed_type();
_shape.push_back(tmp_feed_shape);
}
if (conf_file.size() > 1) {
model_config.Clear();
if (configure::read_proto_conf(conf_file[conf_file.size() - 1].c_str(),
&model_config) != 0) {
LOG(ERROR) << "Failed to load general model config"
<< ", file path: " << conf_file[conf_file.size() - 1];
return -1;
}
}
int fetch_var_num = model_config.fetch_var_size();
VLOG(2) << "fetch_var_num: " << fetch_var_num;
for (int i = 0; i < fetch_var_num; ++i) {
_fetch_name_to_idx[model_config.fetch_var(i).alias_name()] = i;
VLOG(2) << "fetch [" << i << "]"
<< " alias name: " << model_config.fetch_var(i).alias_name();
_fetch_name_to_var_name[model_config.fetch_var(i).alias_name()] =
model_config.fetch_var(i).name();
_fetch_name_to_type[model_config.fetch_var(i).alias_name()] =
model_config.fetch_var(i).fetch_type();
}
} catch (std::exception &e) {
LOG(ERROR) << "Failed load general model config" << e.what();
return -1;
}
return 0;
}
void PredictorData::add_float_data(const std::vector<float>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_float_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_int64_data(const std::vector<int64_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_int64_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_int32_data(const std::vector<int32_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_int32_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_string_data(const std::string& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_string_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
int PredictorData::get_datatype(std::string name) const {
std::map<std::string, int>::const_iterator it = _datatype_map.find(name);
if (it != _datatype_map.end()) {
return it->second;
}
return 0;
}
void PredictorData::set_datatype(std::string name, int type) {
_datatype_map[name] = type;
}
std::string PredictorData::print() {
std::string res;
res.append(map2string<std::string, float>(_float_data_map));
res.append(map2string<std::string, int64_t>(_int64_data_map));
res.append(map2string<std::string, int32_t>(_int32_data_map));
res.append(map2string<std::string, std::string>(_string_data_map));
return res;
}
int PredictorInputs::GenProto(const PredictorInputs& inputs,
const std::map<std::string, int>& feed_name_to_idx,
const std::vector<std::string>& feed_name,
Request& req) {
const std::map<std::string, std::vector<float>>& float_feed_map = inputs.float_data_map();
const std::map<std::string, std::vector<int64_t>>& int64_feed_map = inputs.int64_data_map();
const std::map<std::string, std::vector<int32_t>>& int32_feed_map = inputs.int_data_map();
const std::map<std::string, std::string>& string_feed_map = inputs.string_data_map();
const std::map<std::string, std::vector<int>>& shape_map = inputs.shape_map();
const std::map<std::string, std::vector<int>>& lod_map = inputs.lod_map();
VLOG(2) << "float feed name size: " << float_feed_map.size();
VLOG(2) << "int feed name size: " << int64_feed_map.size();
VLOG(2) << "string feed name size: " << string_feed_map.size();
// batch is already in Tensor.
for (std::map<std::string, std::vector<float>>::const_iterator iter = float_feed_map.begin();
iter != float_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<float>& float_data = iter->second;
const std::vector<int>& float_shape = shape_map.at(name);
const std::vector<int>& float_lod = lod_map.at(name);
// default datatype = P_FLOAT32
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
VLOG(2) << "prepare float feed " << name << " idx " << idx;
int total_number = float_data.size();
Tensor *tensor = req.add_tensor();
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape.size();
for (uint32_t j = 0; j < float_shape.size(); ++j) {
tensor->add_shape(float_shape[j]);
}
for (uint32_t j = 0; j < float_lod.size(); ++j) {
tensor->add_lod(float_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_float_data()->Resize(total_number, 0);
memcpy(tensor->mutable_float_data()->mutable_data(), float_data.data(), total_number * sizeof(float));
}
for (std::map<std::string, std::vector<int64_t>>::const_iterator iter = int64_feed_map.begin();
iter != int64_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<int64_t>& int64_data = iter->second;
const std::vector<int>& int64_shape = shape_map.at(name);
const std::vector<int>& int64_lod = lod_map.at(name);
// default datatype = P_INT64
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
int total_number = int64_data.size();
for (uint32_t j = 0; j < int64_shape.size(); ++j) {
tensor->add_shape(int64_shape[j]);
}
for (uint32_t j = 0; j < int64_lod.size(); ++j) {
tensor->add_lod(int64_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), int64_data.data(), total_number * sizeof(int64_t));
}
for (std::map<std::string, std::vector<int32_t>>::const_iterator iter = int32_feed_map.begin();
iter != int32_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<int32_t>& int32_data = iter->second;
const std::vector<int>& int32_shape = shape_map.at(name);
const std::vector<int>& int32_lod = lod_map.at(name);
// default datatype = P_INT32
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
int total_number = int32_data.size();
for (uint32_t j = 0; j < int32_shape.size(); ++j) {
tensor->add_shape(int32_shape[j]);
}
for (uint32_t j = 0; j < int32_lod.size(); ++j) {
tensor->add_lod(int32_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), int32_data.data(), total_number * sizeof(int32_t));
}
for (std::map<std::string, std::string>::const_iterator iter = string_feed_map.begin();
iter != string_feed_map.end();
++iter) {
std::string name = iter->first;
const std::string& string_data = iter->second;
const std::vector<int>& string_shape = shape_map.at(name);
const std::vector<int>& string_lod = lod_map.at(name);
// default datatype = P_STRING
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
for (uint32_t j = 0; j < string_shape.size(); ++j) {
tensor->add_shape(string_shape[j]);
}
for (uint32_t j = 0; j < string_lod.size(); ++j) {
tensor->add_lod(string_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
if (datatype == P_STRING) {
const int string_shape_size = string_shape.size();
// string_shape[vec_idx] = [1]; because numpy has no string dtype,
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_data);
break;
}
}
} else {
tensor->set_tensor_content(string_data);
}
}
return 0;
}
std::string PredictorOutputs::print() {
std::string res = "";
for (size_t i = 0; i < _datas.size(); ++i) {
res.append(_datas[i]->engine_name);
res.append(":");
res.append(_datas[i]->data.print());
res.append("\n");
}
return res;
}
void PredictorOutputs::clear() {
_datas.clear();
}
int PredictorOutputs::ParseProto(const Response& res,
const std::vector<std::string>& fetch_name,
std::map<std::string, int>& fetch_name_to_type,
PredictorOutputs& outputs) {
VLOG(2) << "get model output num";
uint32_t model_num = res.outputs_size();
VLOG(2) << "model num: " << model_num;
for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
VLOG(2) << "process model output index: " << m_idx;
auto& output = res.outputs(m_idx);
std::shared_ptr<PredictorOutputs::PredictorOutput> predictor_output =
std::make_shared<PredictorOutputs::PredictorOutput>();
predictor_output->engine_name = output.engine_name();
PredictorData& predictor_data = predictor_output->data;
std::map<std::string, std::vector<float>>& float_data_map = *predictor_output->data.mutable_float_data_map();
std::map<std::string, std::vector<int64_t>>& int64_data_map = *predictor_output->data.mutable_int64_data_map();
std::map<std::string, std::vector<int32_t>>& int32_data_map = *predictor_output->data.mutable_int_data_map();
std::map<std::string, std::string>& string_data_map = *predictor_output->data.mutable_string_data_map();
std::map<std::string, std::vector<int>>& shape_map = *predictor_output->data.mutable_shape_map();
std::map<std::string, std::vector<int>>& lod_map = *predictor_output->data.mutable_lod_map();
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
shape_map[name][i] = output.tensor(idx).shape(i);
}
int lod_size = output.tensor(idx).lod_size();
if (lod_size > 0) {
lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "fetch var " << name << "type int64";
int size = output.tensor(idx).int64_data_size();
int64_data_map[name] = std::vector<int64_t>(
output.tensor(idx).int64_data().begin(),
output.tensor(idx).int64_data().begin() + size);
} else if (fetch_name_to_type[name] == P_FLOAT32) {
VLOG(2) << "fetch var " << name << "type float";
int size = output.tensor(idx).float_data_size();
float_data_map[name] = std::vector<float>(
output.tensor(idx).float_data().begin(),
output.tensor(idx).float_data().begin() + size);
} else if (fetch_name_to_type[name] == P_INT32) {
VLOG(2) << "fetch var " << name << "type int32";
int size = output.tensor(idx).int_data_size();
int32_data_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (fetch_name_to_type[name] == P_UINT8
|| fetch_name_to_type[name] == P_INT8
|| fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var [" << name << "]type="
<< fetch_name_to_type[name];
string_data_map[name] = output.tensor(idx).tensor_content();
}
predictor_data.set_datatype(name, output.tensor(idx).elem_type());
idx += 1;
}
outputs.add_data(predictor_output);
}
return 0;
}
} // namespace client
} // namespace paddle_serving
} // namespace baidu
......@@ -25,7 +25,22 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
std::once_flag gflags_init_flag;
namespace py = pybind11;
......@@ -152,10 +167,14 @@ int PredictorClient::numpy_predict(
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<py::array_t<int32_t>> &int32_feed,
const std::vector<std::string> &int32_feed_name,
const std::vector<std::vector<int>> &int32_shape,
const std::vector<std::vector<int>> &int32_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int64_feed,
const std::vector<std::string> &int64_feed_name,
const std::vector<std::vector<int>> &int64_shape,
const std::vector<std::vector<int>> &int64_lod_slot_batch,
const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
......@@ -168,15 +187,14 @@ int PredictorClient::numpy_predict(
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
int fetch_name_num = fetch_name.size();
_api.thrd_initialize();
std::string variant_tag;
_predictor = _api.fetch_predictor("general_model", &variant_tag);
predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size();
VLOG(2) << "int feed name size: " << int32_feed_name.size();
VLOG(2) << "int feed name size: " << int64_feed_name.size();
VLOG(2) << "string feed name size: " << string_feed_name.size();
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req;
......@@ -193,7 +211,11 @@ int PredictorClient::numpy_predict(
tensor_vec.push_back(req.add_tensor());
}
for (auto &name : int_feed_name) {
for (auto &name : int32_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
for (auto &name : int64_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
......@@ -233,37 +255,63 @@ int PredictorClient::numpy_predict(
}
vec_idx = 0;
for (auto &name : int_feed_name) {
for (auto &name : int32_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
int nbytes = int32_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int32_feed[vec_idx].data(0));
int total_number = int32_feed[vec_idx].size();
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
for (uint32_t j = 0; j < int32_shape[vec_idx].size(); ++j) {
tensor->add_shape(int32_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
for (uint32_t j = 0; j < int32_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int32_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++;
}
// Add INT64 feed data of int64_input to int64_data
vec_idx = 0;
for (auto &name : int64_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int64_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int64_feed[vec_idx].data(0));
int total_number = int64_feed[vec_idx].size();
for (uint32_t j = 0; j < int64_shape[vec_idx].size(); ++j) {
tensor->add_shape(int64_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int64_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int64_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++;
}
// Add string_input feed data: non-P_STRING types (UINT8, INT8, FLOAT16)
// go to tensor_content, while P_STRING goes to the data field.
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
......@@ -279,22 +327,27 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1]; because numpy has no string dtype,
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
if (_type[idx] != P_STRING) {
tensor->set_elem_type(_type[idx]);
tensor->set_tensor_content(string_feed[vec_idx]);
} else {
tensor->set_elem_type(P_STRING);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1]; because numpy has no string dtype,
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
}
}
}
vec_idx++;
......@@ -308,10 +361,8 @@ int PredictorClient::numpy_predict(
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
if (FLAGS_profile_client) {
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
res.Clear();
......@@ -329,10 +380,12 @@ int PredictorClient::numpy_predict(
auto output = res.outputs(m_idx);
ModelRes model;
model.set_engine_name(output.engine_name());
int idx = 0;
for (auto &name : fetch_name) {
// At the ResponseOp, the output data has already been arranged according to
// fetch_name, so the outputs correspond strictly to fetch_name and can be
// processed in order.
for (int idx = 0; idx < output.tensor_size(); ++idx) {
// int idx = _fetch_name_to_idx[name];
const std::string name = output.tensor(idx).alias_name();
model._tensor_alias_names.push_back(name);
int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
......@@ -347,13 +400,7 @@ int PredictorClient::numpy_predict(
model._lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "ferch var " << name << "type int64";
int size = output.tensor(idx).int64_data_size();
......@@ -372,8 +419,16 @@ int PredictorClient::numpy_predict(
model._int32_value_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_UINT8) {
  VLOG(2) << "fetch var " << name << " type: uint8";
  model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_INT8) {
  VLOG(2) << "fetch var " << name << " type: int8";
  model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_FP16) {
  VLOG(2) << "fetch var " << name << " type: float16";
  model._string_value_map[name] = output.tensor(idx).tensor_content();
}
idx += 1;
}
predict_res_batch.add_model_res(std::move(model));
}
......@@ -403,6 +458,36 @@ int PredictorClient::numpy_predict(
}
_api.thrd_clear();
std::ostringstream oss;
oss << "[client]"
<< "logid=" << log_id <<",";
if (FLAGS_profile_client) {
double pre_cost = (preprocess_end - preprocess_start) / 1000.0;
double infer_cost = (client_infer_end - client_infer_start) / 1000.0;
double post_cost = (postprocess_end - postprocess_start) / 1000.0;
oss << "client_pre_cost=" << pre_cost << "ms,"
<< "client_infer_cost=" << infer_cost << "ms,"
<< "client_post_cost=" << post_cost << "ms,";
}
double client_cost = (postprocess_end - preprocess_start) / 1000.0;
oss << "client_cost=" << client_cost << "ms,";
int op_num = res.profile_time_size() / 2;
if (FLAGS_profile_server) {
for (int i = 0; i < op_num - 1; ++i) {
double t = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "op" << i << "=" << t << "ms,";
}
}
if (op_num > 0) {
int i = op_num - 1;
double server_cost = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "server_cost=" << server_cost << "ms.";
}
LOG(INFO) << oss.str();
return 0;
}
} // namespace general_model
......
......@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) {
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_int32_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int32_t> *ptr = new std::vector<int32_t>(
std::move(self.get_int32_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int32_t> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_string_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_string_by_name_with_rv(model_idx, name);
})
.def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int> *ptr = new std::vector<int>(
......@@ -69,7 +82,10 @@ PYBIND11_MODULE(serving_client, m) {
})
.def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
.def("get_engine_names",
[](PredictorRes &self) { return self.get_engine_names(); });
[](PredictorRes &self) { return self.get_engine_names(); })
.def("get_tensor_alias_names", [](PredictorRes &self, int model_idx) {
return self.get_tensor_alias_names(model_idx);
});
py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
.def(py::init())
......@@ -101,10 +117,14 @@ PYBIND11_MODULE(serving_client, m) {
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<py::array_t<int32_t>> &int32_feed,
const std::vector<std::string> &int32_feed_name,
const std::vector<std::vector<int>> &int32_shape,
const std::vector<std::vector<int>> &int32_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int64_feed,
const std::vector<std::string> &int64_feed_name,
const std::vector<std::vector<int>> &int64_shape,
const std::vector<std::vector<int>> &int64_lod_slot_batch,
const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
......@@ -117,10 +137,14 @@ PYBIND11_MODULE(serving_client, m) {
float_feed_name,
float_shape,
float_lod_slot_batch,
int_feed,
int_feed_name,
int_shape,
int_lod_slot_batch,
int32_feed,
int32_feed_name,
int32_shape,
int32_lod_slot_batch,
int64_feed,
int64_feed_name,
int64_shape,
int64_lod_slot_batch,
string_feed,
string_feed_name,
string_shape,
......
......@@ -191,42 +191,64 @@ int GeneralDetectionOp::inference() {
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
for (int i = boxes.size() - 1; i >= 0; i--) {
crop_img = GetRotateCropImage(img, boxes[i]);
float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
float max_wh_ratio = 0.0f;
std::vector<cv::Mat> crop_imgs;
std::vector<cv::Mat> resize_imgs;
int max_resize_w = 0;
int max_resize_h = 0;
int box_num = boxes.size();
std::vector<std::vector<float>> output_rec;
for (int i = 0; i < box_num; ++i) {
cv::Mat line_img = GetRotateCropImage(img, boxes[i]);
float wh_ratio = float(line_img.cols) / float(line_img.rows);
max_wh_ratio = max_wh_ratio > wh_ratio ? max_wh_ratio : wh_ratio;
crop_imgs.push_back(line_img);
}
for (int i = 0; i < box_num; ++i) {
cv::Mat resize_img;
crop_img = crop_imgs[i];
this->resize_op_rec.Run(
crop_img, resize_img_rec, wh_ratio, this->use_tensorrt_);
crop_img, resize_img, max_wh_ratio, this->use_tensorrt_);
this->normalize_op_.Run(
&resize_img_rec, this->mean_rec, this->scale_rec, this->is_scale_);
std::vector<float> output_rec(
1 * 3 * resize_img_rec.rows * resize_img_rec.cols, 0.0f);
this->permute_op_.Run(&resize_img_rec, output_rec.data());
// Inference.
output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols};
out_num = std::accumulate(
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
databuf_size_out = out_num * sizeof(float);
databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out);
if (!databuf_data_out) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
return -1;
}
memcpy(databuf_data_out, output_rec.data(), databuf_size_out);
databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
paddle::PaddleTensor tensor_out;
tensor_out.name = "image";
tensor_out.dtype = paddle::PaddleDType::FLOAT32;
tensor_out.shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols};
tensor_out.data = paddleBuf;
out->push_back(tensor_out);
&resize_img, this->mean_rec, this->scale_rec, this->is_scale_);
max_resize_w = std::max(max_resize_w, resize_img.cols);
max_resize_h = std::max(max_resize_h, resize_img.rows);
resize_imgs.push_back(resize_img);
}
int buf_size = 3 * max_resize_h * max_resize_w;
output_rec = std::vector<std::vector<float>>(box_num,
std::vector<float>(buf_size, 0.0f));
for (int i = 0; i < box_num; ++i) {
resize_img_rec = resize_imgs[i];
this->permute_op_.Run(&resize_img_rec, output_rec[i].data());
}
// Inference.
output_shape = {box_num, 3, max_resize_h, max_resize_w};
out_num = std::accumulate(
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
databuf_size_out = out_num * sizeof(float);
databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out);
if (!databuf_data_out) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
return -1;
}
int offset = buf_size * sizeof(float);
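// Each cropped text line occupies buf_size floats, so box i is copied into
// the shared batch buffer at byte offset i * offset.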
for (int i = 0; i < box_num; ++i) {
memcpy(databuf_data_out + i * offset, output_rec[i].data(), offset);
}
databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
paddle::PaddleTensor tensor_out;
tensor_out.name = "image";
tensor_out.dtype = paddle::PaddleDType::FLOAT32;
tensor_out.shape = output_shape;
tensor_out.data = paddleBuf;
out->push_back(tensor_out);
}
out->erase(out->begin(), out->begin() + infer_outnum);
......
......@@ -63,7 +63,7 @@ class GeneralDetectionOp
double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0;
double det_db_unclip_ratio_ = 1.5;
std::vector<float> mean_det = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
......
......@@ -20,6 +20,7 @@
#include <unordered_map>
#include <utility>
#include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/framework/cache.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
......@@ -36,10 +37,11 @@ using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
using baidu::paddle_serving::predictor::CubeCache;
// DistKV Infer Op: seek cube and then call paddle inference
// op seq: general_reader-> dist_kv_infer -> general_response
int GeneralDistKVInferOp::inference() {
int GeneralDistKVInferOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
......@@ -60,8 +62,8 @@ int GeneralDistKVInferOp::inference() {
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "(logid=" << log_id << ") output_blob is nullptr,error";
return -1;
LOG(ERROR) << "(logid=" << log_id << ") output_blob is nullptr,error";
return -1;
}
output_blob->SetLogId(log_id);
......@@ -70,21 +72,30 @@ int GeneralDistKVInferOp::inference() {
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
Timer timeline;
timeline.Start();
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
std::vector<uint64_t> keys;
std::vector<uint64_t> unique_keys;
std::unordered_map<uint64_t, rec::mcube::CubeValue *> key_map;
std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0; // sparse inputs counts, sparse would seek cube
int dense_count = 0; // dense inputs counts, dense would directly call paddle infer
// count of sparse inputs; sparse inputs are looked up in cube
int sparse_count = 0;
// count of dense inputs; dense inputs go directly to paddle inference
int dense_count = 0;
std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
size_t key_len = 0;
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
// dense input type is not int64
++dense_count;
continue;
}
// sparse input type is int64
++sparse_count;
size_t elem_num = 1;
for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
elem_num *= in->at(i).shape[s];
......@@ -94,7 +105,8 @@ int GeneralDistKVInferOp::inference() {
dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
}
keys.resize(key_len);
VLOG(3) << "(logid=" << log_id << ") cube number of keys to look up: " << key_len;
unique_keys.resize(key_len);
int key_idx = 0;
for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
std::copy(dataptr_size_pairs[i].first,
......@@ -102,20 +114,81 @@ int GeneralDistKVInferOp::inference() {
keys.begin() + key_idx);
key_idx += dataptr_size_pairs[i].second;
}
// filter duplicate keys
int unique_keys_count = 0;
for (size_t i = 0; i < keys.size(); ++i) {
if (key_map.find(keys[i]) == key_map.end()) {
key_map[keys[i]] = nullptr;
unique_keys[unique_keys_count++] = keys[i];
}
}
unique_keys.resize(unique_keys_count);
VLOG(1) << "(logid=" << log_id
<< ") cube number of keys to look up: " << key_len
<< " uniq keys: " << unique_keys_count;
// filter keys that hit the cube cache
size_t hit_counts = 0;
int64_t seek_cache_start = timeline.TimeStampUS();
CubeCache *p_cube_cache =
InferManager::instance().get_cube_cache(engine_name().c_str());
if (p_cube_cache != nullptr) {
for (size_t i = 0; i < unique_keys_count; ++i) {
rec::mcube::CubeValue *hit_val = p_cube_cache->get_data(unique_keys[i]);
if (hit_val) {
// LOG(WARNING) << "Hit one cache. key:" << unique_keys[i];
key_map[unique_keys[i]] = hit_val;
if (hit_counts % 100 == 0) {
LOG(WARNING) << "hit cache! key:" << unique_keys[i]
<< " value:" << hit_val->buff;
}
unique_keys[i] = 0;
++hit_counts;
}
}
} else {
LOG(WARNING) << "get cube cache fail. model: " << engine_name();
}
// clear unique keys which hit caches
if (hit_counts > 0) {
for (auto it = unique_keys.begin(); it < unique_keys.end();) {
if (*it == 0) {
it = unique_keys.erase(it);
--unique_keys_count;
} else {
++it;
}
}
}
int64_t seek_cache_end = timeline.TimeStampUS();
VLOG(2) << "cache hit " << hit_counts
<< " keys in cube cache, last unique_keys:" << unique_keys.size()
<< " , seek_time:" << seek_cache_end - seek_cache_start;
// seek sparse params
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) {
LOG(ERROR) << "cube init error or cube config not given.";
return -1;
}
// gather keys and seek cube servers, put results in values
int ret = cube->seek(table_names[0], keys, &values);
VLOG(3) << "(logid=" << log_id << ") cube seek status: " << ret;
int64_t seek_start = timeline.TimeStampUS();
int ret = cube->seek(table_names[0], unique_keys, &values);
int64_t seek_end = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id << ") cube seek status: " << ret
<< " , unique_key: " << unique_keys.size()
<< " , seek_time: " << seek_end - seek_start;
for (size_t i = 0; i < unique_keys.size(); ++i) {
key_map[unique_keys[i]] = &values[i];
}
if (values.size() != keys.size() || values[0].buff.size() == 0) {
LOG(ERROR) << "cube value return null";
}
// EMBEDDING_SIZE is the length of the sparse embedding vector; users may override it here.
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
// size_t EMBEDDING_SIZE = (values[0].buff.size() - 10) / sizeof(float);
//size_t EMBEDDING_SIZE = 9;
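// Illustrative example (hypothetical sizes): if each cube value buffer holds
// 36 bytes, then EMBEDDING_SIZE = 36 / sizeof(float) = 9 floats per key.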
TensorVector sparse_out;
sparse_out.resize(sparse_count);
TensorVector dense_out;
......@@ -126,8 +199,10 @@ int GeneralDistKVInferOp::inference() {
std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
std::shared_ptr<PaddleGeneralModelConfig> model_config = resource.get_general_model_config().front();
// copy data to tensor
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().front();
int cube_key_found = 0;
int cube_key_miss = 0;
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
dense_out[dense_idx] = in->at(i);
......@@ -142,43 +217,75 @@ int GeneralDistKVInferOp::inference() {
sparse_out[sparse_idx].lod[x].begin());
}
sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
sparse_out[sparse_idx].shape.push_back(sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float));
float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
if (!dst_ptr) {
VLOG(2) << "dst_ptr is null. sparse_idx:" << sparse_idx;
continue;
}
for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
memcpy(data_ptr,
values[cube_val_idx].buff.data(),
values[cube_val_idx].buff.size());
cube_val_idx++;
uint64_t cur_key = keys[cube_val_idx];
rec::mcube::CubeValue *cur_val = key_map[cur_key];
if (cur_val->buff.size() == 0) {
memset(data_ptr, (float)0.0, sizeof(float) * EMBEDDING_SIZE);
++cube_key_miss;
++cube_val_idx;
continue;
}
// The data generated by pslib has 10 bytes of information to be filtered
// out
memcpy(data_ptr, cur_val->buff.data(), cur_val->buff.size() );
// VLOG(3) << keys[cube_val_idx] << ":" << data_ptr[0] << ", " <<
// data_ptr[1] << ", " <<data_ptr[2] << ", " <<data_ptr[3] << ", "
// <<data_ptr[4] << ", " <<data_ptr[5] << ", " <<data_ptr[6] << ", "
// <<data_ptr[7] << ", " <<data_ptr[8];
++cube_key_found;
++cube_val_idx;
}
++sparse_idx;
}
VLOG(3) << "(logid=" << log_id << ") sparse tensor load success.";
bool cube_fail = (cube_key_found == 0);
if (cube_fail) {
LOG(WARNING) << "(logid=" << log_id << ") cube seek fail";
}
VLOG(2) << "(logid=" << log_id << ") cube key found: " << cube_key_found
<< " , cube key miss: " << cube_key_miss;
VLOG(2) << "(logid=" << log_id << ") sparse tensor load success.";
timeline.Pause();
VLOG(2) << "dist kv, cube and datacopy time: " << timeline.ElapsedUS();
TensorVector infer_in;
infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
// call paddle inference here
if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id << ") Failed do infer in fluid model: " << engine_name();
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name();
return -1;
}
int64_t end = timeline.TimeStampUS();
if (cube_fail) {
float *out_ptr = static_cast<float *>(out->at(0).data.data());
out_ptr[0] = 0.0;
}
timeline.Pause();
VLOG(2) << "dist kv, pure paddle infer time: " << timeline.ElapsedUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
return 0;
}
DEFINE_OP(GeneralDistKVInferOp);
......
......@@ -31,7 +31,22 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int GeneralReaderOp::inference() {
// read request from client
......@@ -78,6 +93,7 @@ int GeneralReaderOp::inference() {
int64_t elem_type = 0;
int64_t elem_size = 0;
int64_t databuf_size = 0;
const void* src_ptr = nullptr;
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->tensor(i);
......@@ -86,19 +102,38 @@ int GeneralReaderOp::inference() {
elem_size = 0;
databuf_size = 0;
elem_type = tensor.elem_type();
VLOG(2) << "var[" << i << "] has elem type: " << elem_type;
src_ptr = nullptr;
if (elem_type == P_INT64) { // int64
elem_size = sizeof(int64_t);
paddleTensor.dtype = paddle::PaddleDType::INT64;
data_len = tensor.int64_data_size();
src_ptr = tensor.int64_data().data();
} else if (elem_type == P_FLOAT32) {
elem_size = sizeof(float);
paddleTensor.dtype = paddle::PaddleDType::FLOAT32;
data_len = tensor.float_data_size();
src_ptr = tensor.float_data().data();
} else if (elem_type == P_INT32) {
elem_size = sizeof(int32_t);
paddleTensor.dtype = paddle::PaddleDType::INT32;
data_len = tensor.int_data_size();
src_ptr = tensor.int_data().data();
} else if (elem_type == P_UINT8) {
elem_size = sizeof(uint8_t);
paddleTensor.dtype = paddle::PaddleDType::UINT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_INT8) {
elem_size = sizeof(int8_t);
paddleTensor.dtype = paddle::PaddleDType::INT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_FP16) {
// copy bytes from tensor content to TensorVector
elem_size = 1;
paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_STRING) {
// use paddle::PaddleDType::UINT8 for String.
elem_size = sizeof(char);
......@@ -109,8 +144,18 @@ int GeneralReaderOp::inference() {
// now only support single string
for (int idx = 0; idx < tensor.data_size(); idx++) {
data_len += tensor.data()[idx].length() + 1;
src_ptr = tensor.data()[idx].data();
}
}
VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";"
<< "elem_size=" << elem_size << ";"
<< "dtype=" << paddleTensor.dtype << ";"
<< "data_len=" << data_len;
if (src_ptr == nullptr) {
LOG(ERROR) << "Not support var[" << i << "] with elem_type["
<< elem_type << "]";
continue;
}
// implement lod tensor here
// only support 1-D lod
// TODO(HexToString): support 2-D lod
......@@ -141,44 +186,17 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has lod_tensor and len=" << out->at(i).lod[0].back();
}
if (elem_type == P_INT64) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int64_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int64_data().data(), databuf_size);
/*
int elem_num = tensor.int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.int64_data(k);
}
*/
} else if (elem_type == P_FLOAT32) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.float_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.float_data().data(), databuf_size);
/*int elem_num = tensor.float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.float_data(k);
}*/
} else if (elem_type == P_INT32) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int_data().data(), databuf_size);
} else if (elem_type == P_STRING) {
void* dst_ptr = out->at(i).data.data();
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
// For common data, we just copy from src to dst
// For string data, we need to iterate through all str
if (elem_type != P_STRING) {
memcpy(dst_ptr, src_ptr, databuf_size);
} else {
char *dst_ptr = static_cast<char *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.data(0);
......
......@@ -74,10 +74,19 @@ int GeneralResponseOp::inference() {
// and the order of Output is the same as the prototxt FetchVar.
// otherwise, you can only get the Output by the corresponding of
// Name -- Alias_name.
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
if (req->fetch_var_names_size() > 0) {
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
} else {
fetch_index.resize(model_config->_fetch_alias_name.size());
for (int i = 0; i < model_config->_fetch_alias_name.size(); ++i) {
fetch_index[i] =
model_config
->_fetch_alias_name_to_index[model_config->_fetch_alias_name[i]];
}
}
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
......@@ -105,7 +114,7 @@ int GeneralResponseOp::inference() {
// fetch_index is the real index in FetchVar of Fetchlist
// for example, FetchVar = {0:A, 1:B, 2:C}
// FetchList = {0:C,1:A}, at this situation.
// fetch_index = [2,0], C`index = 2 and A`index = 0
// fetch_index = [2,0], C`index = 2 and A`index = 0
for (auto &idx : fetch_index) {
Tensor *tensor = output->add_tensor();
tensor->set_name(in->at(idx).name);
......@@ -159,6 +168,21 @@ int GeneralResponseOp::inference() {
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap);
output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
} else if (dtype == paddle::PaddleDType::UINT8) {
tensor->set_elem_type(7);
VLOG(2) << "(logid=" << log_id << ")Prepare uint8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::INT8) {
tensor->set_elem_type(8);
VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::FLOAT16) {
tensor->set_elem_type(5);
VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
}
VLOG(2) << "(logid=" << log_id << ") fetch var ["
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model;
......@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part and
// element 2x+1 holds the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part and
// element 2x+1 holds the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of data(LOD), support variable length data, only for fetch tensor
// currently.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14; // get from the Model prototxt
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
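// Illustrative example (hypothetical values, protobuf text format): an FP32
// feed of shape [1, 3] without lod could be encoded as
//   tensor {
//     float_data: 0.1
//     float_data: 0.2
//     float_data: 0.3
//     elem_type: 1
//     shape: 1
//     shape: 3
//     name: "x"
//     alias_name: "input"
//   }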
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
......
......@@ -276,43 +276,65 @@ class PdsCodeGenerator : public CodeGenerator {
"output_name",
google::protobuf::dots_to_colons(m->output_type()->full_name()));
if (m->name() == "inference") {
std::string inference_body = "";
inference_body += " brpc::ClosureGuard done_guard(done);\n";
inference_body += " brpc::Controller* cntl = \n";
inference_body += " static_cast<brpc::Controller*>(cntl_base);\n";
inference_body += " cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n";
inference_body += " uint64_t log_id = request->log_id();\n";
inference_body += " cntl->set_log_id(log_id);\n";
inference_body += " ::baidu::paddle_serving::predictor::InferService* svr = \n";
inference_body += " ";
inference_body += "::baidu::paddle_serving::predictor::InferServiceManager::instance(";
inference_body += ").item(\"$service$\");\n";
inference_body += " if (svr == NULL) {\n";
inference_body += " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: ";
inference_body += "$service$\";\n";
inference_body += " cntl->SetFailed(404, \"Not found service: $service$\");\n";
inference_body += " return ;\n";
inference_body += " }\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "remote_side=\[\" << cntl->remote_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "local_side=\[\" << cntl->local_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
inference_body += " int err_code = svr->inference(request, response, log_id);\n";
inference_body += " if (err_code != 0) {\n";
inference_body += " LOG(WARNING)\n";
inference_body += " << \"(logid=\" << log_id << \") Failed call ";
inference_body += "inferservice[$name$], name[$service$]\"\n";
inference_body += " << \", error_code: \" << err_code;\n";
inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
inference_body += "failed!\");\n";
inference_body += " }\n";
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
inference_body += " std::ostringstream oss;\n";
inference_body += " oss << \"[serving]\"\n";
inference_body += " << \"logid=\" << log_id << \",\";\n";
inference_body += " int op_num = response->profile_time_size() / 2;\n";
inference_body += " for (int i = 0; i < op_num; ++i) {\n";
inference_body += " double t = (response->profile_time(i * 2 + 1)\n";
inference_body += " - response->profile_time(i * 2)) / 1000.0;\n";
inference_body += " oss << \"op\" << i << \"=\" << t << \"ms,\";\n";
inference_body += " }\n";
inference_body += " double total_time = (end - start) / 1000.0;\n";
inference_body += " oss << \"cost=\" << total_time << \"ms.\";\n";
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << oss.str();\n";
inference_body += " response->add_profile_time(start);\n";
inference_body += " response->add_profile_time(end);\n";
} else {
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - "; // NOLINT
inference_body += "start) << \"\]\";\n";
}
printer->Print(
" baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"<< cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
"<< \"$name$\" << \"\]\";\n"
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
" }\n"
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
inference_body.c_str(),
"name",
class_name,
"service",
......@@ -1021,45 +1043,65 @@ class PdsCodeGenerator : public CodeGenerator {
"output_name",
google::protobuf::dots_to_colons(m->output_type()->full_name()));
if (m->name() == "inference") {
std::string inference_body = "";
inference_body += " brpc::ClosureGuard done_guard(done);\n";
inference_body += " brpc::Controller* cntl = \n";
inference_body += " static_cast<brpc::Controller*>(cntl_base);\n";
inference_body += " cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n";
inference_body += " uint64_t log_id = request->log_id();\n";
inference_body += " cntl->set_log_id(log_id);\n";
inference_body += " ::baidu::paddle_serving::predictor::InferService* svr = \n";
inference_body += " ";
inference_body += "::baidu::paddle_serving::predictor::InferServiceManager::instance(";
inference_body += ").item(\"$service$\");\n";
inference_body += " if (svr == NULL) {\n";
inference_body += " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: ";
inference_body += "$service$\";\n";
inference_body += " cntl->SetFailed(404, \"Not found service: $service$\");\n";
inference_body += " return ;\n";
inference_body += " }\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "remote_side=\[\" << cntl->remote_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "local_side=\[\" << cntl->local_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
inference_body += " int err_code = svr->inference(request, response, log_id);\n";
inference_body += " if (err_code != 0) {\n";
inference_body += " LOG(WARNING)\n";
inference_body += " << \"(logid=\" << log_id << \") Failed call ";
inference_body += "inferservice[$name$], name[$service$]\"\n";
inference_body += " << \", error_code: \" << err_code;\n";
inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
inference_body += "failed!\");\n";
inference_body += " }\n";
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
inference_body += " std::ostringstream oss;\n";
inference_body += " oss << \"[serving]\"\n";
inference_body += " << \"logid=\" << log_id << \",\";\n";
inference_body += " int op_num = response->profile_time_size() / 2;\n";
inference_body += " for (int i = 0; i < op_num; ++i) {\n";
inference_body += " double t = (response->profile_time(i * 2 + 1)\n";
inference_body += " - response->profile_time(i * 2)) / 1000.0;\n";
inference_body += " oss << \"op\" << i << \"=\" << t << \"ms,\";\n";
inference_body += " }\n";
inference_body += " double total_time = (end - start) / 1000.0;\n";
inference_body += " oss << \"cost=\" << total_time << \"ms.\";\n";
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << oss.str();\n";
inference_body += " response->add_profile_time(start);\n";
inference_body += " response->add_profile_time(end);\n";
} else {
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - "; // NOLINT
inference_body += "start) << \"\]\";\n";
}
printer->Print(
" brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
" }\n"
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
inference_body.c_str(),
"name",
class_name,
"service",
......@@ -1492,11 +1534,6 @@ class PdsCodeGenerator : public CodeGenerator {
const FieldDescriptor* fd = in_shared_fields[si];
std::string field_name = fd->name();
printer->Print("\n/////$field_name$\n", "field_name", field_name);
if (fd->is_optional()) {
printer->Print(
"if (req->has_$field_name$()) {\n", "field_name", field_name);
printer->Indent();
}
if (fd->cpp_type() ==
google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE ||
fd->is_repeated()) {
......@@ -1509,10 +1546,6 @@ class PdsCodeGenerator : public CodeGenerator {
"field_name",
field_name);
}
if (fd->is_optional()) {
printer->Outdent();
printer->Print("}\n");
}
}
printer->Print(
......
......@@ -25,7 +25,7 @@ DEFINE_int32(port, 8010, "");
DEFINE_string(workflow_path, "./conf", "");
DEFINE_string(workflow_file, "workflow.prototxt", "");
DEFINE_string(inferservice_path, "./conf", "");
DEFINE_string(inferservice_file, "service.prototxt", "");
DEFINE_string(inferservice_file, "infer_service.prototxt", "");
DEFINE_string(logger_path, "./conf", "");
DEFINE_string(logger_file, "log.conf", "");
DEFINE_string(resource_path, "./conf", "");
......
FILE(GLOB framework_srcs ${CMAKE_CURRENT_LIST_DIR}/*.cpp)
FILE(GLOB framework_srcs ${CMAKE_CURRENT_LIST_DIR}/*.cpp ${CMAKE_CURRENT_LIST_DIR}/../../cube/cube-builder/src/seqfile_reader.cpp)
LIST(APPEND pdserving_srcs ${framework_srcs})
LIST(APPEND pclient_srcs ${framework_srcs})
......@@ -26,9 +26,90 @@
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/memory.h"
// this file is included by bsf.h
namespace im {
namespace bsf {
template <typename InItemT, typename OutItemT>
bool Task<InItemT, OutItemT>::task_fetch_init(BatchTasks<TaskT>& batchTask) {
// Double-checked locking to reduce lock granularity.
if (!fetch_init) {
if (taskmeta_num > 1) {
// When the task has been split into multiple taskmetas, a lock is required.
AutoMutex lock(task_mut);
task_fetch_create(batchTask);
} else {
// When the task has only one taskmeta, no lock is needed.
task_fetch_create(batchTask);
}
}
return true;
}
template <typename InItemT, typename OutItemT>
bool Task<InItemT, OutItemT>::task_fetch_create(BatchTasks<TaskT>& batchTask) {
if (!fetch_init) {
vector_fetch_lod_index = batchTask.vector_fetch_lod_index;
set_fetch_nobatch_index = batchTask.set_fetch_nobatch_index;
OutVectorT taskMetaOutLodTensor;
size_t fetchvar_num = batchTask._batch_out.size();
for (size_t fetchvar_index = 0; fetchvar_index < fetchvar_num;
++fetchvar_index) {
size_t fetchvar_bytesize_index =
batchTask.fetchvar_bytesize(fetchvar_index);
size_t fetchvar_batch = 0;
// 1. nobatch fetchvar case
if (set_fetch_nobatch_index.size() > 0 &&
set_fetch_nobatch_index.find(fetchvar_index) !=
set_fetch_nobatch_index.end()) {
fetchvar_batch = 1;
} else if (vector_fetch_lod_index.size() > 0 &&
std::find(vector_fetch_lod_index.begin(),
vector_fetch_lod_index.end(),
fetchvar_index) != vector_fetch_lod_index.end()) {
// lod fetchvar case: the total shape[0] cannot be determined yet.
// Allocate task_num temporary buffers according to the number of taskmetas
// in the task, copy each lod fetchvar into its own temporary buffer, then
// compute the total size at the end and merge the fetchvar data and lod.
fetchvar_batch = 0;
} else {
// ordinary fetchvar case: the total fetchvar_batch of this Task equals
// the total input batch_size()
fetchvar_batch = batch_size();
}
paddle::PaddleTensor tensor_out;
tensor_out.name = batchTask._batch_out[fetchvar_index].name;
tensor_out.dtype =
paddle::PaddleDType(batchTask._batch_out[fetchvar_index].dtype);
tensor_out.shape = batchTask._batch_out[fetchvar_index].shape;
tensor_out.shape[0] = fetchvar_batch;
if (fetchvar_batch != 0) {
// lod is empty at this point.
tensor_out.lod = batchTask._batch_out[fetchvar_index].lod;
// resize all batch memory at one time
size_t databuf_size = fetchvar_batch * fetchvar_bytesize_index;
tensor_out.data.Resize(databuf_size);
} else {
// When taskmeta_num == 1, only one taskMeta operates on the task at a time,
// so there is no thread-safety concern and taskMeta->task can resize and
// copy directly. When the task is split into multiple taskMetas, temporary
// objects record the pieces, which are merged once all have been collected.
if (taskmeta_num > 1) {
taskMetaOutLodTensor.push_back(tensor_out);
}
}
outVectorT_ptr->push_back(tensor_out);
}
// outLodTensorVector is effectively a two-level vector whose shape is
// taskmeta_num * vector_fetch_lod_index.size();
outLodTensorVector.resize(taskmeta_num, taskMetaOutLodTensor);
fetch_init = true;
}
return true;
}
template <typename TaskT>
void* TaskExecutor<TaskT>::thread_entry(void* args) {
ThreadContext<TaskT>* context = static_cast<ThreadContext<TaskT>*>(args);
......@@ -134,9 +215,10 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
LOG(ERROR) << "Failed get TaskT from object pool";
return TaskHandler<TaskT>::valid_handle();
}
task->clear();
/*
if (!BatchTasks<TaskT>::check_valid(in, out, _batch_align)) {
if (!BatchTasks<TaskT>::check_valid(in, out, _overrun)) {
LOG(ERROR) << "Invalid input & output";
return TaskHandler<TaskT>::valid_handle();
}
......@@ -156,9 +238,11 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
task->inVectorT_ptr = (const InVectorT*)inVectorT_ptr;
task->outVectorT_ptr = (OutVectorT*)outVectorT_ptr;
if (!task->task_init()) {
LOG(ERROR) << "task->init() failed";
}
task->rem = task->batch_size();
task->index.store(0, butil::memory_order_relaxed);
AutoMutex lock(_mut);
_task_queue.push_back(task);
THREAD_COND_SIGNAL(&_cond);
......@@ -168,11 +252,12 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
// this function is accessed by multi thread.
// so AutoMutex at first.
// so batch.append_task is thread safe.
// so batchTask.append_task is thread safe.
// you dont need to add extra lock in append_task()
// task is already init.
template <typename TaskT>
bool TaskExecutor<TaskT>::move_task_to_batch(
BatchTasks<TaskT>& batch) { // NOLINT
BatchTasks<TaskT>& batchTask) { // NOLINT
AutoMutex lock(_mut);
while (_task_queue.empty()) {
THREAD_COND_WAIT(&_cond, &_mut);
......@@ -183,15 +268,65 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
return false;
}
TaskT* previous_task = nullptr;
while (!_task_queue.empty()) {
TaskT* task = _task_queue.front();
size_t rem = batch.append_task(task);
// It cannot be known in advance whether a fetchVar is a lod tensor (even if
// the input is non-lod, the output may still be lod).
// The simple approach: never split a task, i.e. user requests may be merged
// and predicted together, but a single request is never split into two
// smaller parts. Just set the engine attribute allow_split_request = false.
// The complex approach: allow splitting a Task, whether or not lod is
// involved. The difficulty is that before prediction we know how many
// taskmetas the task was split into, but only after prediction do we know
// how many fetchvars there are and how many of them are lod.
// Therefore the task must first create taskmeta_num * fetchvar_num (lod
// type) temporary PaddleTensors (holding data and lod).
// Since the unit of multi-threaded scheduling is the taskmeta, these can
// only be created in notify_task via taskmeta->task. Because multiple
// taskmetas map to one task, there is multi-thread contention, so the task
// must take a lock. Atomic operations are not enough, because every thread
// must wait until the PaddleTensors above have been created before it can
// continue. Ordinary (non-lod) fetches also need the lock to create their
// PaddleTensors before data can be copied into them.

// _overrun controls whether the asynchronous BatchTasks may temporarily
// exceed its limit in a single round.
// When _overrun is true, even if only 1 batch slot remains in BatchTasks, a
// complete Task is still placed into it, temporarily exceeding the limit.
// When _overrun is false, this is not allowed.
// If the model itself has a maximum batch limit, set it to false (the
// default). If the model has no maximum batch limit but a maximum batch was
// configured for BatchTasks, setting it to true can be considered.
// _allow_split_request == true allows splitting a task: if 1 batch slot
// remains in BatchTasks, 1 batch is split off from the next Task.
// _allow_split_request == false means tasks are never split; a remaining
// 1-batch slot in BatchTasks is wasted.
// The default is true, which allows splitting tasks to maximize utilization.
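// Illustrative example (hypothetical numbers): capacity 8, 7 slots filled,
// next task has batch_size = 4:
//   _allow_split_request = true  -> 1 batch is split off; the other 3 wait.
//   _allow_split_request = false, _overrun = true  -> the whole task (4) is
//     appended, temporarily exceeding the capacity.
//   _allow_split_request = false, _overrun = false -> the task waits for the
//     next BatchTasks round.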
if (!batchTask.get_allow_split_request()) {
if (task->batch_size() > batchTask.get_rem_size() &&
!batchTask.get_overrun()) {
break;
}
}
// combine_task_valid decides whether two tasks can be merged.
// Apart from the outermost dimension, the inner shapes must be identical for
// a merge; otherwise we break out of the loop and the task goes into the
// next batchTask. This guarantees that every task passed to
// batchTask.append_task(task) shares the same inner shapes.
// For feedvars with shape[0] == 1 but != batch, only one of the values is
// kept when merging, so that feedvar must be identical for the merge to
// happen; otherwise we break out of the loop and the task goes into the
// next batchTask.
// PaddleTensor and PaddleBuf currently do not overload operator==, so only
// a raw memory comparison is possible.
// TODO(HexToString): consider supporting AutoPadding later.
if (previous_task != nullptr) {
if (!task->combine_task_valid(previous_task)) {
break;
}
}
size_t rem = batchTask.append_task(task);
previous_task = task;
if (task->rem <= 0) {
_task_queue.pop_front();
}
if (rem <= 0) break;
}
LOG(INFO) << "Number of tasks remaining in _task_queue is"
<< _task_queue.size();
return true;
}
......@@ -201,11 +336,12 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
// TaskT is from the SingleTon TaskExecutor`s _task_queue
// although TaskMeta is a local variable, but several TaskMeta may points to
// the same TaskT which is get from the SingleTon TaskExecutor`s _task_queue.
// put TaskMeta to the local variable BatchTasks<TaskT> batch.
// put TaskMeta to the local variable BatchTasks<TaskT> batchTask.
// batch.merge_tasks() and batch.notify_tasks() has no lock.
// BatchTasks<TaskT> batch itself is a local variable, it`s thread safe.
// If batch.merge_tasks() and batch.notify_tasks() do something to TaskMeta
// batchTask.merge_tasks() and batchTask.notify_tasks() has no lock.
// BatchTasks<TaskT> batchTask itself is a local variable, it`s thread safe.
// If batchTask.merge_tasks() and batchTask.notify_tasks() do something to
// TaskMeta
// you need to pay attention to that.
// Multi-Thread deal with different TaskMeta(cause it`s created as local
// variable)
......@@ -242,11 +378,23 @@ int TaskExecutor<TaskT>::work(ThreadContext<TaskT>* context) {
return -1;
}
BatchTasks<TaskT> batch(_batch_size, _batch_align);
if (move_task_to_batch(batch)) {
batch.merge_tasks();
_fn(&batch.in(), &batch.out());
batch.notify_tasks();
// move_task_to_batch() takes the original task from the `_task_queue` and
// puts it into the BatchTasks' own Vector<taskmeta>; the capacity of that
// vector is decided by `_batch_size` or `_overrun`.
// merge_tasks() moves the input data from the Vector<taskmeta> into
// `_batch_in`, because the predictor's input is `_batch_in`.
// notify_tasks() moves the output data from `_batch_out` into every single
// taskmeta, because the predictor's output is `_batch_out`.
BatchTasks<TaskT> batchTask(_batch_size, _overrun, _allow_split_request);
if (move_task_to_batch(batchTask)) {
batchTask.merge_tasks();
_fn(&batchTask.in(), &batchTask.out());
batchTask.notify_tasks();
}
}
......
This diff has been collapsed.
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#include "core/predictor/framework/cache.h"
#include <dirent.h>
#include <sys/stat.h>
#include <fstream>
#include <string>
#include <utility>
#include "core/cube/cube-builder/include/cube-builder/seqfile_reader.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
int CubeCache::clear() {
for (auto it = _map_cache.begin(); it != _map_cache.end(); ++it) {
if (it->second) {
delete (it->second);
it->second = nullptr;
}
}
_map_cache.clear();
return 0;
}
rec::mcube::CubeValue* CubeCache::get_data(uint64_t key) {
auto it = _map_cache.find(key);
if (it != _map_cache.end()) {
return it->second;
}
return nullptr;
}
int CubeCache::reload_data(const std::string& cache_path) {
LOG(INFO) << "cube cache is loading data, path: " << cache_path;
DIR* dp = nullptr;
struct dirent* dirp = nullptr;
struct stat st;
// clear cache data
clear();
// loading data from cache files
if (stat(cache_path.c_str(), &st) < 0 || !S_ISDIR(st.st_mode)) {
LOG(ERROR) << "invalid cache path " << cache_path;
return -1;
}
if ((dp = opendir(cache_path.c_str())) == nullptr) {
LOG(ERROR) << "opendir " << cache_path << " fail.";
return -1;
}
while ((dirp = readdir(dp)) != nullptr) {
// filtering by file type.
if (dirp->d_type != DT_REG) {
continue;
}
// Filter upper-level directories and hidden files
if ((!strncmp(dirp->d_name, ".", 1)) || (!strncmp(dirp->d_name, "..", 2))) {
continue;
}
// Match files whose name contains 'part-'
if (std::string(dirp->d_name).find("part-") != std::string::npos) {
SequenceFileRecordReader reader(cache_path + "/" + dirp->d_name);
if (reader.open() != 0) {
LOG(ERROR) << "open file failed! " << dirp->d_name;
continue;
}
if (reader.read_header() != 0) {
LOG(ERROR) << "read header error! " << dirp->d_name;
reader.close();
continue;
}
Record record(reader.get_header());
while (reader.next(&record) == 0) {
uint64_t key =
*reinterpret_cast<uint64_t*>(const_cast<char*>(record.key.data()));
auto it_find = _map_cache.find(key);
if (it_find != _map_cache.end()) {
// loaded a duplicate key
LOG(WARNING) << "Load duplicate key:" << key
<< " from file:" << dirp->d_name;
continue;
}
rec::mcube::CubeValue* new_value = new rec::mcube::CubeValue();
new_value->error = 0;
new_value->buff.swap(record.value);
_map_cache.insert(std::make_pair(key, new_value));
}
LOG(WARNING) << "Load cube cache file " << dirp->d_name << " done.";
}
LOG(WARNING) << "Load all cube cache files done";
}
return 0;
}
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sys/types.h>
#include <numeric>
#include <string>
#include <unordered_map>
#include "core/cube/cube-api/include/cube_api.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
// Large models that use sparse parameters may use cube cache.
// When the cube cache exists, the model is required to be
// consistent with the version of the cube cache. Therefore,
// when the model is updated, the model and the cube cache are
// required to be reloaded at the same time.
// All cached data is loaded at once and never updated in place, so the
// two cube caches can be switched lock free.
class CubeCache {
public:
CubeCache() {}
~CubeCache() { clear(); }
// clear cache data.
int clear();
// get cache data by key
rec::mcube::CubeValue* get_data(uint64_t key);
// reload all cache files from cache_path
int reload_data(const std::string& cache_path);
private:
// lock-free switching; key type is uint64_t, value type is CubeValue*
std::unordered_map<uint64_t, rec::mcube::CubeValue*> _map_cache;
};
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
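// --- Editor's sketch, not part of the original source ----------------------
// Minimal usage of the CubeCache declared above, assuming cache.h and
// cube_api.h are included; the directory and key below are hypothetical.
static void cube_cache_usage_example() {
  baidu::paddle_serving::predictor::CubeCache cache;
  // Loads every 'part-*' SequenceFile found under this directory.
  if (cache.reload_data("./cube_cache_dir") != 0) {
    return;  // invalid path or unreadable cache files
  }
  const uint64_t key = 12345;  // a sparse-parameter key
  rec::mcube::CubeValue* value = cache.get_data(key);  // nullptr if missing
  if (value != nullptr) {
    // value->buff holds the raw bytes stored for this key.
  }
}
// --- end of sketch ----------------------------------------------------------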
......@@ -21,6 +21,15 @@
#include <string>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/op_repository.h"
#ifdef BCLOUD
#include <base/atomicops.h>
#else
#include <butil/atomicops.h>
#endif
#include <errno.h>
#include "core/predictor/framework/resource.h"
using baidu::paddle_serving::predictor::Resource;
namespace baidu {
namespace paddle_serving {
......@@ -238,6 +247,77 @@ const Channel* DagView::get_response_channel(const uint64_t log_id) const {
return last_op->mutable_channel();
}
void* call_back(void* ori_args) {
Resource::instance().thread_initialize();
Args* args = (Args*)ori_args;
Op* op = static_cast<Op*>(args->_op);
uint64_t log_id = static_cast<uint64_t>(args->_log_id);
bool debug = static_cast<bool>(args->_debug);
args->errcode = op->process(log_id, debug);
return nullptr;
}
int ParallelDagView::execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
butil::Timer stage_time(butil::Timer::STARTED);
uint32_t node_size = vstage->nodes.size();
std::vector<THREAD_T> tids(node_size);
Args* args = new Args[node_size];
VLOG(2) << "(logid=" << log_id << ") vstage->nodes.size(): " << node_size;
for (uint32_t ni = 0; ni < node_size; ni++) {
ViewNode* vnode = vstage->nodes[ni];
DagNode* conf = vnode->conf;
Op* op = vnode->op;
TRACEPRINTF(
"(logid=%" PRIu64 ") start to execute op[%s]", log_id, op->name());
args[ni]._op = op;
args[ni]._log_id = log_id;
args[ni]._debug = (debug_os != NULL);
int rc = THREAD_CREATE(&tids[ni], NULL, call_back, (void*)(args + ni));
if (rc != 0) {
LOG(ERROR) << "failed to create ParallelDagView worker thread: index="
<< ni << ", rc=" << rc << ", errno=" << errno << ":"
<< strerror(errno);
delete[] args;
return -1;
}
}
for (uint32_t ni = 0; ni < node_size; ni++) {
THREAD_JOIN(tids[ni], NULL);
int errcode = args[ni].errcode;
Op* op = args[ni]._op;
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute op[%s]", log_id, op->name());
if (errcode < 0) {
LOG(ERROR) << "(logid=" << log_id
<< ") Execute failed, Op:" << op->debug_string();
delete[] args;
return errcode;
}
if (errcode > 0) {
LOG(INFO) << "(logid=" << log_id
<< ") Execute ignore, Op:" << op->debug_string();
continue;
}
if (debug_os) {
(*debug_os) << "(logid=" << log_id << ") {\"op_name\": \"" << op->name()
<< "\", \"debug_str:\": \"" << op->debug_string()
<< "\", \"time_info\": \"" << op->time_info() << "\"}";
}
// LOG(DEBUG) << "Execute succ, Op:" << op->debug_string();
}
stage_time.stop();
PredictorMetric::GetInstance()->update_latency_metric(
STAGE_METRIC_PREFIX + vstage->full_name, stage_time.u_elapsed());
delete[] args;
return ERR_OK;
}
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
......@@ -24,7 +24,7 @@ namespace baidu {
namespace paddle_serving {
namespace predictor {
class Op;
// class Op;
struct ViewNode {
Op* op; // op->full_name == service_workflow_stageindex_opname
......@@ -75,11 +75,20 @@ class DagView {
Bus* _bus;
};
struct Args {
Op* _op;
uint64_t _log_id;
bool _debug;
int errcode;
};
// The derived DagView supports a parallel execution
// strategy by implementing execute_one_stage().
class ParallelDagView : public DagView {
public:
int execute_one_stage(ViewStage* vstage, butil::IOBufBuilder*) { return 0; }
virtual int execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os);
};
} // namespace predictor
......
......@@ -25,7 +25,8 @@ int ReloadableInferEngine::proc_initialize_impl(
_model_dir = conf.model_dir();
_infer_thread_num = conf.runtime_thread_num();
_infer_batch_size = conf.batch_infer_size();
_infer_batch_align = conf.enable_batch_align();
_infer_overrun = conf.enable_overrun();
_allow_split_request = conf.allow_split_request();
_conf = conf;
......@@ -56,9 +57,6 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
}
// init bsf framework
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this));
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this));
......@@ -69,8 +67,10 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
boost::bind(&InferEngine::task_infer_impl, this, _1, _2));
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_size(
_infer_batch_size);
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_align(
_infer_batch_align);
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_overrun(
_infer_overrun);
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_allow_split_request(_allow_split_request);
if (im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].start(
_infer_thread_num) != 0) {
LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num;
......@@ -79,7 +79,8 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
LOG(WARNING) << "Enable batch schedule framework, thread_num:"
<< _infer_thread_num << ", batch_size:" << _infer_batch_size
<< ", enable_batch_align:" << _infer_batch_align;
<< ", enable_overrun:" << _infer_overrun
<< ", allow_split_request:" << _allow_split_request;
return 0;
}
......@@ -348,7 +349,7 @@ T* VersionedInferEngine::get_core() {
}
template <typename T>
T* VersionedInferEngine::get_core(uint64_t version) {
T* VersionedInferEngine::get_core(const uint64_t version) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
......@@ -363,6 +364,15 @@ T* VersionedInferEngine::get_core(uint64_t version) {
return NULL;
}
CubeCache* VersionedInferEngine::get_cube_cache() {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
return nullptr;
}
return engine->get_cube_cache();
}
int VersionedInferEngine::proc_initialize_impl(
const configure::EngineDesc& conf, bool) {
return -1;
......@@ -382,6 +392,11 @@ int VersionedInferEngine::task_infer_impl(const void* in,
return -1;
}
int InferManager::set_taskexecutor_num(size_t total_engine_num) {
im::bsf::TaskExecutorVector<TaskT>::instance().resize(total_engine_num);
return 0;
}
int InferManager::proc_initialize(const char* path,
const char* file,
std::shared_ptr<int> engine_index_ptr) {
......@@ -391,8 +406,6 @@ int InferManager::proc_initialize(const char* path,
return -1;
}
uint32_t engine_num = model_toolkit_conf.engines_size();
im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr +
engine_num);
for (uint32_t ei = 0; ei < engine_num; ++ei) {
LOG(INFO) << "model_toolkit_conf.engines(" << ei
<< ").name: " << model_toolkit_conf.engines(ei).name();
......@@ -502,6 +515,15 @@ T* InferManager::get_core(const char* model_name) {
return NULL;
}
CubeCache* InferManager::get_cube_cache(const char* model_name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
return nullptr;
}
return it->second->get_cube_cache();
}
// Versioned inference interface
int InferManager::infer(const char* model_name,
const void* in,
......@@ -517,7 +539,7 @@ int InferManager::infer(const char* model_name,
}
template <typename T>
T* InferManager::get_core(const char* model_name, uint64_t version) {
T* InferManager::get_core(const char* model_name, const uint64_t version) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
......
......@@ -135,6 +135,17 @@ int Resource::initialize(const std::string& path, const std::string& file) {
if (FLAGS_enable_model_toolkit) {
size_t model_toolkit_num = resource_conf.model_toolkit_path_size();
// For now we assume that each model_toolkit contains exactly one engine,
// so model_toolkit_num == total number of engines.
// If a model_toolkit can contain multiple engines in the future,
// first count the total number of engines in a for loop and only then call
// set_taskexecutor_num.
// Never resize im::bsf::TaskExecutorVector<TaskT>::instance() dynamically:
// TaskExecutor is a thread pool that contains a lock, and its worker loop
// has already started taking that lock once the engine is initialized.
// A later resize moves the memory, so the workers keep using the old lock
// while the relocated TaskExecutor`s lock memory has already changed.
if (InferManager::instance().set_taskexecutor_num(model_toolkit_num) != 0) {
LOG(ERROR) << "failed set_taskexecutor_num";
return -1;
}
std::shared_ptr<int> engine_index_ptr(new int(0));
for (size_t mi = 0; mi < model_toolkit_num; ++mi) {
std::string model_toolkit_path = resource_conf.model_toolkit_path(mi);
......@@ -165,18 +176,18 @@ int Resource::initialize(const std::string& path, const std::string& file) {
rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance();
std::string cube_config_fullpath = "./" + resource_conf.cube_config_path() +
"/" + resource_conf.cube_config_file();
this->cube_config_fullpath = cube_config_fullpath;
this->cube_quant_bits = resource_conf.has_cube_quant_bits()
? resource_conf.cube_quant_bits()
: 0;
if (this->cube_quant_bits != 0 && this->cube_quant_bits != 8) {
this->_cube_config_fullpath = cube_config_fullpath;
this->_cube_quant_bits = resource_conf.has_cube_quant_bits()
? resource_conf.cube_quant_bits()
: 0;
if (this->_cube_quant_bits != 0 && this->_cube_quant_bits != 8) {
LOG(ERROR) << "Cube quant bits illegal! should be 0 or 8.";
return -1;
}
if (this->cube_quant_bits == 0) {
if (this->_cube_quant_bits == 0) {
LOG(INFO) << "cube quant mode OFF";
} else {
LOG(INFO) << "cube quant mode ON, quant bits: " << this->cube_quant_bits;
LOG(INFO) << "cube quant mode ON, quant bits: " << this->_cube_quant_bits;
}
}
......@@ -187,10 +198,10 @@ int Resource::initialize(const std::string& path, const std::string& file) {
// model config
int Resource::general_model_initialize(const std::string& path,
const std::string& file) {
if (this->cube_config_fullpath.size() != 0) {
LOG(INFO) << "init cube by config file : " << this->cube_config_fullpath;
if (this->_cube_config_fullpath.size() != 0) {
LOG(INFO) << "init cube by config file : " << this->_cube_config_fullpath;
rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance();
int ret = cube->init(this->cube_config_fullpath.c_str());
int ret = cube->init(this->_cube_config_fullpath.c_str());
if (ret != 0) {
LOG(ERROR) << "cube init error";
return -1;
......@@ -315,7 +326,7 @@ int Resource::thread_clear() {
}
return 0;
}
size_t Resource::get_cube_quant_bits() { return this->cube_quant_bits; }
size_t Resource::get_cube_quant_bits() { return this->_cube_quant_bits; }
int Resource::reload() {
if (FLAGS_enable_model_toolkit && InferManager::instance().reload() != 0) {
......
......@@ -16,8 +16,10 @@
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/infer.h"
......@@ -27,6 +29,8 @@ namespace baidu {
namespace paddle_serving {
namespace predictor {
// Paddle general model configuration; reads the model configuration
// information from the general_model_config.proto file.
class PaddleGeneralModelConfig {
public:
PaddleGeneralModelConfig() {}
......@@ -34,23 +38,47 @@ class PaddleGeneralModelConfig {
~PaddleGeneralModelConfig() {}
public:
// feed/fetch name and alias_name
std::vector<std::string> _feed_name;
std::vector<std::string> _feed_alias_name;
std::vector<int> _feed_type; // 0 int64, 1 float
std::vector<bool> _is_lod_feed; // true lod tensor
std::vector<bool> _is_lod_fetch; // whether a fetch var is lod_tensor
std::vector<int> _capacity; // capacity for each tensor
/*
feed_shape_ for each fed variable
feed_shape_[i][j] represents the jth dim for ith input Tensor
if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
*/
std::vector<std::vector<int>> _feed_shape;
std::vector<std::string> _fetch_name;
std::vector<std::string> _fetch_alias_name;
// Be consistent with the var type conversion in the model saving interface
// (python/paddle_serving_client/io/__init__.py):
// int64 => 0;
// float32 => 1;
// int32 => 2;
// float64 => 3;
// int16 => 4;
// float16 => 5;
// bfloat16 => 6;
// uint8 => 7;
// int8 => 8;
// bool => 9;
// complex64 => 10;
// complex128 => 11;
std::vector<int> _feed_type;
// whether a feed or fetch var is lod_tensor.
std::vector<bool> _is_lod_feed;
std::vector<bool> _is_lod_fetch;
// capacity for each tensor
std::vector<int> _capacity;
// _feed_shape and _fetch_shape represent the dimensional information of the
// tensors.
// For example, _feed_shape[i][j] is the j-th dim of the i-th input tensor.
// If _is_lod_feed[i] == false, _feed_shape[i][0] = -1.
std::vector<std::vector<int>> _feed_shape;
std::vector<std::vector<int>> _fetch_shape;
// fetch name -> index of fetch_name vector.
std::map<std::string, int> _fetch_name_to_index;
// fetch alias name -> index of fetch_alias_name vector.
std::map<std::string, int> _fetch_alias_name_to_index;
};
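// --- Editor's sketch, not part of the original source ----------------------
// The _feed_type convention listed above, spelled out as a hypothetical enum
// purely for readability; the framework itself stores these codes as plain
// ints.
enum FeedVarType {
  FEED_INT64 = 0,
  FEED_FLOAT32 = 1,
  FEED_INT32 = 2,
  FEED_FLOAT64 = 3,
  FEED_INT16 = 4,
  FEED_FLOAT16 = 5,
  FEED_BFLOAT16 = 6,
  FEED_UINT8 = 7,
  FEED_INT8 = 8,
  FEED_BOOL = 9,
  FEED_COMPLEX64 = 10,
  FEED_COMPLEX128 = 11
};
// --- end of sketch ----------------------------------------------------------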
......@@ -73,33 +101,50 @@ class Resource {
return ins;
}
// initialize resource
int initialize(const std::string& path, const std::string& file);
// load all model configurations from prototxt
int general_model_initialize(const std::string& path,
const std::string& file);
// initialize thread local data
int thread_initialize();
// clear thread local data
int thread_clear();
// reload resources
int reload();
// finalize
int finalize();
// get all model configs
std::vector<std::shared_ptr<PaddleGeneralModelConfig>>
get_general_model_config();
// print all configurations of all models
void print_general_model_config(
const std::shared_ptr<PaddleGeneralModelConfig>& config);
// get cube quantization bit size
size_t get_cube_quant_bits();
private:
int thread_finalize() { return 0; }
private:
// configuration information of all models, loaded from prototxt files
std::vector<std::shared_ptr<PaddleGeneralModelConfig>> _configs;
std::string cube_config_fullpath;
int cube_quant_bits; // 0 if no quantization
// full path of cube configuration file.
std::string _cube_config_fullpath;
// cube quantization bit size; supports 0 or 8, set 0 for no quantization.
size_t _cube_quant_bits;
// bthread local key
THREAD_KEY_T _tls_bspec_key;
};
......
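// --- Editor's sketch, not part of the original source ----------------------
// Typical lifecycle of the Resource singleton declared above; the config
// file names are hypothetical.
static void resource_lifecycle_example() {
  using baidu::paddle_serving::predictor::Resource;
  Resource& res = Resource::instance();
  // Process-level initialization.
  if (res.initialize("./conf", "resource.prototxt") != 0) return;
  if (res.general_model_initialize("./conf", "general_model.prototxt") != 0) {
    return;
  }
  res.thread_initialize();  // once per worker thread (see call_back() above)
  // ... serve requests ...
  res.thread_clear();  // per-thread teardown
  res.finalize();      // process-level teardown
}
// --- end of sketch ----------------------------------------------------------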
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz
tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
tar xf cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
--port=8027
--dict_split=1
--in_mem=true
--log_dir=./log/