Commit 770b6c26 authored by bjjwwang

Merge branch 'develop' of https://github.com/paddlepaddle/serving into develop

@@ -188,7 +188,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
| `use_lite` (Only for Intel x86 CPU or ARM CPU) | - | - | Run PaddleLite inference |
| `use_xpu` | - | - | Run PaddleLite inference with Baidu Kunlun XPU |
| `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
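For illustration, a minimal sketch of launching the server with the `precision` option from the table above, driven from Python. The model, thread, and port values mirror the uci_housing example shown elsewhere in this commit; boolean options such as `use_calib` and `gpu_multi_stream` are omitted because their exact command-line syntax may differ between Serving versions, so treat this as an assumption-laden sketch rather than the canonical invocation.

```python
# Hedged sketch: start the serving process with a precision setting
# (FP32 / FP16 / INT8, as documented in the table above).
import subprocess

cmd = [
    "python3", "-m", "paddle_serving_server.serve",
    "--model", "uci_housing_model",
    "--thread", "10",
    "--port", "9393",
    "--precision", "FP16",
]
subprocess.run(cmd, check=True)
```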
#### Description of asynchronous model
......
@@ -187,7 +187,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
| `use_lite` (Only for Intel x86 CPU or ARM CPU) | - | - | Run PaddleLite inference |
| `use_xpu` | - | - | Run PaddleLite inference with Baidu Kunlun XPU |
| `precision` | str | FP32 | Precision Mode, support FP32, FP16, INT8 |
| `use_calib` | bool | False | Use TRT int8 calibration |
| `gpu_multi_stream` | bool | False | EnableGpuMultiStream to get larger QPS |
#### Description of asynchronous model
......
@@ -61,8 +61,11 @@ else()
endif()
if(CUDNN_FOUND)
  if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h")
    file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
  elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h")
    file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
  endif()
  get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)
  string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
......
@@ -27,52 +27,54 @@ set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/
message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
SET(PADDLE_VERSION "2.2.0-rc0")
if (WITH_GPU)
  message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}")
  # cuda 11.0 is not supported, 11.2 would be added.
  if(CUDA_VERSION EQUAL 10.1)
    set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5")
    set(WITH_TRT ON)
  elseif(CUDA_VERSION EQUAL 10.2)
    if(CUDNN_MAJOR_VERSION EQUAL 7)
      set(CUDA_SUFFIX "x86-64_gcc5.4_avx_mkl_cuda10.2_cudnn7.6.5_trt6.0.1.5")
      set(WITH_TRT ON)
    elseif(CUDNN_MAJOR_VERSION EQUAL 8)
      set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda10.2_cudnn8.1.1_trt7.2.3.4")
      set(WITH_TRT ON)
    endif()
  elseif(CUDA_VERSION EQUAL 11.2)
    set(CUDA_SUFFIX "x86-64_gcc8.2_avx_mkl_cuda11.2_cudnn8.2.1_trt8.0.3.4")
    set(WITH_TRT ON)
  endif()
else()
  set(WITH_TRT OFF)
endif()
if (WITH_GPU)
  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/GPU/${CUDA_SUFFIX}")
elseif (WITH_LITE)
  if (WITH_XPU)
    SET(PADDLE_LIB_VERSION "arm64_gcc7.3_openblas")
  else()
    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
  endif()
else()
  if (WITH_AVX)
    if (WITH_MKLML)
      SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_avx_mkl")
    else()
      SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_avx_openblas")
    endif()
  else()
    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_openblas")
  endif()
endif()
if(WITH_LITE)
  SET(PADDLE_LIB_PATH "https://paddle-inference-lib.bj.bcebos.com/2.2.0-rc0/cxx_c/Linux/XPU/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz ")
else()
  SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
endif()
......
@@ -12,41 +12,97 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package baidu.paddle_serving.predictor.general_model;
option java_multiple_files = true;
option cc_generic_services = true;

message Tensor {
  // VarType: INT64
  repeated int64 int64_data = 1;

  // VarType: FP32
  repeated float float_data = 2;

  // VarType: INT32
  repeated int32 int_data = 3;

  // VarType: FP64
  repeated double float64_data = 4;

  // VarType: UINT32
  repeated uint32 uint32_data = 5;

  // VarType: BOOL
  repeated bool bool_data = 6;

  // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1
  // represents the imaginary part
  repeated float complex64_data = 7;

  // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1
  // represents the imaginary part
  repeated double complex128_data = 8;

  // VarType: STRING
  repeated string data = 9;

  // Element types:
  // 0 => INT64
  // 1 => FP32
  // 2 => INT32
  // 3 => FP64
  // 4 => INT16
  // 5 => FP16
  // 6 => BF16
  // 7 => UINT8
  // 8 => INT8
  // 9 => BOOL
  // 10 => COMPLEX64
  // 11 => COMPLEX128
  // 20 => STRING
  int32 elem_type = 10;

  // Shape of the tensor, including batch dimensions.
  repeated int32 shape = 11;

  // Level of data(LOD), support variable length data, only for fetch tensor
  // currently.
  repeated int32 lod = 12;

  // Correspond to the variable 'name' in the model description prototxt.
  string name = 13;

  // Correspond to the variable 'alias_name' in the model description prototxt.
  string alias_name = 14; // get from the Model prototxt

  // VarType: FP16, INT16, INT8, BF16, UINT8
  bytes tensor_content = 15;
};

message Request {
  repeated Tensor tensor = 1;
  repeated string fetch_var_names = 2;
  bool profile_server = 3;
  uint64 log_id = 4;
};

message Response {
  repeated ModelOutput outputs = 1;
  repeated int64 profile_time = 2;
  // Error code
  int32 err_no = 3;
  // Error messages
  string err_msg = 4;
};

message ModelOutput {
  repeated Tensor tensor = 1;
  string engine_name = 2;
}

service GeneralModelService {
  rpc inference(Request) returns (Response);
  rpc debug(Request) returns (Response);
};
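As a quick reference for filling `elem_type` from a client, the sketch below mirrors the codes and typed fields documented in the `Tensor` message above. It is a convenience mapping written for this document (the name `ELEM_TYPE_TO_FIELD` is hypothetical, not generated from the proto); FP16/INT16/INT8/BF16/UINT8 payloads travel in `tensor_content`.

```python
# Convenience sketch: elem_type codes from the Tensor comment above, paired
# with the proto field that carries the payload for that element type.
ELEM_TYPE_TO_FIELD = {
    0: ("INT64", "int64_data"),
    1: ("FP32", "float_data"),
    2: ("INT32", "int_data"),
    3: ("FP64", "float64_data"),
    4: ("INT16", "tensor_content"),
    5: ("FP16", "tensor_content"),
    6: ("BF16", "tensor_content"),
    7: ("UINT8", "tensor_content"),
    8: ("INT8", "tensor_content"),
    9: ("BOOL", "bool_data"),
    10: ("COMPLEX64", "complex64_data"),
    11: ("COMPLEX128", "complex128_data"),
    20: ("STRING", "data"),
}
```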
@@ -22,11 +22,8 @@ message EngineDesc {
  required string reloadable_type = 4;
  required string model_dir = 5;
  repeated int32 gpu_ids = 6;
  optional string version_file = 7;
  optional string version_type = 8;

  /*
   * Sparse Parameter Service type. Valid types are:
@@ -39,17 +36,34 @@ message EngineDesc {
    LOCAL = 1;
    REMOTE = 2;
  }
  optional SparseParamServiceType sparse_param_service_type = 10;
  optional string sparse_param_service_table_name = 11;
  optional bool enable_memory_optimization = 12;
  optional bool enable_ir_optimization = 13;
  optional bool use_trt = 14;
  optional bool use_lite = 15;
  optional bool use_xpu = 16;
  optional bool use_gpu = 17;
  optional bool combined_model = 18;
  optional bool encrypted_model = 19;
  optional bool gpu_multi_stream = 20;

  /*
   * "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling
   * mode.
   * n > 0 means how many Predictor for this engine in Asynchronous task
   * scheduling mode.
   * "batch_infer_size": the max batch for this engine in Asynchronous task
   * scheduling mode.
   * "enable_overrun": always put a whole task into the TaskQueue even if the
   * total batch is bigger than "batch_infer_size".
   * "allow_split_request": allow to split task(which is corresponding to
   * request).
   */
  optional int32 runtime_thread_num = 30 [ default = 0 ];
  optional int32 batch_infer_size = 31 [ default = 32 ];
  optional bool enable_overrun = 32 [ default = false ];
  optional bool allow_split_request = 33 [ default = true ];
};

// model_toolkit conf
@@ -61,11 +75,14 @@ message ResourceConf {
  repeated string model_toolkit_file = 2;
  repeated string general_model_path = 3;
  repeated string general_model_file = 4;

  optional string cube_config_path = 10;
  optional string cube_config_file = 11;
  optional int32 cube_quant_bits = 12;
  optional string cube_cache_path = 13;

  optional string auth_product_name = 20;
  optional string auth_container_id = 21;
};

// DAG node depency info
......
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
)
func main() {
dict_name := flag.String("n", "test", "cube name")
conf_path := flag.String("c", "./conf/cube.conf", "cube conf path")
input_path := flag.String("i", "./input.json", "keys to seek")
output_path := flag.String("o", "./output.json", "result to save")
flag.Parse()
bytes, err := ioutil.ReadFile(*conf_path)
if err != nil {
fmt.Println("读取配置文件失败", err)
return
}
var meta Meta
err = json.Unmarshal(bytes, &meta.Servers)
if err != nil {
fmt.Println("解析数据失败", err)
return
}
err = meta.Seek(*dict_name, *input_path, *output_path)
if err != nil {
fmt.Println(err)
}
return
}
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
package main
import "fmt"
type Meta struct {
Servers []CubeServer `json:"servers,omitempty"`
}
func (meta *Meta) Seek(dict_name string, input string, output string) (err error) {
var server CubeServer
for _, s := range meta.Servers {
if s.Name == dict_name {
server = s
break
}
}
if server.Name != dict_name {
err = fmt.Errorf("%s server not exist", dict_name)
return err
}
err = server.Seek(input, output)
return err
}
package main
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
)
type Input struct {
Keys []uint64 `json:"keys"`
}
type SingleValue struct {
Status uint32 `json:"status"`
Value string `json:"value"`
}
type Output struct {
Values []SingleValue `json:"values"`
}
type ServerNode struct {
Ip string `json:"ip"`
Port uint64 `json:"port"`
}
type CubeServer struct {
Name string `json:"dict_name"`
Shard uint64 `json:"shard"`
Nodes []ServerNode `json:"nodes"`
}
func (server *CubeServer) SplitKeys(keys []uint64) (splited_keys map[uint64]Input, offset map[uint64][]uint64) {
splited_keys = make(map[uint64]Input)
offset = make(map[uint64][]uint64)
for i, key := range keys {
shard_id := key % server.Shard
temp_split, _ := splited_keys[shard_id]
temp_split.Keys = append(temp_split.Keys, key)
splited_keys[shard_id] = temp_split
temp_offset, _ := offset[shard_id]
temp_offset = append(temp_offset, uint64(i))
offset[shard_id] = temp_offset
}
return splited_keys, offset
}
func (server *CubeServer) Seek(input string, output_path string) (err error) {
file, err := os.Open(input)
if err != nil {
return err
}
defer file.Close()
buf := bufio.NewReader(file)
for {
line, err := buf.ReadBytes('\n')
//line = strings.TrimSpace(line)
if err != nil || io.EOF == err {
break
}
var temp_input Input
json.Unmarshal(line, &temp_input)
key_nums := len(temp_input.Keys)
var output Output
output.Values = make([]SingleValue, key_nums+1)
splited_keys, offset := server.SplitKeys(temp_input.Keys)
for shard_id, keys := range splited_keys {
cur_output, _ := server.Post(shard_id, keys)
for index, single_value := range cur_output.Values {
output.Values[offset[shard_id][index]] = single_value
}
}
json_str, _ := json.Marshal(output)
fp, err := os.OpenFile(output_path, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0755)
if err != nil {
log.Fatal(err)
}
defer fp.Close()
_, err = fp.Write(json_str)
}
return err
}
func (server *CubeServer) Post(shard_id uint64, input Input) (output Output, err error) {
if shard_id >= uint64(len(server.Nodes)) {
err = fmt.Errorf("have no shard:%v", shard_id)
return output, err
}
json_str, _ := json.Marshal(input)
URL := fmt.Sprintf("http://%s:%v/DictService/seek", server.Nodes[shard_id].Ip, server.Nodes[shard_id].Port)
req, err := http.NewRequest("POST", URL, bytes.NewBuffer(json_str))
if err != nil {
return output, err
}
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return output, err
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return output, err
}
err = json.Unmarshal(body, &output)
return output, err
}
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
#coding=utf-8
import requests
import sys
import json

class Meta(object):
    """Routing info for the sharded cube servers."""
    def __init__(self, conf_path):
        """Initialize the routing info from the config file."""
        self.server_api = "/DictService/seek"
        self.server_meta = {}
        with open(conf_path, "r", encoding="utf8") as fp:
            cube_servers = json.load(fp)
            for server in cube_servers:
                self.server_meta[server["dict_name"]] = server

    def seek(self, dict_name, keys_path, save_path):
        """Query the keys and save the results."""
        save_file = open(save_path, 'w')
        with open(keys_path, "r", encoding="utf8") as fp:
            lines = fp.readlines()
            for line in lines:
                json_line = json.loads(line)
                values = [{} for i in range(len(json_line["keys"]))]
                splited_keys, offset = self.split_keys(dict_name, json_line)
                for shard_id, keys in splited_keys.items():
                    results = self.post(dict_name, shard_id, keys)
                    for i, result in enumerate(results["values"]):
                        values[offset[shard_id][i]] = result
                cur_line_results = {}
                cur_line_results["values"] = values
                json.dump(cur_line_results, save_file)
                save_file.write("\n")
        save_file.close()

    def split_keys(self, dict_name, json_line):
        """Decide, from the key value and the shard count, which shard each key goes to."""
        keys_split = {}
        offset = {}
        i = 0
        for key in json_line["keys"]:
            shard_id = key % self.server_meta[dict_name]["shard"]
            if shard_id not in keys_split:
                keys_split[shard_id] = []
            keys_split[shard_id].append(key)
            if shard_id not in offset:
                offset[shard_id] = []
            offset[shard_id].append(i)
            i += 1
        return keys_split, offset

    def post(self, dict_name, shard_id, keys):
        """Send a POST request to the shard server."""
        api = "http://%s:%s%s" % (self.server_meta[dict_name]["nodes"][shard_id]["ip"],
                                  self.server_meta[dict_name]["nodes"][shard_id]["port"],
                                  self.server_api)
        data = {"keys": keys}
        response = requests.post(api, json.dumps(data))
        return response.json()

if __name__ == '__main__':
    if len(sys.argv) != 5:
        print('usage: python demo.py conf_path dict_name keys_path save_path')
        exit(0)
    conf_path = sys.argv[1]
    dict_name = sys.argv[2]
    keys_path = sys.argv[3]
    save_path = sys.argv[4]
    meta = Meta(conf_path)
    meta.seek(dict_name, keys_path, save_path)
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
# Cube Python API documentation
Deploy cube by following the [Deployment and Usage of the Large-Scale Sparse Parameter Service Cube](https://github.com/PaddlePaddle/Serving/blob/master/doc/DEPLOY.md#2-大规模稀疏参数服务cube的部署和使用) document.
The Python API can replace the deployment and usage of the prediction service described in Section 3 of that document.
## Configuration
conf/cube.conf is a JSON file that sets the ip and port of each shard's cube server; `shard` must equal the number of shards. Example:
```bash
[{
"dict_name": "test",
"shard": 2,
"nodes": [{
"ip": "127.0.0.1",
"port": 8731
},{
"ip": "127.0.0.1",
"port": 8730
}]
}]
```
## Data format
```bash
{"keys": [0,1,2,3,4,5,6,7]}
{"keys": [1]}
```
Batch queries are supported; each line is one query.
## Usage
```bash
cd ./python-api
python3 demo.py conf/cube.conf test input.json result.json
```
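Besides the command line above, the `Meta` class from demo.py can also be used directly from Python. A minimal sketch, assuming the working directory is `./python-api` so that demo.py, conf/cube.conf, and input.json are all reachable:

```python
# Minimal sketch: call the cube python api programmatically instead of via
# the command line. `Meta` and its seek() method come from demo.py above.
from demo import Meta

meta = Meta("conf/cube.conf")                   # load shard routing from the config
meta.seek("test", "input.json", "result.json")  # dict_name, keys file, output file
```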
{"values": [{"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}]}
{"values": [{"status": 4294967295, "value": ""}]}
@@ -3,3 +3,24 @@ add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
if(CLIENT)
FILE(GLOB client_srcs include/*.h src/client.cpp src/brpc_client.cpp)
add_library(client ${client_srcs})
add_dependencies(client utils sdk-cpp)
target_link_libraries(client utils sdk-cpp)
endif()
if(CLIENT)
include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../../)
add_executable(simple_client example/simple_client.cpp)
add_dependencies(simple_client utils sdk-cpp client)
target_link_libraries(simple_client -Wl,--whole-archive
-Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
target_link_libraries(simple_client utils)
target_link_libraries(simple_client sdk-cpp)
target_link_libraries(simple_client client)
endif()
# C++ client for Paddle Serving
(Simplified Chinese | [English](./README.md))
## Requesting the BRPC-Server
### Start the server
Take the fit_a_line model as an example; the server is started with the same command as a regular BRPC-Server.
```
cd ../../python/examples/fit_a_line
sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
```
### Client prediction
The client currently supports BRPC.
The BRPC wrapper is already implemented; see [brpc_client.cpp](./src/brpc_client.cpp) for details.
```
./simple_client --client_conf="uci_housing_client/serving_client_conf.prototxt" --server_port="127.0.0.1:9393" --test_type="brpc" --sample_type="fit_a_line"
```
For more examples, see [simple_client.cpp](./example/simple_client.cpp).
| Argument | Type | Default | Description |
| ---------------------------------------------- | ---- | ------------------------------------ | ----------------------------------------------------- |
| `client_conf` | str | `"serving_client_conf.prototxt"` | Path of client conf |
| `server_port` | str | `"127.0.0.1:9393"` | Exposed ip:port of server |
| `test_type` | str | `"brpc"` | Request mode, currently only "brpc" |
| `sample_type` | str | `"fit_a_line"` | Sample type, one of "fit_a_line", "bert" |
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <vector>
#include "core/general-client/include/brpc_client.h"
using baidu::paddle_serving::client::ServingClient;
using baidu::paddle_serving::client::ServingBrpcClient;
using baidu::paddle_serving::client::PredictorInputs;
using baidu::paddle_serving::client::PredictorOutputs;
DEFINE_string(server_port, "127.0.0.1:9292", "ip:port");
DEFINE_string(client_conf, "serving_client_conf.prototxt", "Path of client conf");
DEFINE_string(test_type, "brpc", "brpc");
// fit_a_line, bert
DEFINE_string(sample_type, "fit_a_line", "List: fit_a_line, bert");
namespace {
int prepare_fit_a_line(PredictorInputs& input, std::vector<std::string>& fetch_name) {
std::vector<float> float_feed = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
std::vector<int> float_shape = {1, 13};
std::string feed_name = "x";
fetch_name = {"price"};
std::vector<int> lod;
input.add_float_data(float_feed, feed_name, float_shape, lod);
return 0;
}
int prepare_bert(PredictorInputs& input, std::vector<std::string>& fetch_name) {
{
std::vector<float> float_feed(128, 0.0f);
float_feed[0] = 1.0f;
std::vector<int> float_shape = {1, 128, 1};
std::string feed_name = "input_mask";
std::vector<int> lod;
input.add_float_data(float_feed, feed_name, float_shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "position_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
feed[0] = 101;
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "input_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
{
std::vector<int64_t> feed(128, 0);
std::vector<int> shape = {1, 128, 1};
std::string feed_name = "segment_ids";
std::vector<int> lod;
input.add_int64_data(feed, feed_name, shape, lod);
}
fetch_name = {"pooled_output"};
return 0;
}
} // namespace
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::string url = FLAGS_server_port;
std::string conf = FLAGS_client_conf;
std::string test_type = FLAGS_test_type;
std::string sample_type = FLAGS_sample_type;
LOG(INFO) << "url = " << url << ";"
<< "client_conf = " << conf << ";"
<< "test_type = " << test_type
<< "sample_type = " << sample_type;
std::unique_ptr<ServingClient> client;
// default type is brpc
// will add grpc&http in the future
if (test_type == "brpc") {
client.reset(new ServingBrpcClient());
} else {
client.reset(new ServingBrpcClient());
}
std::vector<std::string> confs;
confs.push_back(conf);
if (client->init(confs, url) != 0) {
LOG(ERROR) << "Failed to init client!";
return 0;
}
PredictorInputs input;
PredictorOutputs output;
std::vector<std::string> fetch_name;
if (sample_type == "fit_a_line") {
prepare_fit_a_line(input, fetch_name);
}
else if (sample_type == "bert") {
prepare_bert(input, fetch_name);
}
else {
prepare_fit_a_line(input, fetch_name);
}
if (client->predict(input, output, fetch_name, 0) != 0) {
LOG(ERROR) << "Failed to predict!";
}
else {
LOG(INFO) << output.print();
}
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "core/general-client/include/client.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
using baidu::paddle_serving::sdk_cpp::Predictor;
using baidu::paddle_serving::sdk_cpp::PredictorApi;
namespace baidu {
namespace paddle_serving {
namespace client {
class ServingBrpcClient : public ServingClient {
public:
ServingBrpcClient() {};
~ServingBrpcClient() {};
virtual int connect(const std::string server_port);
int predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id);
private:
// generate default SDKConf
std::string gen_desc(const std::string server_port);
private:
PredictorApi _api;
Predictor* _predictor;
};
} // namespace client
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <memory>
namespace baidu {
namespace paddle_serving {
namespace predictor {
namespace general_model {
class Request;
class Response;
}
}
namespace client {
class PredictorInputs;
class PredictorOutputs;
class ServingClient {
public:
ServingClient() {};
virtual ~ServingClient() = default;
int init(const std::vector<std::string>& client_conf,
const std::string server_port);
int load_client_config(const std::vector<std::string>& client_conf);
virtual int connect(const std::string server_port) = 0;
virtual int predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id) = 0;
protected:
std::map<std::string, int> _feed_name_to_idx;
std::vector<std::string> _feed_name;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::map<std::string, int> _fetch_name_to_type;
std::vector<std::vector<int>> _shape;
std::vector<int> _type;
std::vector<int64_t> _last_request_ts;
};
class PredictorData {
public:
PredictorData() {};
virtual ~PredictorData() {};
void add_float_data(const std::vector<float>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 1);
void add_int64_data(const std::vector<int64_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 0);
void add_int32_data(const std::vector<int32_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 2);
void add_string_data(const std::string& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 20);
const std::map<std::string, std::vector<float>>& float_data_map() const {
return _float_data_map;
};
std::map<std::string, std::vector<float>>* mutable_float_data_map() {
return &_float_data_map;
};
const std::map<std::string, std::vector<int64_t>>& int64_data_map() const {
return _int64_data_map;
};
std::map<std::string, std::vector<int64_t>>* mutable_int64_data_map() {
return &_int64_data_map;
};
const std::map<std::string, std::vector<int32_t>>& int_data_map() const {
return _int32_data_map;
};
std::map<std::string, std::vector<int32_t>>* mutable_int_data_map() {
return &_int32_data_map;
};
const std::map<std::string, std::string>& string_data_map() const {
return _string_data_map;
};
std::map<std::string, std::string>* mutable_string_data_map() {
return &_string_data_map;
};
const std::map<std::string, std::vector<int>>& shape_map() const {
return _shape_map;
};
std::map<std::string, std::vector<int>>* mutable_shape_map() {
return &_shape_map;
};
const std::map<std::string, std::vector<int>>& lod_map() const {
return _lod_map;
};
std::map<std::string, std::vector<int>>* mutable_lod_map() {
return &_lod_map;
};
int get_datatype(std::string name) const;
void set_datatype(std::string name, int type);
std::string print();
private:
// used to print vector data map e.g. _float_data_map
template<typename T1, typename T2>
std::string map2string(const std::map<T1, std::vector<T2>>& map) {
std::ostringstream oss;
oss.str("");
oss.precision(6);
oss.setf(std::ios::fixed);
std::string key_seg = ":";
std::string val_seg = ",";
std::string end_seg = "\n";
typename std::map<T1, std::vector<T2>>::const_iterator it = map.begin();
typename std::map<T1, std::vector<T2>>::const_iterator itEnd = map.end();
for (; it != itEnd; it++) {
oss << "{";
oss << it->first << key_seg;
const std::vector<T2>& v = it->second;
oss << v.size() << key_seg;
for (size_t i = 0; i < v.size(); ++i) {
if (i != v.size() - 1) {
oss << v[i] << val_seg;
}
else {
oss << v[i];
}
}
oss << "}";
}
return oss.str();
};
// used to print data map without vector e.g. _string_data_map
template<typename T1, typename T2>
std::string map2string(const std::map<T1, T2>& map) {
std::ostringstream oss;
oss.str("");
std::string key_seg = ":";
std::string val_seg = ",";
std::string end_seg = "\n";
typename std::map<T1, T2>::const_iterator it = map.begin();
typename std::map<T1, T2>::const_iterator itEnd = map.end();
for (; it != itEnd; it++) {
oss << "{";
oss << it->first << key_seg
<< "size=" << it->second.size() << key_seg
<< "type=" << this->get_datatype(it->first);
oss << "}";
}
return oss.str();
};
protected:
std::map<std::string, std::vector<float>> _float_data_map;
std::map<std::string, std::vector<int64_t>> _int64_data_map;
std::map<std::string, std::vector<int32_t>> _int32_data_map;
std::map<std::string, std::string> _string_data_map;
std::map<std::string, std::vector<int>> _shape_map;
std::map<std::string, std::vector<int>> _lod_map;
std::map<std::string, int> _datatype_map;
};
class PredictorInputs : public PredictorData {
public:
PredictorInputs() {};
virtual ~PredictorInputs() {};
// generate proto from inputs
// feed_name_to_idx: mapping alias name to idx
// feed_name: mapping idx to name
static int GenProto(const PredictorInputs& inputs,
const std::map<std::string, int>& feed_name_to_idx,
const std::vector<std::string>& feed_name,
predictor::general_model::Request& req);
};
class PredictorOutputs {
public:
struct PredictorOutput {
std::string engine_name;
PredictorData data;
};
PredictorOutputs() {};
virtual ~PredictorOutputs() {};
const std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>>& datas() {
return _datas;
};
std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>>* mutable_datas() {
return &_datas;
};
void add_data(const std::shared_ptr<PredictorOutputs::PredictorOutput>& data) {
_datas.push_back(data);
};
std::string print();
void clear();
// Parse proto to outputs
// fetch_name: name of data to be output
// fetch_name_to_type: mapping of fetch_name to datatype
static int ParseProto(const predictor::general_model::Response& res,
const std::vector<std::string>& fetch_name,
std::map<std::string, int>& fetch_name_to_type,
PredictorOutputs& outputs);
protected:
std::vector<std::shared_ptr<PredictorOutputs::PredictorOutput>> _datas;
};
} // namespace client
} // namespace paddle_serving
} // namespace baidu
@@ -51,8 +51,13 @@ class ModelRes {
                            res._float_value_map.end());
    _int32_value_map.insert(res._int32_value_map.begin(),
                            res._int32_value_map.end());
    _string_value_map.insert(res._string_value_map.begin(),
                             res._string_value_map.end());
    _shape_map.insert(res._shape_map.begin(), res._shape_map.end());
    _lod_map.insert(res._lod_map.begin(), res._lod_map.end());
    _tensor_alias_names.insert(_tensor_alias_names.end(),
                               res._tensor_alias_names.begin(),
                               res._tensor_alias_names.end());
  }
  ModelRes(ModelRes&& res) {
    _engine_name = std::move(res._engine_name);
@@ -65,10 +70,17 @@ class ModelRes {
    _int32_value_map.insert(
        std::make_move_iterator(std::begin(res._int32_value_map)),
        std::make_move_iterator(std::end(res._int32_value_map)));
    _string_value_map.insert(
        std::make_move_iterator(std::begin(res._string_value_map)),
        std::make_move_iterator(std::end(res._string_value_map)));
    _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
                      std::make_move_iterator(std::end(res._shape_map)));
    _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
                    std::make_move_iterator(std::end(res._lod_map)));
    _tensor_alias_names.insert(
        _tensor_alias_names.end(),
        std::make_move_iterator(std::begin(res._tensor_alias_names)),
        std::make_move_iterator(std::end(res._tensor_alias_names)));
  }
  ~ModelRes() {}
  const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
@@ -89,6 +101,12 @@ class ModelRes {
  std::vector<int32_t>&& get_int32_by_name_with_rv(const std::string& name) {
    return std::move(_int32_value_map[name]);
  }
  const std::string& get_string_by_name(const std::string& name) {
    return _string_value_map[name];
  }
  std::string&& get_string_by_name_with_rv(const std::string& name) {
    return std::move(_string_value_map[name]);
  }
  const std::vector<int>& get_shape_by_name(const std::string& name) {
    return _shape_map[name];
  }
@@ -105,6 +123,10 @@ class ModelRes {
    _engine_name = engine_name;
  }
  const std::string& engine_name() { return _engine_name; }
  const std::vector<std::string>& tensor_alias_names() {
    return _tensor_alias_names;
  }
  ModelRes& operator=(ModelRes&& res) {
    if (this != &res) {
      _engine_name = std::move(res._engine_name);
@@ -117,10 +139,17 @@ class ModelRes {
      _int32_value_map.insert(
          std::make_move_iterator(std::begin(res._int32_value_map)),
          std::make_move_iterator(std::end(res._int32_value_map)));
      _string_value_map.insert(
          std::make_move_iterator(std::begin(res._string_value_map)),
          std::make_move_iterator(std::end(res._string_value_map)));
      _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
                        std::make_move_iterator(std::end(res._shape_map)));
      _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
                      std::make_move_iterator(std::end(res._lod_map)));
      _tensor_alias_names.insert(
          _tensor_alias_names.end(),
          std::make_move_iterator(std::begin(res._tensor_alias_names)),
          std::make_move_iterator(std::end(res._tensor_alias_names)));
    }
    return *this;
  }
@@ -130,8 +159,10 @@ class ModelRes {
  std::map<std::string, std::vector<int64_t>> _int64_value_map;
  std::map<std::string, std::vector<float>> _float_value_map;
  std::map<std::string, std::vector<int32_t>> _int32_value_map;
  std::map<std::string, std::string> _string_value_map;
  std::map<std::string, std::vector<int>> _shape_map;
  std::map<std::string, std::vector<int>> _lod_map;
  std::vector<std::string> _tensor_alias_names;
};

class PredictorRes {
@@ -168,6 +199,14 @@ class PredictorRes {
                                               const std::string& name) {
    return std::move(_models[model_idx].get_int32_by_name_with_rv(name));
  }
  const std::string& get_string_by_name(const int model_idx,
                                        const std::string& name) {
    return _models[model_idx].get_string_by_name(name);
  }
  std::string&& get_string_by_name_with_rv(const int model_idx,
                                           const std::string& name) {
    return std::move(_models[model_idx].get_string_by_name_with_rv(name));
  }
  const std::vector<int>& get_shape_by_name(const int model_idx,
                                            const std::string& name) {
    return _models[model_idx].get_shape_by_name(name);
@@ -193,11 +232,16 @@ class PredictorRes {
  }
  const std::string& variant_tag() { return _variant_tag; }
  const std::vector<std::string>& get_engine_names() { return _engine_names; }
  const std::vector<std::string>& get_tensor_alias_names(const int model_idx) {
    _tensor_alias_names = _models[model_idx].tensor_alias_names();
    return _tensor_alias_names;
  }

 private:
  std::vector<ModelRes> _models;
  std::string _variant_tag;
  std::vector<std::string> _engine_names;
  std::vector<std::string> _tensor_alias_names;
};

class PredictorClient {
@@ -222,10 +266,14 @@ class PredictorClient {
      const std::vector<std::string>& float_feed_name,
      const std::vector<std::vector<int>>& float_shape,
      const std::vector<std::vector<int>>& float_lod_slot_batch,
      const std::vector<py::array_t<int32_t>> &int32_feed,
      const std::vector<std::string> &int32_feed_name,
      const std::vector<std::vector<int>> &int32_shape,
      const std::vector<std::vector<int>> &int32_lod_slot_batch,
      const std::vector<py::array_t<int64_t>> &int64_feed,
      const std::vector<std::string> &int64_feed_name,
      const std::vector<std::vector<int>> &int64_shape,
      const std::vector<std::vector<int>> &int64_lod_slot_batch,
      const std::vector<std::string>& string_feed,
      const std::vector<std::string>& string_feed_name,
      const std::vector<std::vector<int>>& string_shape,
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-client/include/brpc_client.h"
#include "core/sdk-cpp/include/common.h"
#include "core/util/include/timer.h"
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
DEFINE_bool(profile_client, false, "");
DEFINE_bool(profile_server, false, "");
#define BRPC_MAX_BODY_SIZE 512 * 1024 * 1024
namespace baidu {
namespace paddle_serving {
namespace client {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using configure::SDKConf;
using configure::VariantConf;
using configure::Predictor;
using configure::VariantConf;
int ServingBrpcClient::connect(const std::string server_port) {
brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE;
if (_api.create(gen_desc(server_port)) != 0) {
LOG(ERROR) << "Predictor Creation Failed";
return -1;
}
// _api.thrd_initialize();
return 0;
}
std::string ServingBrpcClient::gen_desc(const std::string server_port) {
// default config for brpc
SDKConf sdk_conf;
Predictor* predictor = sdk_conf.add_predictors();
predictor->set_name("general_model");
predictor->set_service_name("baidu.paddle_serving.predictor.general_model.GeneralModelService");
predictor->set_endpoint_router("WeightedRandomRender");
predictor->mutable_weighted_random_render_conf()->set_variant_weight_list("100");
VariantConf* predictor_var = predictor->add_variants();
predictor_var->set_tag("default_tag_1");
std::string cluster = "list://" + server_port;
predictor_var->mutable_naming_conf()->set_cluster(cluster);
VariantConf* var = sdk_conf.mutable_default_variant_conf();
var->set_tag("default");
var->mutable_connection_conf()->set_connect_timeout_ms(2000);
var->mutable_connection_conf()->set_rpc_timeout_ms(200000);
var->mutable_connection_conf()->set_connect_retry_count(2);
var->mutable_connection_conf()->set_max_connection_per_host(100);
var->mutable_connection_conf()->set_hedge_request_timeout_ms(-1);
var->mutable_connection_conf()->set_hedge_fetch_retry_count(2);
var->mutable_connection_conf()->set_connection_type("pooled");
var->mutable_connection_conf()->set_connect_timeout_ms(2000);
var->mutable_naming_conf()->set_cluster_filter_strategy("Default");
var->mutable_naming_conf()->set_load_balance_strategy("la");
var->mutable_rpc_parameter()->set_compress_type(0);
var->mutable_rpc_parameter()->set_package_size(20);
var->mutable_rpc_parameter()->set_protocol("baidu_std");
var->mutable_rpc_parameter()->set_max_channel_per_request(3);
return sdk_conf.SerializePartialAsString();
}
int ServingBrpcClient::predict(const PredictorInputs& inputs,
PredictorOutputs& outputs,
const std::vector<std::string>& fetch_name,
const uint64_t log_id) {
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
// thread initialize for StubTLS
_api.thrd_initialize();
std::string variant_tag;
// predictor is bound to request with brpc::Controller
_predictor = _api.fetch_predictor("general_model", &variant_tag);
if (_predictor == NULL) {
LOG(ERROR) << "Failed fetch predictor so predict error!";
return -1;
}
// predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "variant_tag:" << variant_tag;
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req;
req.set_log_id(log_id);
for (auto &name : fetch_name) {
req.add_fetch_var_names(name);
}
if (PredictorInputs::GenProto(inputs, _feed_name_to_idx, _feed_name, req) != 0) {
LOG(ERROR) << "Failed to preprocess req!";
return -1;
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
res.Clear();
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
return -1;
}
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
if (PredictorOutputs::ParseProto(res, fetch_name, _fetch_name_to_type, outputs) != 0) {
LOG(ERROR) << "Failed to post_process res!";
return -1;
}
postprocess_end = timeline.TimeStampUS();
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
<< "pid:" << getpid() << "\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
}
}
oss << "postpro_0:" << postprocess_start << " ";
oss << "postpro_1:" << postprocess_end;
fprintf(stderr, "%s\n", oss.str().c_str());
}
// release predictor
_api.thrd_clear();
std::ostringstream oss;
oss << "[client]"
<< "logid=" << log_id <<",";
if (FLAGS_profile_client) {
double pre_cost = (preprocess_end - preprocess_start) / 1000.0;
double infer_cost = (client_infer_end - client_infer_start) / 1000.0;
double post_cost = (postprocess_end - postprocess_start) / 1000.0;
oss << "client_pre_cost=" << pre_cost << "ms,"
<< "client_infer_cost=" << infer_cost << "ms,"
<< "client_post_cost=" << post_cost << "ms,";
}
double client_cost = (postprocess_end - preprocess_start) / 1000.0;
oss << "client_cost=" << client_cost << "ms,";
int op_num = res.profile_time_size() / 2;
if (FLAGS_profile_server) {
for (int i = 0; i < op_num - 1; ++i) {
double t = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "op" << i << "=" << t << "ms,";
}
}
if (op_num > 0) {
int i = op_num - 1;
double server_cost = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "server_cost=" << server_cost << "ms.";
}
LOG(INFO) << oss.str();
return 0;
}
}  // namespace client
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-client/include/client.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace client {
using configure::GeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int ServingClient::init(const std::vector<std::string>& client_conf,
const std::string server_port) {
if (load_client_config(client_conf) != 0) {
LOG(ERROR) << "Failed to load client config";
return -1;
}
// pure virtual func, subclass implementation
if (connect(server_port) != 0) {
LOG(ERROR) << "Failed to connect";
return -1;
}
return 0;
}
int ServingClient::load_client_config(const std::vector<std::string> &conf_file) {
try {
GeneralModelConfig model_config;
if (configure::read_proto_conf(conf_file[0].c_str(), &model_config) != 0) {
LOG(ERROR) << "Failed to load general model config"
<< ", file path: " << conf_file[0];
return -1;
}
_feed_name_to_idx.clear();
_fetch_name_to_idx.clear();
_shape.clear();
int feed_var_num = model_config.feed_var_size();
_feed_name.clear();
VLOG(2) << "feed var num: " << feed_var_num;
for (int i = 0; i < feed_var_num; ++i) {
_feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
VLOG(2) << "feed [" << i << "]"
<< " name: " << model_config.feed_var(i).name();
_feed_name.push_back(model_config.feed_var(i).name());
VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
<< " index: " << i;
std::vector<int> tmp_feed_shape;
VLOG(2) << "feed"
<< "[" << i << "] shape:";
for (int j = 0; j < model_config.feed_var(i).shape_size(); ++j) {
tmp_feed_shape.push_back(model_config.feed_var(i).shape(j));
VLOG(2) << "shape[" << j << "]: " << model_config.feed_var(i).shape(j);
}
_type.push_back(model_config.feed_var(i).feed_type());
VLOG(2) << "feed"
<< "[" << i
<< "] feed type: " << model_config.feed_var(i).feed_type();
_shape.push_back(tmp_feed_shape);
}
if (conf_file.size() > 1) {
model_config.Clear();
if (configure::read_proto_conf(conf_file[conf_file.size() - 1].c_str(),
&model_config) != 0) {
LOG(ERROR) << "Failed to load general model config"
<< ", file path: " << conf_file[conf_file.size() - 1];
return -1;
}
}
int fetch_var_num = model_config.fetch_var_size();
VLOG(2) << "fetch_var_num: " << fetch_var_num;
for (int i = 0; i < fetch_var_num; ++i) {
_fetch_name_to_idx[model_config.fetch_var(i).alias_name()] = i;
VLOG(2) << "fetch [" << i << "]"
<< " alias name: " << model_config.fetch_var(i).alias_name();
_fetch_name_to_var_name[model_config.fetch_var(i).alias_name()] =
model_config.fetch_var(i).name();
_fetch_name_to_type[model_config.fetch_var(i).alias_name()] =
model_config.fetch_var(i).fetch_type();
}
} catch (std::exception &e) {
LOG(ERROR) << "Failed load general model config" << e.what();
return -1;
}
return 0;
}
void PredictorData::add_float_data(const std::vector<float>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_float_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_int64_data(const std::vector<int64_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_int64_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_int32_data(const std::vector<int32_t>& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_int32_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
void PredictorData::add_string_data(const std::string& data,
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype) {
_string_data_map[name] = data;
_shape_map[name] = shape;
_lod_map[name] = lod;
_datatype_map[name] = datatype;
}
int PredictorData::get_datatype(std::string name) const {
std::map<std::string, int>::const_iterator it = _datatype_map.find(name);
if (it != _datatype_map.end()) {
return it->second;
}
return 0;
}
void PredictorData::set_datatype(std::string name, int type) {
_datatype_map[name] = type;
}
std::string PredictorData::print() {
std::string res;
res.append(map2string<std::string, float>(_float_data_map));
res.append(map2string<std::string, int64_t>(_int64_data_map));
res.append(map2string<std::string, int32_t>(_int32_data_map));
res.append(map2string<std::string, std::string>(_string_data_map));
return res;
}
int PredictorInputs::GenProto(const PredictorInputs& inputs,
const std::map<std::string, int>& feed_name_to_idx,
const std::vector<std::string>& feed_name,
Request& req) {
const std::map<std::string, std::vector<float>>& float_feed_map = inputs.float_data_map();
const std::map<std::string, std::vector<int64_t>>& int64_feed_map = inputs.int64_data_map();
const std::map<std::string, std::vector<int32_t>>& int32_feed_map = inputs.int_data_map();
const std::map<std::string, std::string>& string_feed_map = inputs.string_data_map();
const std::map<std::string, std::vector<int>>& shape_map = inputs.shape_map();
const std::map<std::string, std::vector<int>>& lod_map = inputs.lod_map();
VLOG(2) << "float feed name size: " << float_feed_map.size();
VLOG(2) << "int feed name size: " << int64_feed_map.size();
VLOG(2) << "string feed name size: " << string_feed_map.size();
// batch is already in Tensor.
for (std::map<std::string, std::vector<float>>::const_iterator iter = float_feed_map.begin();
iter != float_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<float>& float_data = iter->second;
const std::vector<int>& float_shape = shape_map.at(name);
const std::vector<int>& float_lod = lod_map.at(name);
// default datatype = P_FLOAT32
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
VLOG(2) << "prepare float feed " << name << " idx " << idx;
int total_number = float_data.size();
Tensor *tensor = req.add_tensor();
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape.size();
for (uint32_t j = 0; j < float_shape.size(); ++j) {
tensor->add_shape(float_shape[j]);
}
for (uint32_t j = 0; j < float_lod.size(); ++j) {
tensor->add_lod(float_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_float_data()->Resize(total_number, 0);
memcpy(tensor->mutable_float_data()->mutable_data(), float_data.data(), total_number * sizeof(float));
}
for (std::map<std::string, std::vector<int64_t>>::const_iterator iter = int64_feed_map.begin();
iter != int64_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<int64_t>& int64_data = iter->second;
const std::vector<int>& int64_shape = shape_map.at(name);
const std::vector<int>& int64_lod = lod_map.at(name);
// default datatype = P_INT64
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
int total_number = int64_data.size();
for (uint32_t j = 0; j < int64_shape.size(); ++j) {
tensor->add_shape(int64_shape[j]);
}
for (uint32_t j = 0; j < int64_lod.size(); ++j) {
tensor->add_lod(int64_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), int64_data.data(), total_number * sizeof(int64_t));
}
for (std::map<std::string, std::vector<int32_t>>::const_iterator iter = int32_feed_map.begin();
iter != int32_feed_map.end();
++iter) {
std::string name = iter->first;
const std::vector<int32_t>& int32_data = iter->second;
const std::vector<int>& int32_shape = shape_map.at(name);
const std::vector<int>& int32_lod = lod_map.at(name);
// default datatype = P_INT32
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
int total_number = int32_data.size();
for (uint32_t j = 0; j < int32_shape.size(); ++j) {
tensor->add_shape(int32_shape[j]);
}
for (uint32_t j = 0; j < int32_lod.size(); ++j) {
tensor->add_lod(int32_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), int32_data.data(), total_number * sizeof(int32_t));
}
for (std::map<std::string, std::string>::const_iterator iter = string_feed_map.begin();
iter != string_feed_map.end();
++iter) {
std::string name = iter->first;
const std::string& string_data = iter->second;
const std::vector<int>& string_shape = shape_map.at(name);
const std::vector<int>& string_lod = lod_map.at(name);
// default datatype = P_STRING
int datatype = inputs.get_datatype(name);
std::map<std::string, int>::const_iterator feed_name_it = feed_name_to_idx.find(name);
if (feed_name_it == feed_name_to_idx.end()) {
LOG(ERROR) << "Do not find [" << name << "] in feed_map!";
return -1;
}
int idx = feed_name_to_idx.at(name);
Tensor *tensor = req.add_tensor();
for (uint32_t j = 0; j < string_shape.size(); ++j) {
tensor->add_shape(string_shape[j]);
}
for (uint32_t j = 0; j < string_lod.size(); ++j) {
tensor->add_lod(string_lod[j]);
}
tensor->set_elem_type(datatype);
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
if (datatype == P_STRING) {
const int string_shape_size = string_shape.size();
// string_shape[vec_idx] = [1]; because numpy has no string datatype,
// we pass strings via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape should be 1-D, but its size is: "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_data);
break;
}
}
} else {
tensor->set_tensor_content(string_data);
}
}
return 0;
}
std::string PredictorOutputs::print() {
std::string res = "";
for (size_t i = 0; i < _datas.size(); ++i) {
res.append(_datas[i]->engine_name);
res.append(":");
res.append(_datas[i]->data.print());
res.append("\n");
}
return res;
}
void PredictorOutputs::clear() {
_datas.clear();
}
int PredictorOutputs::ParseProto(const Response& res,
const std::vector<std::string>& fetch_name,
std::map<std::string, int>& fetch_name_to_type,
PredictorOutputs& outputs) {
VLOG(2) << "get model output num";
uint32_t model_num = res.outputs_size();
VLOG(2) << "model num: " << model_num;
for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
VLOG(2) << "process model output index: " << m_idx;
auto& output = res.outputs(m_idx);
std::shared_ptr<PredictorOutputs::PredictorOutput> predictor_output =
std::make_shared<PredictorOutputs::PredictorOutput>();
predictor_output->engine_name = output.engine_name();
PredictorData& predictor_data = predictor_output->data;
std::map<std::string, std::vector<float>>& float_data_map = *predictor_output->data.mutable_float_data_map();
std::map<std::string, std::vector<int64_t>>& int64_data_map = *predictor_output->data.mutable_int64_data_map();
std::map<std::string, std::vector<int32_t>>& int32_data_map = *predictor_output->data.mutable_int_data_map();
std::map<std::string, std::string>& string_data_map = *predictor_output->data.mutable_string_data_map();
std::map<std::string, std::vector<int>>& shape_map = *predictor_output->data.mutable_shape_map();
std::map<std::string, std::vector<int>>& lod_map = *predictor_output->data.mutable_lod_map();
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
shape_map[name][i] = output.tensor(idx).shape(i);
}
int lod_size = output.tensor(idx).lod_size();
if (lod_size > 0) {
lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "fetch var " << name << "type int64";
int size = output.tensor(idx).int64_data_size();
int64_data_map[name] = std::vector<int64_t>(
output.tensor(idx).int64_data().begin(),
output.tensor(idx).int64_data().begin() + size);
} else if (fetch_name_to_type[name] == P_FLOAT32) {
VLOG(2) << "fetch var " << name << "type float";
int size = output.tensor(idx).float_data_size();
float_data_map[name] = std::vector<float>(
output.tensor(idx).float_data().begin(),
output.tensor(idx).float_data().begin() + size);
} else if (fetch_name_to_type[name] == P_INT32) {
VLOG(2) << "fetch var " << name << "type int32";
int size = output.tensor(idx).int_data_size();
int32_data_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (fetch_name_to_type[name] == P_UINT8
|| fetch_name_to_type[name] == P_INT8
|| fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var [" << name << "]type="
<< fetch_name_to_type[name];
string_data_map[name] = output.tensor(idx).tensor_content();
}
predictor_data.set_datatype(name, output.tensor(idx).elem_type());
idx += 1;
}
outputs.add_data(predictor_output);
}
return 0;
}
} // namespace client
} // namespace paddle_serving
} // namespace baidu
...@@ -25,7 +25,22 @@ using baidu::paddle_serving::Timer; ...@@ -25,7 +25,22 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; // support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
std::once_flag gflags_init_flag; std::once_flag gflags_init_flag;
namespace py = pybind11; namespace py = pybind11;
...@@ -152,10 +167,14 @@ int PredictorClient::numpy_predict( ...@@ -152,10 +167,14 @@ int PredictorClient::numpy_predict(
const std::vector<std::string> &float_feed_name, const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape, const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch, const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int_feed, const std::vector<py::array_t<int32_t>> &int32_feed,
const std::vector<std::string> &int_feed_name, const std::vector<std::string> &int32_feed_name,
const std::vector<std::vector<int>> &int_shape, const std::vector<std::vector<int>> &int32_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch, const std::vector<std::vector<int>> &int32_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int64_feed,
const std::vector<std::string> &int64_feed_name,
const std::vector<std::vector<int>> &int64_shape,
const std::vector<std::vector<int>> &int64_lod_slot_batch,
const std::vector<std::string> &string_feed, const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name, const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape, const std::vector<std::vector<int>> &string_shape,
...@@ -168,15 +187,14 @@ int PredictorClient::numpy_predict( ...@@ -168,15 +187,14 @@ int PredictorClient::numpy_predict(
Timer timeline; Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS(); int64_t preprocess_start = timeline.TimeStampUS();
int fetch_name_num = fetch_name.size();
_api.thrd_initialize(); _api.thrd_initialize();
std::string variant_tag; std::string variant_tag;
_predictor = _api.fetch_predictor("general_model", &variant_tag); _predictor = _api.fetch_predictor("general_model", &variant_tag);
predict_res_batch.set_variant_tag(variant_tag); predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done."; VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size(); VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size(); VLOG(2) << "int feed name size: " << int32_feed_name.size();
VLOG(2) << "int feed name size: " << int64_feed_name.size();
VLOG(2) << "string feed name size: " << string_feed_name.size(); VLOG(2) << "string feed name size: " << string_feed_name.size();
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size; VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req; Request req;
...@@ -193,7 +211,11 @@ int PredictorClient::numpy_predict( ...@@ -193,7 +211,11 @@ int PredictorClient::numpy_predict(
tensor_vec.push_back(req.add_tensor()); tensor_vec.push_back(req.add_tensor());
} }
for (auto &name : int_feed_name) { for (auto &name : int32_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
for (auto &name : int64_feed_name) {
tensor_vec.push_back(req.add_tensor()); tensor_vec.push_back(req.add_tensor());
} }
...@@ -233,37 +255,63 @@ int PredictorClient::numpy_predict( ...@@ -233,37 +255,63 @@ int PredictorClient::numpy_predict(
} }
vec_idx = 0; vec_idx = 0;
for (auto &name : int_feed_name) { for (auto &name : int32_feed_name) {
int idx = _feed_name_to_idx[name]; int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) { if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()"; LOG(ERROR) << "idx > tensor_vec.size()";
return -1; return -1;
} }
Tensor *tensor = tensor_vec[idx]; Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes(); int nbytes = int32_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0)); void *rawdata_ptr = (void *)(int32_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size(); int total_number = int32_feed[vec_idx].size();
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) { for (uint32_t j = 0; j < int32_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]); tensor->add_shape(int32_shape[vec_idx][j]);
} }
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) { for (uint32_t j = 0; j < int32_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]); tensor->add_lod(int32_lod_slot_batch[vec_idx][j]);
} }
tensor->set_elem_type(_type[idx]); tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]); tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name); tensor->set_alias_name(name);
if (_type[idx] == P_INT64) { tensor->mutable_int_data()->Resize(total_number, 0);
tensor->mutable_int64_data()->Resize(total_number, 0); memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes); vec_idx++;
} else { }
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
// Fill the INT64 feed data from int64_feed in a separate loop
vec_idx = 0;
for (auto &name : int64_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int64_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int64_feed[vec_idx].data(0));
int total_number = int64_feed[vec_idx].size();
for (uint32_t j = 0; j < int64_shape[vec_idx].size(); ++j) {
tensor->add_shape(int64_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int64_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int64_lod_slot_batch[vec_idx][j]);
} }
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++; vec_idx++;
} }
// Add non-string (!P_STRING) feed data from string_input to tensor_content:
// UINT8, INT8 and FLOAT16 payloads are carried as raw bytes.
vec_idx = 0; vec_idx = 0;
for (auto &name : string_feed_name) { for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name]; int idx = _feed_name_to_idx[name];
...@@ -279,22 +327,27 @@ int PredictorClient::numpy_predict( ...@@ -279,22 +327,27 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) { for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]); tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
} }
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]); tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name); tensor->set_alias_name(name);
const int string_shape_size = string_shape[vec_idx].size(); if (_type[idx] != P_STRING) {
// string_shape[vec_idx] = [1];cause numpy has no datatype of string. tensor->set_elem_type(_type[idx]);
// we pass string via vector<vector<string> >. tensor->set_tensor_content(string_feed[vec_idx]);
if (string_shape_size != 1) { } else {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : " tensor->set_elem_type(P_STRING);
<< string_shape_size; const int string_shape_size = string_shape[vec_idx].size();
return -1; // string_shape[vec_idx] = [1];cause numpy has no datatype of string.
} // we pass string via vector<vector<string> >.
switch (string_shape_size) { if (string_shape_size != 1) {
case 1: { LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
tensor->add_data(string_feed[vec_idx]); << string_shape_size;
break; return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
}
} }
} }
vec_idx++; vec_idx++;
...@@ -308,10 +361,8 @@ int PredictorClient::numpy_predict( ...@@ -308,10 +361,8 @@ int PredictorClient::numpy_predict(
int64_t postprocess_start = 0; int64_t postprocess_start = 0;
int64_t postprocess_end = 0; int64_t postprocess_end = 0;
if (FLAGS_profile_client) { if (FLAGS_profile_server) {
if (FLAGS_profile_server) { req.set_profile_server(true);
req.set_profile_server(true);
}
} }
res.Clear(); res.Clear();
...@@ -329,10 +380,12 @@ int PredictorClient::numpy_predict( ...@@ -329,10 +380,12 @@ int PredictorClient::numpy_predict(
auto output = res.outputs(m_idx); auto output = res.outputs(m_idx);
ModelRes model; ModelRes model;
model.set_engine_name(output.engine_name()); model.set_engine_name(output.engine_name());
// In ResponseOp, the output data has already been arranged according to fetch_name,
int idx = 0; // so the output tensors correspond to fetch_name one-to-one and can be processed in order.
for (auto &name : fetch_name) { for (int idx = 0; idx < output.tensor_size(); ++idx) {
// int idx = _fetch_name_to_idx[name]; // int idx = _fetch_name_to_idx[name];
const std::string name = output.tensor(idx).alias_name();
model._tensor_alias_names.push_back(name);
int shape_size = output.tensor(idx).shape_size(); int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size " VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size; << shape_size;
...@@ -347,13 +400,7 @@ int PredictorClient::numpy_predict( ...@@ -347,13 +400,7 @@ int PredictorClient::numpy_predict(
model._lod_map[name][i] = output.tensor(idx).lod(i); model._lod_map[name][i] = output.tensor(idx).lod(i);
} }
} }
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == P_INT64) { if (_fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "ferch var " << name << "type int64"; VLOG(2) << "ferch var " << name << "type int64";
int size = output.tensor(idx).int64_data_size(); int size = output.tensor(idx).int64_data_size();
...@@ -372,8 +419,16 @@ int PredictorClient::numpy_predict( ...@@ -372,8 +419,16 @@ int PredictorClient::numpy_predict(
model._int32_value_map[name] = std::vector<int32_t>( model._int32_value_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(), output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size); output.tensor(idx).int_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_UINT8) {
VLOG(2) << "fetch var " << name << "type uint8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_INT8) {
VLOG(2) << "fetch var " << name << "type int8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var " << name << "type float16";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} }
idx += 1;
} }
predict_res_batch.add_model_res(std::move(model)); predict_res_batch.add_model_res(std::move(model));
} }
...@@ -403,6 +458,36 @@ int PredictorClient::numpy_predict( ...@@ -403,6 +458,36 @@ int PredictorClient::numpy_predict(
} }
_api.thrd_clear(); _api.thrd_clear();
std::ostringstream oss;
oss << "[client]"
<< "logid=" << log_id <<",";
if (FLAGS_profile_client) {
double pre_cost = (preprocess_end - preprocess_start) / 1000.0;
double infer_cost = (client_infer_end - client_infer_start) / 1000.0;
double post_cost = (postprocess_end - postprocess_start) / 1000.0;
oss << "client_pre_cost=" << pre_cost << "ms,"
<< "client_infer_cost=" << infer_cost << "ms,"
<< "client_post_cost=" << post_cost << "ms,";
}
double client_cost = (postprocess_end - preprocess_start) / 1000.0;
oss << "client_cost=" << client_cost << "ms,";
int op_num = res.profile_time_size() / 2;
if (FLAGS_profile_server) {
for (int i = 0; i < op_num - 1; ++i) {
double t = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "op" << i << "=" << t << "ms,";
}
}
if (op_num > 0) {
int i = op_num - 1;
double server_cost = (res.profile_time(i * 2 + 1)
- res.profile_time(i * 2)) / 1000.0;
oss << "server_cost=" << server_cost << "ms.";
}
LOG(INFO) << oss.str();
return 0; return 0;
} }
} // namespace general_model } // namespace general_model
......
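The hunks above split the generic int feed into separate int32 and int64 paths and add a client-side profiling summary: pre/infer/post costs are appended only when FLAGS_profile_client is set, per-op server costs only when FLAGS_profile_server is set, and the last profile_time pair is reported as server_cost. A self-contained sketch of the same arithmetic with made-up microsecond timestamps (illustrative values, not output captured from Serving):
#include <cstdint>
#include <iostream>
#include <sstream>
int main() {
  // Hypothetical timestamps in microseconds, mirroring preprocess/infer/postprocess.
  int64_t preprocess_start = 0, preprocess_end = 200;
  int64_t client_infer_start = 200, client_infer_end = 5300;
  int64_t postprocess_start = 5300, postprocess_end = 5600;
  std::ostringstream oss;
  oss << "[client]" << "logid=" << 100 << ",";
  oss << "client_pre_cost=" << (preprocess_end - preprocess_start) / 1000.0 << "ms,"
      << "client_infer_cost=" << (client_infer_end - client_infer_start) / 1000.0 << "ms,"
      << "client_post_cost=" << (postprocess_end - postprocess_start) / 1000.0 << "ms,";
  oss << "client_cost=" << (postprocess_end - preprocess_start) / 1000.0 << "ms,";
  std::cout << oss.str() << std::endl;  // [client]logid=100,client_pre_cost=0.2ms,...
  return 0;
}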
...@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) {
}); });
return py::array(ptr->size(), ptr->data(), capsule); return py::array(ptr->size(), ptr->data(), capsule);
}) })
.def("get_int32_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int32_t> *ptr = new std::vector<int32_t>(
std::move(self.get_int32_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int32_t> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_string_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_string_by_name_with_rv(model_idx, name);
})
.def("get_shape", .def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) { [](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int> *ptr = new std::vector<int>( std::vector<int> *ptr = new std::vector<int>(
...@@ -69,7 +82,10 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -69,7 +82,10 @@ PYBIND11_MODULE(serving_client, m) {
}) })
.def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); }) .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
.def("get_engine_names", .def("get_engine_names",
[](PredictorRes &self) { return self.get_engine_names(); }); [](PredictorRes &self) { return self.get_engine_names(); })
.def("get_tensor_alias_names", [](PredictorRes &self, int model_idx) {
return self.get_tensor_alias_names(model_idx);
});
py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol()) py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
.def(py::init()) .def(py::init())
...@@ -101,10 +117,14 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -101,10 +117,14 @@ PYBIND11_MODULE(serving_client, m) {
const std::vector<std::string> &float_feed_name, const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape, const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch, const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int_feed, const std::vector<py::array_t<int32_t>> &int32_feed,
const std::vector<std::string> &int_feed_name, const std::vector<std::string> &int32_feed_name,
const std::vector<std::vector<int>> &int_shape, const std::vector<std::vector<int>> &int32_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch, const std::vector<std::vector<int>> &int32_lod_slot_batch,
const std::vector<py::array_t<int64_t>> &int64_feed,
const std::vector<std::string> &int64_feed_name,
const std::vector<std::vector<int>> &int64_shape,
const std::vector<std::vector<int>> &int64_lod_slot_batch,
const std::vector<std::string> &string_feed, const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name, const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape, const std::vector<std::vector<int>> &string_shape,
...@@ -117,10 +137,14 @@ PYBIND11_MODULE(serving_client, m) { ...@@ -117,10 +137,14 @@ PYBIND11_MODULE(serving_client, m) {
float_feed_name, float_feed_name,
float_shape, float_shape,
float_lod_slot_batch, float_lod_slot_batch,
int_feed, int32_feed,
int_feed_name, int32_feed_name,
int_shape, int32_shape,
int_lod_slot_batch, int32_lod_slot_batch,
int64_feed,
int64_feed_name,
int64_shape,
int64_lod_slot_batch,
string_feed, string_feed,
string_feed_name, string_feed_name,
string_shape, string_shape,
......
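The new bindings above (get_int32_by_name, get_string_by_name, get_tensor_alias_names) reuse the ownership pattern of the existing getters: the result vector is moved onto the heap and a py::capsule deletes it once the returned numpy array is garbage-collected, so no extra copy into Python is needed. A generic sketch of that pattern, detached from PredictorRes:
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <cstdint>
#include <vector>
namespace py = pybind11;
// Wrap a C++ vector as a numpy array whose lifetime is tied to a capsule.
py::array make_owned_array(std::vector<int32_t>&& v) {
  auto* ptr = new std::vector<int32_t>(std::move(v));
  auto capsule = py::capsule(ptr, [](void* p) {
    delete reinterpret_cast<std::vector<int32_t>*>(p);
  });
  return py::array(ptr->size(), ptr->data(), capsule);
}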
...@@ -191,42 +191,64 @@ int GeneralDetectionOp::inference() { ...@@ -191,42 +191,64 @@ int GeneralDetectionOp::inference() {
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
for (int i = boxes.size() - 1; i >= 0; i--) { float max_wh_ratio = 0.0f;
crop_img = GetRotateCropImage(img, boxes[i]); std::vector<cv::Mat> crop_imgs;
std::vector<cv::Mat> resize_imgs;
float wh_ratio = float(crop_img.cols) / float(crop_img.rows); int max_resize_w = 0;
int max_resize_h = 0;
int box_num = boxes.size();
std::vector<std::vector<float>> output_rec;
for (int i = 0; i < box_num; ++i) {
cv::Mat line_img = GetRotateCropImage(img, boxes[i]);
float wh_ratio = float(line_img.cols) / float(line_img.rows);
max_wh_ratio = max_wh_ratio > wh_ratio ? max_wh_ratio : wh_ratio;
crop_imgs.push_back(line_img);
}
for (int i = 0; i < box_num; ++i) {
cv::Mat resize_img;
crop_img = crop_imgs[i];
this->resize_op_rec.Run( this->resize_op_rec.Run(
crop_img, resize_img_rec, wh_ratio, this->use_tensorrt_); crop_img, resize_img, max_wh_ratio, this->use_tensorrt_);
this->normalize_op_.Run( this->normalize_op_.Run(
&resize_img_rec, this->mean_rec, this->scale_rec, this->is_scale_); &resize_img, this->mean_rec, this->scale_rec, this->is_scale_);
std::vector<float> output_rec( max_resize_w = std::max(max_resize_w, resize_img.cols);
1 * 3 * resize_img_rec.rows * resize_img_rec.cols, 0.0f); max_resize_h = std::max(max_resize_h, resize_img.rows);
resize_imgs.push_back(resize_img);
this->permute_op_.Run(&resize_img_rec, output_rec.data()); }
int buf_size = 3 * max_resize_h * max_resize_w;
// Inference. output_rec = std::vector<std::vector<float>>(box_num,
output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols}; std::vector<float>(buf_size, 0.0f));
out_num = std::accumulate( for (int i = 0; i < box_num; ++i) {
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>()); resize_img_rec = resize_imgs[i];
databuf_size_out = out_num * sizeof(float);
databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out); this->permute_op_.Run(&resize_img_rec, output_rec[i].data());
if (!databuf_data_out) { }
LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
return -1; // Inference.
} output_shape = {box_num, 3, max_resize_h, max_resize_w};
memcpy(databuf_data_out, output_rec.data(), databuf_size_out); out_num = std::accumulate(
databuf_char_out = reinterpret_cast<char*>(databuf_data_out); output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); databuf_size_out = out_num * sizeof(float);
paddle::PaddleTensor tensor_out; databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out);
tensor_out.name = "image"; if (!databuf_data_out) {
tensor_out.dtype = paddle::PaddleDType::FLOAT32; LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
tensor_out.shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols}; return -1;
tensor_out.data = paddleBuf; }
out->push_back(tensor_out); int offset = buf_size * sizeof(float);
for (int i = 0; i < box_num; ++i) {
memcpy(databuf_data_out + i * offset, output_rec[i].data(), offset);
} }
databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
paddle::PaddleTensor tensor_out;
tensor_out.name = "image";
tensor_out.dtype = paddle::PaddleDType::FLOAT32;
tensor_out.shape = output_shape;
tensor_out.data = paddleBuf;
out->push_back(tensor_out);
} }
out->erase(out->begin(), out->begin() + infer_outnum); out->erase(out->begin(), out->begin() + infer_outnum);
......
...@@ -63,7 +63,7 @@ class GeneralDetectionOp ...@@ -63,7 +63,7 @@ class GeneralDetectionOp
double det_db_thresh_ = 0.3; double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5; double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0; double det_db_unclip_ratio_ = 1.5;
std::vector<float> mean_det = {0.485f, 0.456f, 0.406f}; std::vector<float> mean_det = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f}; std::vector<float> scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include "core/cube/cube-api/include/cube_api.h" #include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/framework/cache.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h" #include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h" #include "core/predictor/framework/resource.h"
...@@ -36,10 +37,11 @@ using baidu::paddle_serving::predictor::general_model::Response; ...@@ -36,10 +37,11 @@ using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
using baidu::paddle_serving::predictor::CubeCache;
// DistKV Infer Op: seek cube and then call paddle inference // DistKV Infer Op: seek cube and then call paddle inference
// op seq: general_reader-> dist_kv_infer -> general_response // op seq: general_reader-> dist_kv_infer -> general_response
int GeneralDistKVInferOp::inference() { int GeneralDistKVInferOp::inference() {
VLOG(2) << "Going to run inference"; VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names(); const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) { if (pre_node_names.size() != 1) {
...@@ -60,8 +62,8 @@ int GeneralDistKVInferOp::inference() { ...@@ -60,8 +62,8 @@ int GeneralDistKVInferOp::inference() {
GeneralBlob *output_blob = mutable_data<GeneralBlob>(); GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) { if (!output_blob) {
LOG(ERROR) << "(logid=" << log_id << ") output_blob is nullptr,error"; LOG(ERROR) << "(logid=" << log_id << ") output_blob is nullptr,error";
return -1; return -1;
} }
output_blob->SetLogId(log_id); output_blob->SetLogId(log_id);
...@@ -70,21 +72,30 @@ int GeneralDistKVInferOp::inference() { ...@@ -70,21 +72,30 @@ int GeneralDistKVInferOp::inference() {
<< ") Failed mutable depended argument, op:" << pre_name; << ") Failed mutable depended argument, op:" << pre_name;
return -1; return -1;
} }
Timer timeline;
timeline.Start();
const TensorVector *in = &input_blob->tensor_vector; const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector; TensorVector *out = &output_blob->tensor_vector;
std::vector<uint64_t> keys; std::vector<uint64_t> keys;
std::vector<uint64_t> unique_keys;
std::unordered_map<uint64_t, rec::mcube::CubeValue *> key_map;
std::vector<rec::mcube::CubeValue> values; std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0; // sparse inputs counts, sparse would seek cube // sparse inputs counts, sparse would seek cube
int dense_count = 0; // dense inputs counts, dense would directly call paddle infer int sparse_count = 0;
// dense inputs counts, dense would directly call paddle infer
int dense_count = 0;
std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs; std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
size_t key_len = 0; size_t key_len = 0;
for (size_t i = 0; i < in->size(); ++i) { for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) { if (in->at(i).dtype != paddle::PaddleDType::INT64) {
// dense input type is not int64
++dense_count; ++dense_count;
continue; continue;
} }
// sparse input type is int64
++sparse_count; ++sparse_count;
size_t elem_num = 1; size_t elem_num = 1;
for (size_t s = 0; s < in->at(i).shape.size(); ++s) { for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
elem_num *= in->at(i).shape[s]; elem_num *= in->at(i).shape[s];
...@@ -94,7 +105,8 @@ int GeneralDistKVInferOp::inference() { ...@@ -94,7 +105,8 @@ int GeneralDistKVInferOp::inference() {
dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num)); dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
} }
keys.resize(key_len); keys.resize(key_len);
VLOG(3) << "(logid=" << log_id << ") cube number of keys to look up: " << key_len; unique_keys.resize(key_len);
int key_idx = 0; int key_idx = 0;
for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) { for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
std::copy(dataptr_size_pairs[i].first, std::copy(dataptr_size_pairs[i].first,
...@@ -102,20 +114,81 @@ int GeneralDistKVInferOp::inference() { ...@@ -102,20 +114,81 @@ int GeneralDistKVInferOp::inference() {
keys.begin() + key_idx); keys.begin() + key_idx);
key_idx += dataptr_size_pairs[i].second; key_idx += dataptr_size_pairs[i].second;
} }
// filter duplicate keys
int unique_keys_count = 0;
for (size_t i = 0; i < keys.size(); ++i) {
if (key_map.find(keys[i]) == key_map.end()) {
key_map[keys[i]] = nullptr;
unique_keys[unique_keys_count++] = keys[i];
}
}
unique_keys.resize(unique_keys_count);
VLOG(1) << "(logid=" << log_id
<< ") cube number of keys to look up: " << key_len
<< " uniq keys: " << unique_keys_count;
// filter cache keys
size_t hit_counts = 0;
int64_t seek_cache_start = timeline.TimeStampUS();
CubeCache *p_cube_cache =
InferManager::instance().get_cube_cache(engine_name().c_str());
if (p_cube_cache != nullptr) {
for (size_t i = 0; i < unique_keys_count; ++i) {
rec::mcube::CubeValue *hit_val = p_cube_cache->get_data(unique_keys[i]);
if (hit_val) {
// LOG(WARNING) << "Hit one cache. key:" << unique_keys[i];
key_map[unique_keys[i]] = hit_val;
if (hit_counts % 100 == 0) {
LOG(WARNING) << "hit cache! key:" << unique_keys[i]
<< " value:" << hit_val->buff;
}
unique_keys[i] = 0;
++hit_counts;
}
}
} else {
LOG(WARNING) << "get cube cache fail. model: " << engine_name();
}
// clear unique keys which hit caches
if (hit_counts > 0) {
for (auto it = unique_keys.begin(); it < unique_keys.end();) {
if (*it == 0) {
it = unique_keys.erase(it);
--unique_keys_count;
} else {
++it;
}
}
}
int64_t seek_cache_end = timeline.TimeStampUS();
VLOG(2) << "cache hit " << hit_counts
<< " keys in cube cache, last unique_keys:" << unique_keys.size()
<< " , seek_time:" << seek_cache_end - seek_cache_start;
// seek sparse params
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance(); rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names(); std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) { if (table_names.size() == 0) {
LOG(ERROR) << "cube init error or cube config not given."; LOG(ERROR) << "cube init error or cube config not given.";
return -1; return -1;
} }
// gather keys and seek cube servers, put results in values int64_t seek_start = timeline.TimeStampUS();
int ret = cube->seek(table_names[0], keys, &values); int ret = cube->seek(table_names[0], unique_keys, &values);
VLOG(3) << "(logid=" << log_id << ") cube seek status: " << ret; int64_t seek_end = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id << ") cube seek status: " << ret
<< " , unique_key: " << unique_keys.size()
<< " , seek_time: " << seek_end - seek_start;
for (size_t i = 0; i < unique_keys.size(); ++i) {
key_map[unique_keys[i]] = &values[i];
}
if (values.size() != keys.size() || values[0].buff.size() == 0) { if (values.size() != keys.size() || values[0].buff.size() == 0) {
LOG(ERROR) << "cube value return null"; LOG(ERROR) << "cube value return null";
} }
// EMBEDDING_SIZE means the length of sparse vector, user can define length here. size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float); // size_t EMBEDDING_SIZE = (values[0].buff.size() - 10) / sizeof(float);
//size_t EMBEDDING_SIZE = 9;
TensorVector sparse_out; TensorVector sparse_out;
sparse_out.resize(sparse_count); sparse_out.resize(sparse_count);
TensorVector dense_out; TensorVector dense_out;
...@@ -126,8 +199,10 @@ int GeneralDistKVInferOp::inference() { ...@@ -126,8 +199,10 @@ int GeneralDistKVInferOp::inference() {
std::unordered_map<int, int> in_out_map; std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
std::shared_ptr<PaddleGeneralModelConfig> model_config = resource.get_general_model_config().front(); std::shared_ptr<PaddleGeneralModelConfig> model_config =
//copy data to tnsor resource.get_general_model_config().front();
int cube_key_found = 0;
int cube_key_miss = 0;
for (size_t i = 0; i < in->size(); ++i) { for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) { if (in->at(i).dtype != paddle::PaddleDType::INT64) {
dense_out[dense_idx] = in->at(i); dense_out[dense_idx] = in->at(i);
...@@ -142,43 +217,75 @@ int GeneralDistKVInferOp::inference() { ...@@ -142,43 +217,75 @@ int GeneralDistKVInferOp::inference() {
sparse_out[sparse_idx].lod[x].begin()); sparse_out[sparse_idx].lod[x].begin());
} }
sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32; sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
sparse_out[sparse_idx].shape.push_back(sparse_out[sparse_idx].lod[0].back()); sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE); sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i]; sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() * sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float)); EMBEDDING_SIZE * sizeof(float));
float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data()); float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
if (!dst_ptr) {
VLOG(2) << "dst_ptr is null. sparse_idx:" << sparse_idx;
continue;
}
for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) { for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
float *data_ptr = dst_ptr + x * EMBEDDING_SIZE; float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
memcpy(data_ptr, uint64_t cur_key = keys[cube_val_idx];
values[cube_val_idx].buff.data(), rec::mcube::CubeValue *cur_val = key_map[cur_key];
values[cube_val_idx].buff.size()); if (cur_val->buff.size() == 0) {
cube_val_idx++; memset(data_ptr, (float)0.0, sizeof(float) * EMBEDDING_SIZE);
++cube_key_miss;
++cube_val_idx;
continue;
}
// The data generated by pslib has 10 bytes of information to be filtered
// out
memcpy(data_ptr, cur_val->buff.data(), cur_val->buff.size() );
// VLOG(3) << keys[cube_val_idx] << ":" << data_ptr[0] << ", " <<
// data_ptr[1] << ", " <<data_ptr[2] << ", " <<data_ptr[3] << ", "
// <<data_ptr[4] << ", " <<data_ptr[5] << ", " <<data_ptr[6] << ", "
// <<data_ptr[7] << ", " <<data_ptr[8];
++cube_key_found;
++cube_val_idx;
} }
++sparse_idx; ++sparse_idx;
} }
VLOG(3) << "(logid=" << log_id << ") sparse tensor load success."; bool cube_fail = (cube_key_found == 0);
if (cube_fail) {
LOG(WARNING) << "(logid=" << log_id << ") cube seek fail";
}
VLOG(2) << "(logid=" << log_id << ") cube key found: " << cube_key_found
<< " , cube key miss: " << cube_key_miss;
VLOG(2) << "(logid=" << log_id << ") sparse tensor load success.";
timeline.Pause();
VLOG(2) << "dist kv, cube and datacopy time: " << timeline.ElapsedUS();
TensorVector infer_in; TensorVector infer_in;
infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end()); infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end()); infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
int batch_size = input_blob->_batch_size; int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size; output_blob->_batch_size = batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS(); int64_t start = timeline.TimeStampUS();
timeline.Start(); timeline.Start();
// call paddle inference here // call paddle inference here
if (InferManager::instance().infer( if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) { engine_name().c_str(), &infer_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id << ") Failed do infer in fluid model: " << engine_name(); LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name();
return -1; return -1;
} }
int64_t end = timeline.TimeStampUS(); int64_t end = timeline.TimeStampUS();
if (cube_fail) {
float *out_ptr = static_cast<float *>(out->at(0).data.data());
out_ptr[0] = 0.0;
}
timeline.Pause();
VLOG(2) << "dist kv, pure paddle infer time: " << timeline.ElapsedUS();
CopyBlobInfo(input_blob, output_blob); CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start); AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end); AddBlobInfo(output_blob, end);
return 0; return 0;
} }
DEFINE_OP(GeneralDistKVInferOp); DEFINE_OP(GeneralDistKVInferOp);
......
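The DistKV change above deduplicates the raw key stream, answers what it can from the per-engine CubeCache, and only sends the remaining unique keys to the cube service; key_map then lets every original (possibly repeated) key find its CubeValue during the copy into the sparse tensors. A minimal sketch of that dedup-then-filter idea with plain STL containers (CubeValue is stubbed out here; the real types come from cube_api.h and cache.h):
#include <algorithm>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
struct FakeValue { std::string buff; };  // stand-in for rec::mcube::CubeValue
int main() {
  std::vector<uint64_t> keys = {7, 3, 7, 9, 3};        // raw keys, with repeats
  std::unordered_map<uint64_t, FakeValue*> key_map;
  std::vector<uint64_t> unique_keys;
  for (uint64_t k : keys) {
    if (key_map.find(k) == key_map.end()) {            // first occurrence of k
      key_map[k] = nullptr;
      unique_keys.push_back(k);
    }
  }
  // Pretend the cache already holds key 7; only {3, 9} would be sent to cube.
  static FakeValue cached{"embedding-bytes"};
  key_map[7] = &cached;
  unique_keys.erase(std::remove(unique_keys.begin(), unique_keys.end(), 7),
                    unique_keys.end());
  return static_cast<int>(unique_keys.size());         // 2 keys left to seek
}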
...@@ -31,7 +31,22 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -31,7 +31,22 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; // support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int GeneralReaderOp::inference() { int GeneralReaderOp::inference() {
// read request from client // read request from client
...@@ -78,6 +93,7 @@ int GeneralReaderOp::inference() { ...@@ -78,6 +93,7 @@ int GeneralReaderOp::inference() {
int64_t elem_type = 0; int64_t elem_type = 0;
int64_t elem_size = 0; int64_t elem_size = 0;
int64_t databuf_size = 0; int64_t databuf_size = 0;
const void* src_ptr = nullptr;
for (int i = 0; i < var_num; ++i) { for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor; paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->tensor(i); const Tensor &tensor = req->tensor(i);
...@@ -86,19 +102,38 @@ int GeneralReaderOp::inference() { ...@@ -86,19 +102,38 @@ int GeneralReaderOp::inference() {
elem_size = 0; elem_size = 0;
databuf_size = 0; databuf_size = 0;
elem_type = tensor.elem_type(); elem_type = tensor.elem_type();
VLOG(2) << "var[" << i << "] has elem type: " << elem_type; src_ptr = nullptr ;
if (elem_type == P_INT64) { // int64 if (elem_type == P_INT64) { // int64
elem_size = sizeof(int64_t); elem_size = sizeof(int64_t);
paddleTensor.dtype = paddle::PaddleDType::INT64; paddleTensor.dtype = paddle::PaddleDType::INT64;
data_len = tensor.int64_data_size(); data_len = tensor.int64_data_size();
src_ptr = tensor.int64_data().data();
} else if (elem_type == P_FLOAT32) { } else if (elem_type == P_FLOAT32) {
elem_size = sizeof(float); elem_size = sizeof(float);
paddleTensor.dtype = paddle::PaddleDType::FLOAT32; paddleTensor.dtype = paddle::PaddleDType::FLOAT32;
data_len = tensor.float_data_size(); data_len = tensor.float_data_size();
src_ptr = tensor.float_data().data();
} else if (elem_type == P_INT32) { } else if (elem_type == P_INT32) {
elem_size = sizeof(int32_t); elem_size = sizeof(int32_t);
paddleTensor.dtype = paddle::PaddleDType::INT32; paddleTensor.dtype = paddle::PaddleDType::INT32;
data_len = tensor.int_data_size(); data_len = tensor.int_data_size();
src_ptr = tensor.int_data().data();
} else if (elem_type == P_UINT8) {
elem_size = sizeof(uint8_t);
paddleTensor.dtype = paddle::PaddleDType::UINT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_INT8) {
elem_size = sizeof(int8_t);
paddleTensor.dtype = paddle::PaddleDType::INT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_FP16) {
// copy bytes from tensor content to TensorVector
elem_size = 1;
paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_STRING) { } else if (elem_type == P_STRING) {
// use paddle::PaddleDType::UINT8 as for String. // use paddle::PaddleDType::UINT8 as for String.
elem_size = sizeof(char); elem_size = sizeof(char);
...@@ -109,8 +144,18 @@ int GeneralReaderOp::inference() { ...@@ -109,8 +144,18 @@ int GeneralReaderOp::inference() {
// now only support single string // now only support single string
for (int idx = 0; idx < tensor.data_size(); idx++) { for (int idx = 0; idx < tensor.data_size(); idx++) {
data_len += tensor.data()[idx].length() + 1; data_len += tensor.data()[idx].length() + 1;
src_ptr = tensor.data()[idx].data();
} }
} }
VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";"
<< "elem_size=" << elem_size << ";"
<< "dtype=" << paddleTensor.dtype << ";"
<< "data_len=" << data_len;
if (src_ptr == nullptr) {
LOG(ERROR) << "Not support var[" << i << "] with elem_type["
<< elem_type << "]";
continue;
}
// implement lod tensor here // implement lod tensor here
// only support 1-D lod // only support 1-D lod
// TODO(HexToString): support 2-D lod // TODO(HexToString): support 2-D lod
...@@ -141,44 +186,17 @@ int GeneralReaderOp::inference() { ...@@ -141,44 +186,17 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") var[" << i VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has lod_tensor and len=" << out->at(i).lod[0].back(); << "] has lod_tensor and len=" << out->at(i).lod[0].back();
} }
if (elem_type == P_INT64) { void* dst_ptr = out->at(i).data.data();
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data()); if (!dst_ptr) {
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i LOG(ERROR) << "dst_ptr is nullptr";
<< "] is " << tensor.int64_data(0); return -1;
if (!dst_ptr) { }
LOG(ERROR) << "dst_ptr is nullptr";
return -1; // For common data, we just copy from src to dst
} // For string data, we need to iterate through all str
memcpy(dst_ptr, tensor.int64_data().data(), databuf_size); if (elem_type != P_STRING) {
/* memcpy(dst_ptr, src_ptr, databuf_size);
int elem_num = tensor.int64_data_size(); } else {
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.int64_data(k);
}
*/
} else if (elem_type == P_FLOAT32) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.float_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.float_data().data(), databuf_size);
/*int elem_num = tensor.float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.float_data(k);
}*/
} else if (elem_type == P_INT32) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int_data().data(), databuf_size);
} else if (elem_type == P_STRING) {
char *dst_ptr = static_cast<char *>(out->at(i).data.data()); char *dst_ptr = static_cast<char *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.data(0); << "] is " << tensor.data(0);
......
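The reader rewrite above collapses the per-type copy branches into one path: derive (elem_size, PaddleDType, data_len, src_ptr) from elem_type, then do a single memcpy for every non-string input, while strings keep their char-by-char handling. The element-size mapping it implements, written out as a small lookup sketch (the P_* codes follow the ProtoDataType enum added at the top of this file; FP16 is copied as raw bytes, so its element size is 1):
#include <cstddef>
#include <cstdint>
size_t elem_size_of(int elem_type) {
  switch (elem_type) {
    case 0:  return sizeof(int64_t);  // P_INT64
    case 1:  return sizeof(float);    // P_FLOAT32
    case 2:  return sizeof(int32_t);  // P_INT32
    case 5:  return 1;                // P_FP16: raw bytes from tensor_content
    case 7:  return sizeof(uint8_t);  // P_UINT8
    case 8:  return sizeof(int8_t);   // P_INT8
    case 20: return sizeof(char);     // P_STRING, copied character by character
    default: return 0;                // mirrors the "Unsupported var" branch above
  }
}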
...@@ -74,10 +74,19 @@ int GeneralResponseOp::inference() { ...@@ -74,10 +74,19 @@ int GeneralResponseOp::inference() {
// and the order of Output is the same as the prototxt FetchVar. // and the order of Output is the same as the prototxt FetchVar.
// otherwise, you can only get the Output by the corresponding of // otherwise, you can only get the Output by the corresponding of
// Name -- Alias_name. // Name -- Alias_name.
fetch_index.resize(req->fetch_var_names_size()); if (req->fetch_var_names_size() > 0) {
for (int i = 0; i < req->fetch_var_names_size(); ++i) { fetch_index.resize(req->fetch_var_names_size());
fetch_index[i] = for (int i = 0; i < req->fetch_var_names_size(); ++i) {
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)]; fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
} else {
fetch_index.resize(model_config->_fetch_alias_name.size());
for (int i = 0; i < model_config->_fetch_alias_name.size(); ++i) {
fetch_index[i] =
model_config
->_fetch_alias_name_to_index[model_config->_fetch_alias_name[i]];
}
} }
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) { for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
...@@ -105,7 +114,7 @@ int GeneralResponseOp::inference() { ...@@ -105,7 +114,7 @@ int GeneralResponseOp::inference() {
// fetch_index is the real index in FetchVar of Fetchlist // fetch_index is the real index in FetchVar of Fetchlist
// for example, FetchVar = {0:A, 1:B, 2:C} // for example, FetchVar = {0:A, 1:B, 2:C}
// FetchList = {0:C,1:A}, at this situation. // FetchList = {0:C,1:A}, at this situation.
// fetch_index = [2,0], C's index = 2 and A's index = 0 // fetch_index = [2,0], C's index = 2 and A's index = 0
for (auto &idx : fetch_index) { for (auto &idx : fetch_index) {
Tensor *tensor = output->add_tensor(); Tensor *tensor = output->add_tensor();
tensor->set_name(in->at(idx).name); tensor->set_name(in->at(idx).name);
...@@ -159,6 +168,21 @@ int GeneralResponseOp::inference() { ...@@ -159,6 +168,21 @@ int GeneralResponseOp::inference() {
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr, google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data); output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
} else if (dtype == paddle::PaddleDType::UINT8) {
tensor->set_elem_type(7);
VLOG(2) << "(logid=" << log_id << ")Prepare uint8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::INT8) {
tensor->set_elem_type(8);
VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::FLOAT16) {
tensor->set_elem_type(5);
VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} }
VLOG(2) << "(logid=" << log_id << ") fetch var [" VLOG(2) << "(logid=" << log_id << ") fetch var ["
......
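The three new branches above serialize UINT8, INT8 and FLOAT16 fetch outputs as raw bytes in tensor_content and tag them with elem_type 7, 8 and 5. Those magic numbers are meant to line up with the ProtoDataType enum introduced in general_reader_op.cpp; a compile-time sketch of that assumption (the enum is copied from that file):
enum ProtoDataType {
  P_INT64 = 0, P_FLOAT32, P_INT32, P_FP64, P_INT16, P_FP16, P_BF16,
  P_UINT8, P_INT8, P_BOOL, P_COMPLEX64, P_COMPLEX128, P_STRING = 20
};
static_assert(P_FP16 == 5 && P_UINT8 == 7 && P_INT8 == 8,
              "elem_type codes set by GeneralResponseOp must match ProtoDataType");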
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
syntax = "proto2"; syntax = "proto3";
import "pds_option.proto"; import "pds_option.proto";
import "builtin_format.proto"; import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model; package baidu.paddle_serving.predictor.general_model;
...@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model; ...@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true; option cc_generic_services = true;
message Tensor { message Tensor {
repeated string data = 1; // VarType: INT64
repeated int32 int_data = 2; repeated int64 int64_data = 1;
repeated int64 int64_data = 3;
repeated float float_data = 4; // VarType: FP32
optional int32 elem_type = repeated float float_data = 2;
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch // VarType: INT32
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 int_data = 3;
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt // VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64, index 2x represents the real part, 2x+1
// represents the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128, index 2x represents the real part, 2x+1
// represents the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of data(LOD), support variable length data, only for fetch tensor
// currently.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14; // get from the Model prototxt
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
}; };
message Request { message Request {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; bool profile_server = 3;
required uint64 log_id = 4 [ default = 0 ]; uint64 log_id = 4;
}; };
message Response { message Response {
repeated ModelOutput outputs = 1; repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2; repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
}; };
message ModelOutput { message ModelOutput {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; string engine_name = 2;
} }
service GeneralModelService { service GeneralModelService {
......
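The reworked Tensor message keeps one repeated field per dense type plus a bytes tensor_content fallback for FP16/BF16/INT16/INT8/UINT8. A sketch of filling it through the protoc-generated C++ accessors (the generated header name is an assumption; it is not shown in this diff):
// #include "general_model_service.pb.h"  // assumed name of the generated header
#include <string>
using baidu::paddle_serving::predictor::general_model::Tensor;
void fill_tensor_sketch(Tensor* t, const std::string& raw_fp16_bytes) {
  t->set_name("image");        // 'name' from the model description prototxt
  t->set_alias_name("image");  // 'alias_name' from the model description prototxt
  t->add_shape(1); t->add_shape(3); t->add_shape(224); t->add_shape(224);
  t->set_elem_type(1);         // 1 => FP32, see the mapping comment above
  for (int i = 0; i < 3 * 224 * 224; ++i) t->add_float_data(0.0f);
  // An FP16 payload would instead go into tensor_content as raw bytes:
  // t->set_elem_type(5);
  // t->set_tensor_content(raw_fp16_bytes);
}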
...@@ -276,43 +276,65 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -276,43 +276,65 @@ class PdsCodeGenerator : public CodeGenerator {
"output_name", "output_name",
google::protobuf::dots_to_colons(m->output_type()->full_name())); google::protobuf::dots_to_colons(m->output_type()->full_name()));
if (m->name() == "inference") { if (m->name() == "inference") {
std::string inference_body = "";
inference_body += " brpc::ClosureGuard done_guard(done);\n";
inference_body += " brpc::Controller* cntl = \n";
inference_body += " static_cast<brpc::Controller*>(cntl_base);\n";
inference_body += " cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n";
inference_body += " uint64_t log_id = request->log_id();\n";
inference_body += " cntl->set_log_id(log_id);\n";
inference_body += " ::baidu::paddle_serving::predictor::InferService* svr = \n";
inference_body += " ";
inference_body += "::baidu::paddle_serving::predictor::InferServiceManager::instance(";
inference_body += ").item(\"$service$\");\n";
inference_body += " if (svr == NULL) {\n";
inference_body += " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: ";
inference_body += "$service$\";\n";
inference_body += " cntl->SetFailed(404, \"Not found service: $service$\");\n";
inference_body += " return ;\n";
inference_body += " }\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "remote_side=\[\" << cntl->remote_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "local_side=\[\" << cntl->local_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
inference_body += " int err_code = svr->inference(request, response, log_id);\n";
inference_body += " if (err_code != 0) {\n";
inference_body += " LOG(WARNING)\n";
inference_body += " << \"(logid=\" << log_id << \") Failed call ";
inference_body += "inferservice[$name$], name[$service$]\"\n";
inference_body += " << \", error_code: \" << err_code;\n";
inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
inference_body += "failed!\");\n";
inference_body += " }\n";
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
inference_body += " std::ostringstream oss;\n";
inference_body += " oss << \"[serving]\"\n";
inference_body += " << \"logid=\" << log_id << \",\";\n";
inference_body += " int op_num = response->profile_time_size() / 2;\n";
inference_body += " for (int i = 0; i < op_num; ++i) {\n";
inference_body += " double t = (response->profile_time(i * 2 + 1)\n";
inference_body += " - response->profile_time(i * 2)) / 1000.0;\n";
inference_body += " oss << \"op\" << i << \"=\" << t << \"ms,\";\n";
inference_body += " }\n";
inference_body += " double total_time = (end - start) / 1000.0;\n";
inference_body += " oss << \"cost=\" << total_time << \"ms.\";\n";
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << oss.str();\n";
inference_body += " response->add_profile_time(start);\n";
inference_body += " response->add_profile_time(end);\n";
} else {
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - "; // NOLINT
inference_body += "start) << \"\]\";\n";
}
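      // Hedged illustration (not verbatim generated output): for GeneralModelService the
      // string assembled above makes the generated service log a line such as
      //   [serving]logid=1234,op0=2.31ms,op1=0.87ms,cost=3.40ms.
      // while every other service keeps the single "tc=[...]" total-cost line.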
printer->Print( printer->Print(
" baidu::rpc::ClosureGuard done_guard(done);\n" inference_body.c_str(),
" baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"<< cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
"<< \"$name$\" << \"\]\";\n"
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
" }\n"
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name", "name",
class_name, class_name,
"service", "service",
...@@ -1021,45 +1043,65 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1021,45 +1043,65 @@ class PdsCodeGenerator : public CodeGenerator {
"output_name", "output_name",
google::protobuf::dots_to_colons(m->output_type()->full_name())); google::protobuf::dots_to_colons(m->output_type()->full_name()));
if (m->name() == "inference") { if (m->name() == "inference") {
std::string inference_body = "";
inference_body += " brpc::ClosureGuard done_guard(done);\n";
inference_body += " brpc::Controller* cntl = \n";
inference_body += " static_cast<brpc::Controller*>(cntl_base);\n";
inference_body += " cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n";
inference_body += " uint64_t log_id = request->log_id();\n";
inference_body += " cntl->set_log_id(log_id);\n";
inference_body += " ::baidu::paddle_serving::predictor::InferService* svr = \n";
inference_body += " ";
inference_body += "::baidu::paddle_serving::predictor::InferServiceManager::instance(";
inference_body += ").item(\"$service$\");\n";
inference_body += " if (svr == NULL) {\n";
inference_body += " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: ";
inference_body += "$service$\";\n";
inference_body += " cntl->SetFailed(404, \"Not found service: $service$\");\n";
inference_body += " return ;\n";
inference_body += " }\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "remote_side=\[\" << cntl->remote_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "local_side=\[\" << cntl->local_side() << "; // NOLINT
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
inference_body += " int err_code = svr->inference(request, response, log_id);\n";
inference_body += " if (err_code != 0) {\n";
inference_body += " LOG(WARNING)\n";
inference_body += " << \"(logid=\" << log_id << \") Failed call ";
inference_body += "inferservice[$name$], name[$service$]\"\n";
inference_body += " << \", error_code: \" << err_code;\n";
inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
inference_body += "failed!\");\n";
inference_body += " }\n";
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
inference_body += " std::ostringstream oss;\n";
inference_body += " oss << \"[serving]\"\n";
inference_body += " << \"logid=\" << log_id << \",\";\n";
inference_body += " int op_num = response->profile_time_size() / 2;\n";
inference_body += " for (int i = 0; i < op_num; ++i) {\n";
inference_body += " double t = (response->profile_time(i * 2 + 1)\n";
inference_body += " - response->profile_time(i * 2)) / 1000.0;\n";
inference_body += " oss << \"op\" << i << \"=\" << t << \"ms,\";\n";
inference_body += " }\n";
inference_body += " double total_time = (end - start) / 1000.0;\n";
inference_body += " oss << \"cost=\" << total_time << \"ms.\";\n";
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << oss.str();\n";
inference_body += " response->add_profile_time(start);\n";
inference_body += " response->add_profile_time(end);\n";
} else {
inference_body += " // flush notice log\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - "; // NOLINT
inference_body += "start) << \"\]\";\n";
}
printer->Print( printer->Print(
" brpc::ClosureGuard done_guard(done);\n" inference_body.c_str(),
" brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
" }\n"
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name", "name",
class_name, class_name,
"service", "service",
...@@ -1492,11 +1534,6 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1492,11 +1534,6 @@ class PdsCodeGenerator : public CodeGenerator {
const FieldDescriptor* fd = in_shared_fields[si]; const FieldDescriptor* fd = in_shared_fields[si];
std::string field_name = fd->name(); std::string field_name = fd->name();
printer->Print("\n/////$field_name$\n", "field_name", field_name); printer->Print("\n/////$field_name$\n", "field_name", field_name);
if (fd->is_optional()) {
printer->Print(
"if (req->has_$field_name$()) {\n", "field_name", field_name);
printer->Indent();
}
if (fd->cpp_type() == if (fd->cpp_type() ==
google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE || google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE ||
fd->is_repeated()) { fd->is_repeated()) {
...@@ -1509,10 +1546,6 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1509,10 +1546,6 @@ class PdsCodeGenerator : public CodeGenerator {
"field_name", "field_name",
field_name); field_name);
} }
if (fd->is_optional()) {
printer->Outdent();
printer->Print("}\n");
}
} }
printer->Print( printer->Print(
......
...@@ -25,7 +25,7 @@ DEFINE_int32(port, 8010, ""); ...@@ -25,7 +25,7 @@ DEFINE_int32(port, 8010, "");
DEFINE_string(workflow_path, "./conf", ""); DEFINE_string(workflow_path, "./conf", "");
DEFINE_string(workflow_file, "workflow.prototxt", ""); DEFINE_string(workflow_file, "workflow.prototxt", "");
DEFINE_string(inferservice_path, "./conf", ""); DEFINE_string(inferservice_path, "./conf", "");
DEFINE_string(inferservice_file, "service.prototxt", ""); DEFINE_string(inferservice_file, "infer_service.prototxt", "");
DEFINE_string(logger_path, "./conf", ""); DEFINE_string(logger_path, "./conf", "");
DEFINE_string(logger_file, "log.conf", ""); DEFINE_string(logger_file, "log.conf", "");
DEFINE_string(resource_path, "./conf", ""); DEFINE_string(resource_path, "./conf", "");
......
FILE(GLOB framework_srcs ${CMAKE_CURRENT_LIST_DIR}/*.cpp) FILE(GLOB framework_srcs ${CMAKE_CURRENT_LIST_DIR}/*.cpp ${CMAKE_CURRENT_LIST_DIR}/../../cube/cube-builder/src/seqfile_reader.cpp)
LIST(APPEND pdserving_srcs ${framework_srcs}) LIST(APPEND pdserving_srcs ${framework_srcs})
LIST(APPEND pclient_srcs ${framework_srcs}) LIST(APPEND pclient_srcs ${framework_srcs})
...@@ -26,9 +26,90 @@ ...@@ -26,9 +26,90 @@
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/memory.h" #include "core/predictor/framework/memory.h"
// this file is included by bsf.h
namespace im { namespace im {
namespace bsf { namespace bsf {
template <typename InItemT, typename OutItemT>
bool Task<InItemT, OutItemT>::task_fetch_init(BatchTasks<TaskT>& batchTask) {
  // Double-checked locking, to reduce the granularity of locking.
if (!fetch_init) {
if (taskmeta_num > 1) {
      // If the task was split into multiple taskmetas, a lock is required.
AutoMutex lock(task_mut);
task_fetch_create(batchTask);
} else {
      // If the task has only one taskmeta, no lock is needed.
task_fetch_create(batchTask);
}
}
return true;
}
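// A generic sketch of the double-checked initialization pattern described above,
// using std::mutex purely for illustration (the real code relies on AutoMutex and
// THREAD_MUTEX_T; the names below are assumptions).
#include <mutex>

struct LazyFetchBuffers {
  bool init_done = false;
  std::mutex mu;

  void ensure_init(bool shared_across_threads) {
    if (init_done) return;                // first, unlocked check
    if (shared_across_threads) {
      std::lock_guard<std::mutex> lock(mu);
      if (!init_done) {                   // second check under the lock
        // allocate output tensors here
        init_done = true;
      }
    } else {
      // single owner: no lock needed
      init_done = true;
    }
  }
};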
template <typename InItemT, typename OutItemT>
bool Task<InItemT, OutItemT>::task_fetch_create(BatchTasks<TaskT>& batchTask) {
if (!fetch_init) {
vector_fetch_lod_index = batchTask.vector_fetch_lod_index;
set_fetch_nobatch_index = batchTask.set_fetch_nobatch_index;
OutVectorT taskMetaOutLodTensor;
size_t fetchvar_num = batchTask._batch_out.size();
for (size_t fetchvar_index = 0; fetchvar_index < fetchvar_num;
++fetchvar_index) {
size_t fetchvar_bytesize_index =
batchTask.fetchvar_bytesize(fetchvar_index);
size_t fetchvar_batch = 0;
      // 1. nobatch fetchvar case
if (set_fetch_nobatch_index.size() > 0 &&
set_fetch_nobatch_index.find(fetchvar_index) !=
set_fetch_nobatch_index.end()) {
fetchvar_batch = 1;
} else if (vector_fetch_lod_index.size() > 0 &&
std::find(vector_fetch_lod_index.begin(),
vector_fetch_lod_index.end(),
fetchvar_index) != vector_fetch_lod_index.end()) {
        // 2. lod fetchvar case: the total shape[0] cannot be determined yet.
        // Allocate taskmeta_num temporary buffers according to the task's taskmeta count;
        // each lod fetchvar is copied into its own temporary buffer, and once all of them
        // arrive the totals are computed and the fetchvar data and lod are merged.
fetchvar_batch = 0;
} else {
        // 3. Ordinary fetchvar case: the task's total fetchvar_batch equals
        // the total input batch_size().
fetchvar_batch = batch_size();
}
paddle::PaddleTensor tensor_out;
tensor_out.name = batchTask._batch_out[fetchvar_index].name;
tensor_out.dtype =
paddle::PaddleDType(batchTask._batch_out[fetchvar_index].dtype);
tensor_out.shape = batchTask._batch_out[fetchvar_index].shape;
tensor_out.shape[0] = fetchvar_batch;
if (fetchvar_batch != 0) {
        // The lod is empty in this case.
tensor_out.lod = batchTask._batch_out[fetchvar_index].lod;
// resize all batch memory at one time
size_t databuf_size = fetchvar_batch * fetchvar_bytesize_index;
tensor_out.data.Resize(databuf_size);
} else {
        // When taskmeta_num == 1, only one taskMeta touches the task at a time, so there
        // is no thread-safety issue and taskMeta->task can resize and copy directly.
        // When the task is split into several taskMetas, temporary objects record the
        // partial results, which are merged once all of them have arrived.
if (taskmeta_num > 1) {
taskMetaOutLodTensor.push_back(tensor_out);
}
}
outVectorT_ptr->push_back(tensor_out);
}
    // outLodTensorVector is effectively a two-level vector whose shape is
    // taskmeta_num * vector_fetch_lod_index.size();
outLodTensorVector.resize(taskmeta_num, taskMetaOutLodTensor);
fetch_init = true;
}
return true;
}
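// A hedged sketch of the per-fetchvar sizing rule applied above; the helper and its
// numbers are illustrative, not Serving API:
//   nobatch  -> shape[0] = 1, buffer = 1 * bytesize
//   lod      -> shape[0] = 0, allocation deferred until all taskmetas have returned
//   ordinary -> shape[0] = batch_size(), buffer = batch_size() * bytesize
#include <cstddef>

inline size_t planned_databuf_bytes(bool is_nobatch, bool is_lod,
                                    size_t batch, size_t bytes_per_row) {
  if (is_nobatch) return bytes_per_row;      // single shared row
  if (is_lod) return 0;                      // unknown until prediction finishes
  return batch * bytes_per_row;              // e.g. 4 * (340 * 340 * 4) bytes
}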
template <typename TaskT> template <typename TaskT>
void* TaskExecutor<TaskT>::thread_entry(void* args) { void* TaskExecutor<TaskT>::thread_entry(void* args) {
ThreadContext<TaskT>* context = static_cast<ThreadContext<TaskT>*>(args); ThreadContext<TaskT>* context = static_cast<ThreadContext<TaskT>*>(args);
...@@ -134,9 +215,10 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule( ...@@ -134,9 +215,10 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
LOG(ERROR) << "Failed get TaskT from object pool"; LOG(ERROR) << "Failed get TaskT from object pool";
return TaskHandler<TaskT>::valid_handle(); return TaskHandler<TaskT>::valid_handle();
} }
task->clear();
/* /*
if (!BatchTasks<TaskT>::check_valid(in, out, _batch_align)) { if (!BatchTasks<TaskT>::check_valid(in, out, _overrun)) {
LOG(ERROR) << "Invalid input & output"; LOG(ERROR) << "Invalid input & output";
return TaskHandler<TaskT>::valid_handle(); return TaskHandler<TaskT>::valid_handle();
} }
...@@ -156,9 +238,11 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule( ...@@ -156,9 +238,11 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
task->inVectorT_ptr = (const InVectorT*)inVectorT_ptr; task->inVectorT_ptr = (const InVectorT*)inVectorT_ptr;
task->outVectorT_ptr = (OutVectorT*)outVectorT_ptr; task->outVectorT_ptr = (OutVectorT*)outVectorT_ptr;
if (!task->task_init()) {
LOG(ERROR) << "task->init() failed";
}
task->rem = task->batch_size(); task->rem = task->batch_size();
task->index.store(0, butil::memory_order_relaxed); task->index.store(0, butil::memory_order_relaxed);
AutoMutex lock(_mut); AutoMutex lock(_mut);
_task_queue.push_back(task); _task_queue.push_back(task);
THREAD_COND_SIGNAL(&_cond); THREAD_COND_SIGNAL(&_cond);
...@@ -168,11 +252,12 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule( ...@@ -168,11 +252,12 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
// this function is accessed by multi thread. // this function is accessed by multi thread.
// so AutoMutex at first. // so AutoMutex at first.
// so batch.append_task is thread safe. // so batchTask.append_task is thread safe.
// you dont need to add extra lock in append_task() // you dont need to add extra lock in append_task()
  // The task has already been initialized at this point.
template <typename TaskT> template <typename TaskT>
bool TaskExecutor<TaskT>::move_task_to_batch( bool TaskExecutor<TaskT>::move_task_to_batch(
BatchTasks<TaskT>& batch) { // NOLINT BatchTasks<TaskT>& batchTask) { // NOLINT
AutoMutex lock(_mut); AutoMutex lock(_mut);
while (_task_queue.empty()) { while (_task_queue.empty()) {
THREAD_COND_WAIT(&_cond, &_mut); THREAD_COND_WAIT(&_cond, &_mut);
...@@ -183,15 +268,65 @@ bool TaskExecutor<TaskT>::move_task_to_batch( ...@@ -183,15 +268,65 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
return false; return false;
} }
TaskT* previous_task = nullptr;
while (!_task_queue.empty()) { while (!_task_queue.empty()) {
TaskT* task = _task_queue.front(); TaskT* task = _task_queue.front();
size_t rem = batch.append_task(task);
    // It cannot be known in advance whether a fetchvar is lod (even if the input is
    // not lod, the output may still be lod).
    // The simple approach: never split a task, i.e. user requests may be merged and
    // predicted together, but a single request is never split into two smaller parts.
    // That only requires setting the engine attribute allow_split_request = false.
    // The complex approach: allow a Task to be split, whether or not lod is involved.
    // The difficulty: before prediction we know how many taskmetas the task was split
    // into, but only after prediction do we know how many fetchvars there are and how
    // many of them are lod.
    // So the task must first create taskmeta_num * (number of lod-type fetchvars)
    // temporary PaddleTensors (holding data and lod).
    // Because the scheduling unit of the worker threads is the taskmeta, these can only
    // be created in notify_task via taskmeta->task.
    // Since several taskmetas map to one task, there is multi-thread contention, so the
    // task must hold a lock.
    // Atomic operations are not enough: every thread has to wait until the PaddleTensors
    // above have been created before it can continue.
    // Ordinary fetchvars also need the lock to create the PaddleTensor before data can
    // be copied into it.
    // _overrun controls whether the asynchronous BatchTasks may temporarily exceed its
    // capacity for a single task.
    // If _overrun is true, even when only 1 batch of room is left, a whole Task is still
    // placed into the BatchTasks, temporarily exceeding the limit.
    // If _overrun is false, this is not allowed.
    // If the model itself has a maximum batch limit, keep it false (the default).
    // If the model has no such limit but you cap the BatchTasks batch yourself, setting
    // it to true can be considered.
    // _allow_split_request == true allows splitting a task: when 1 batch of room is left
    // in the BatchTasks, 1 batch is carved off the next Task.
    // _allow_split_request == false means no task is ever split and the remaining 1 batch
    // of room is wasted.
    // The default is true, so tasks are split to maximize space utilization.
if (!batchTask.get_allow_split_request()) {
if (task->batch_size() > batchTask.get_rem_size() &&
!batchTask.get_overrun()) {
break;
}
}
    // combine_task_valid decides whether two tasks can be merged.
    // Apart from the outermost dimension, the inner shapes must match for a merge;
    // otherwise we break out of the loop and the task goes into the next batchTask.
    // This guarantees that every task passed to batch.append_task(task) has the same
    // inner shape.
    // For feedvars with shape[0] == 1 that do not scale with the batch, the merge keeps
    // only one of the values, so those feedvars must be equal for the tasks to merge;
    // otherwise we break out of the loop and the task goes into the next batchTask.
    // PaddleTensor and PaddleBuf do not overload operator==, so only the raw memory can
    // be compared.
    // TODO(HexToString): AutoPadding may be supported later.
if (previous_task != nullptr) {
if (!task->combine_task_valid(previous_task)) {
break;
}
}
size_t rem = batchTask.append_task(task);
previous_task = task;
if (task->rem <= 0) { if (task->rem <= 0) {
_task_queue.pop_front(); _task_queue.pop_front();
} }
if (rem <= 0) break; if (rem <= 0) break;
} }
LOG(INFO) << "Number of tasks remaining in _task_queue is"
<< _task_queue.size();
return true; return true;
} }
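// A worked illustration of the admission rules above, under assumed numbers: with
// _batch_size = 8 and queued tasks of batch 5 and 6, allow_split_request = false and
// overrun = false admit only the first task (the second waits for the next BatchTasks),
// while allow_split_request = true would let the second contribute a 3-batch slice.
// The helper below is a sketch, not part of the framework.
#include <cstddef>

inline size_t admitted_batch(size_t task_batch, size_t rem_size,
                             bool overrun, bool allow_split) {
  if (overrun) return task_batch;                        // take the whole task regardless
  if (!allow_split && task_batch > rem_size) return 0;   // defer to the next BatchTasks
  return task_batch < rem_size ? task_batch : rem_size;  // possibly a partial slice
}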
...@@ -201,11 +336,12 @@ bool TaskExecutor<TaskT>::move_task_to_batch( ...@@ -201,11 +336,12 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
// TaskT is from the SingleTon TaskExecutor`s _task_queue // TaskT is from the SingleTon TaskExecutor`s _task_queue
// although TaskMeta is a local variable, but several TaskMeta may points to // although TaskMeta is a local variable, but several TaskMeta may points to
// the same TaskT which is get from the SingleTon TaskExecutor`s _task_queue. // the same TaskT which is get from the SingleTon TaskExecutor`s _task_queue.
// put TaskMeta to the local variable BatchTasks<TaskT> batch. // put TaskMeta to the local variable BatchTasks<TaskT> batchTask.
// batch.merge_tasks() and batch.notify_tasks() has no lock. // batchTask.merge_tasks() and batchTask.notify_tasks() has no lock.
// BatchTasks<TaskT> batch itself is a local variable, it`s thread safe. // BatchTasks<TaskT> batchTask itself is a local variable, it`s thread safe.
// If batch.merge_tasks() and batch.notify_tasks() do something to TaskMeta // If batchTask.merge_tasks() and batchTask.notify_tasks() do something to
// TaskMeta
// you need to pay attention to that. // you need to pay attention to that.
// Multi-Thread deal with different TaskMeta(cause it`s created as local // Multi-Thread deal with different TaskMeta(cause it`s created as local
// variable) // variable)
...@@ -242,11 +378,23 @@ int TaskExecutor<TaskT>::work(ThreadContext<TaskT>* context) { ...@@ -242,11 +378,23 @@ int TaskExecutor<TaskT>::work(ThreadContext<TaskT>* context) {
return -1; return -1;
} }
    BatchTasks<TaskT> batch(_batch_size, _batch_align); // move_task_to_batch() takes the original task from the `_task_queue`
if (move_task_to_batch(batch)) { // put the original task into its own Vector<taskmeta>
batch.merge_tasks(); // the capacity of its own Vector<taskmeta> is decided by `_batch_size` or
_fn(&batch.in(), &batch.out()); // `_overrun`
batch.notify_tasks();
    // merge_tasks() moves the input-data into `_batch_in` from its own
// Vector<taskmeta>.
// because the predictor`s input is the `_batch_in`
    // notify_tasks() moves the output-data into every single taskmeta from
// `_batch_out`.
// because the predictor`s output is the `_batch_out`
BatchTasks<TaskT> batchTask(_batch_size, _overrun, _allow_split_request);
if (move_task_to_batch(batchTask)) {
batchTask.merge_tasks();
_fn(&batchTask.in(), &batchTask.out());
batchTask.notify_tasks();
} }
} }
......
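// Summary sketch of the reworked asynchronous flow (descriptive only):
//   schedule()           - client thread: task->clear(), task->task_init(), push onto _task_queue
//   move_task_to_batch() - worker thread: pop compatible tasks into a local BatchTasks
//   merge_tasks()        - copy every taskmeta's slice of input into _batch_in
//   _fn(in, out)         - run the predictor once on the merged batch
//   notify_tasks()       - scatter _batch_out back to each task and combine lod pieces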
...@@ -16,7 +16,9 @@ ...@@ -16,7 +16,9 @@
#include <errno.h> #include <errno.h>
#include <algorithm> #include <algorithm>
#include <cstring>
#include <list> #include <list>
#include <set>
#include <vector> #include <vector>
#ifdef BCLOUD #ifdef BCLOUD
...@@ -46,7 +48,8 @@ static const size_t DEFAULT_BATCH_SIZE = 100; ...@@ -46,7 +48,8 @@ static const size_t DEFAULT_BATCH_SIZE = 100;
// `rem` don`t need to be atomic, cause the operation `put` is synchronous. // `rem` don`t need to be atomic, cause the operation `put` is synchronous.
// actually, the reason is that lock have been added outside the operation // actually, the reason is that lock have been added outside the operation
// `put`. // `put`.
template <typename TaskT>
class BatchTasks;
// size_t `index` records how many batch have been processing completed. // size_t `index` records how many batch have been processing completed.
// `index` need to be atomic, cause the operation 'notify' is asynchronous. // `index` need to be atomic, cause the operation 'notify' is asynchronous.
template <typename InItemT, typename OutItemT> template <typename InItemT, typename OutItemT>
...@@ -56,7 +59,7 @@ struct Task { ...@@ -56,7 +59,7 @@ struct Task {
typedef InItemT InType; typedef InItemT InType;
typedef OutItemT OutType; typedef OutItemT OutType;
typedef Task<InItemT, OutItemT> TaskT; typedef Task<InItemT, OutItemT> TaskT;
typedef std::vector<int> ShapeVector; typedef std::vector<size_t> ShapeVector;
typedef std::vector<ShapeVector> VectorOfShapeVector; typedef std::vector<ShapeVector> VectorOfShapeVector;
int read_fd; int read_fd;
...@@ -65,7 +68,17 @@ struct Task { ...@@ -65,7 +68,17 @@ struct Task {
const InVectorT* inVectorT_ptr; const InVectorT* inVectorT_ptr;
OutVectorT* outVectorT_ptr; OutVectorT* outVectorT_ptr;
size_t rem; size_t rem;
size_t total_feed_batch;
std::set<size_t> set_feed_lod_index;
std::set<size_t> set_feed_nobatch_index;
std::vector<size_t> vector_fetch_lod_index;
std::set<size_t> set_fetch_nobatch_index;
butil::atomic<size_t> index; butil::atomic<size_t> index;
size_t taskmeta_num;
THREAD_MUTEX_T task_mut;
bool fetch_init;
// taskmeta_num * set_feed_lod_index.size()
std::vector<OutVectorT> outLodTensorVector;
Task() { Task() {
read_fd = -1; read_fd = -1;
...@@ -73,11 +86,57 @@ struct Task { ...@@ -73,11 +86,57 @@ struct Task {
owner_tid = -1; owner_tid = -1;
inVectorT_ptr = NULL; inVectorT_ptr = NULL;
outVectorT_ptr = NULL; outVectorT_ptr = NULL;
set_feed_lod_index.clear();
set_feed_nobatch_index.clear();
vector_fetch_lod_index.clear();
set_fetch_nobatch_index.clear();
rem = -1; rem = -1;
total_feed_batch = 0;
taskmeta_num = 0;
index.store(0, butil::memory_order_relaxed); index.store(0, butil::memory_order_relaxed);
THREAD_MUTEX_INIT(&task_mut, NULL);
fetch_init = false;
outLodTensorVector.clear();
}
~Task() {
read_fd = -1;
write_fd = -1;
owner_tid = -1;
inVectorT_ptr = NULL;
outVectorT_ptr = NULL;
set_feed_lod_index.clear();
set_feed_nobatch_index.clear();
vector_fetch_lod_index.clear();
set_fetch_nobatch_index.clear();
rem = -1;
total_feed_batch = 0;
taskmeta_num = 0;
index.store(0, butil::memory_order_relaxed);
THREAD_MUTEX_DESTROY(&task_mut);
fetch_init = false;
outLodTensorVector.clear();
} }
bool check_feedvar_valid(int feedvar_index) { void clear(){
read_fd = -1;
write_fd = -1;
owner_tid = -1;
inVectorT_ptr = NULL;
outVectorT_ptr = NULL;
set_feed_lod_index.clear();
set_feed_nobatch_index.clear();
vector_fetch_lod_index.clear();
set_fetch_nobatch_index.clear();
rem = -1;
total_feed_batch = 0;
taskmeta_num = 0;
index.store(0, butil::memory_order_relaxed);
THREAD_MUTEX_INIT(&task_mut, NULL);
fetch_init = false;
outLodTensorVector.clear();
}
bool check_feedvar_valid(size_t feedvar_index) {
if (feedvar_index < 0 || inVectorT_ptr->size() <= feedvar_index) { if (feedvar_index < 0 || inVectorT_ptr->size() <= feedvar_index) {
LOG(ERROR) << "feedvar doesnt exsit or feedvar_index error"; LOG(ERROR) << "feedvar doesnt exsit or feedvar_index error";
return 0; return 0;
...@@ -91,20 +150,47 @@ struct Task { ...@@ -91,20 +150,47 @@ struct Task {
return 1; return 1;
} }
// Now, it simply assume that the first dimension of data is batch. bool combine_task_valid(Task* other_task) {
// so the batch is PaddleTensor.shape[0] // TODO(HexToString): auto-padding
  // Apart from the outermost dimension, the inner shapes must match for a merge;
  // otherwise the caller breaks out of its loop and the task goes into the next batchTask.
  // This guarantees that every task in batch.append_task(task) has the same inner shape.
if (other_task->feedvar_shape_nobatch() != feedvar_shape_nobatch()) {
return false;
}
    // For feedvars with shape[0] == 1 that do not scale with the batch, the merge keeps
    // only one of the values, so those feedvars must be equal for the tasks to merge.
    // PaddleTensor and PaddleBuf do not overload operator==, so only the raw memory can be compared.
for (size_t feedvar_index = 0;
feedvar_index < set_feed_nobatch_index.size();
++feedvar_index) {
int result =
std::memcmp((*inVectorT_ptr)[feedvar_index].data.data(),
(*(other_task->inVectorT_ptr))[feedvar_index].data.data(),
(*inVectorT_ptr)[feedvar_index].data.length());
if (result != 0) return false;
}
return true;
}
// If batch information is added into feedvar.prototxt. size_t feedvar_batch_size(size_t feedvar_index) {
// we can get the information from the feedvar.prototxt instead of assume.
size_t feedvar_batch_size(int feedvar_index) {
if (!check_feedvar_valid(feedvar_index)) { if (!check_feedvar_valid(feedvar_index)) {
return 0; return 0;
} }
// if lod, 'lod[0].size()-1' is batch.
// for PaddleTensor lod is vector<vector<size_t>>, so lod[0] is real lod.
// for example, lod = [0,3,4,6], shape = [6,340,340], batch is 3 actually.
// for lod, the batch < shape[0].
if ((*inVectorT_ptr)[feedvar_index].lod.size() > 0 &&
(*inVectorT_ptr)[feedvar_index].lod[0].size() > 0) {
return (*inVectorT_ptr)[feedvar_index].lod[0].size() - 1;
}
// if not lod, the first dimension of data `PaddleTensor.shape[0]` is batch.
return (*inVectorT_ptr)[feedvar_index].shape[0]; return (*inVectorT_ptr)[feedvar_index].shape[0];
} }
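  // Worked illustration of the rule above (a sketch, not used anywhere): for a lod
  // tensor with lod = [0,3,4,6] and shape = [6,340,340], the batch is
  // lod[0].size() - 1 = 3 even though shape[0] = 6; without lod the batch is shape[0].
  static size_t batch_rule_example(const std::vector<std::vector<size_t>>& lod,
                                   const std::vector<int>& shape) {
    if (!lod.empty() && !lod[0].empty()) return lod[0].size() - 1;
    return static_cast<size_t>(shape[0]);
  }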
size_t feedvar_element_bytesize(int feedvar_index) { size_t feedvar_element_bytesize(size_t feedvar_index) {
if (!check_feedvar_valid(feedvar_index)) { if (!check_feedvar_valid(feedvar_index)) {
return 0; return 0;
} }
...@@ -126,7 +212,7 @@ struct Task { ...@@ -126,7 +212,7 @@ struct Task {
// Now, the implementation of this function is based on assumption // Now, the implementation of this function is based on assumption
// that shape [0] = batch_size. // that shape [0] = batch_size.
size_t feedvar_element_num(int feedvar_index) { size_t feedvar_element_num(size_t feedvar_index) {
if (!check_feedvar_valid(feedvar_index)) { if (!check_feedvar_valid(feedvar_index)) {
return 0; return 0;
} }
...@@ -138,18 +224,18 @@ struct Task { ...@@ -138,18 +224,18 @@ struct Task {
return 1; return 1;
} }
// start from shape[1], cause shape[0] = batch_size. // start from shape[1], cause shape[0] = batch_size.
for (int i = 1; i < (*inVectorT_ptr)[feedvar_index].shape.size(); ++i) { for (size_t i = 1; i < (*inVectorT_ptr)[feedvar_index].shape.size(); ++i) {
element_num *= (*inVectorT_ptr)[feedvar_index].shape[i]; element_num *= (*inVectorT_ptr)[feedvar_index].shape[i];
} }
return element_num; return element_num;
} }
size_t feedvar_bytesize(int feedvar_index) { size_t feedvar_bytesize(size_t feedvar_index) {
return feedvar_element_num(feedvar_index) * return feedvar_element_num(feedvar_index) *
feedvar_element_bytesize(feedvar_index); feedvar_element_bytesize(feedvar_index);
} }
ShapeVector feedvar_shape_nobatch(int feedvar_index) { ShapeVector feedvar_shape_nobatch(size_t feedvar_index) {
if (!check_feedvar_valid(feedvar_index)) { if (!check_feedvar_valid(feedvar_index)) {
return ShapeVector(); return ShapeVector();
} }
...@@ -158,40 +244,167 @@ struct Task { ...@@ -158,40 +244,167 @@ struct Task {
} }
VectorOfShapeVector feedvar_shape_nobatch() { VectorOfShapeVector feedvar_shape_nobatch() {
VectorOfShapeVector vector_of_feedvar_shape_nobatch(inVectorT_ptr->size()); VectorOfShapeVector vector_of_feedvar_shape_nobatch;
for (int index = 0; index < inVectorT_ptr->size(); ++index) { for (size_t feedvar_index = 0; feedvar_index < inVectorT_ptr->size();
vector_of_feedvar_shape_nobatch.push_back(feedvar_shape_nobatch(index)); ++feedvar_index) {
vector_of_feedvar_shape_nobatch.push_back(
feedvar_shape_nobatch(feedvar_index));
} }
return vector_of_feedvar_shape_nobatch; return vector_of_feedvar_shape_nobatch;
} }
// At present, it is considered that the batch of all feedvar is consistent. // For each feedvar, batch should be 1 or batch_size.
// so for each feedvar, PaddleTensor.shape[0] should be the same. // if feedvar-1: batch_size = 1 (always not batch).
bool check_batch_align() { // feedvar-2: batch_size = n, batch = n.
int batch_size_align = feedvar_batch_size(0); // this function is not thread safe. only called when task is creating.
for (int feedvar_index = 0; feedvar_index < inVectorT_ptr->size(); bool task_init() {
total_feed_batch = feedvar_batch_size(0);
// which means error.
if (total_feed_batch <= 0) return false;
for (size_t feedvar_index = 0; feedvar_index < inVectorT_ptr->size();
++feedvar_index) { ++feedvar_index) {
if (feedvar_batch_size(feedvar_index) != batch_size_align) { // TODO(HexToString): Distinguish between nobatch and batch =
return 0; // 1(By:HexToString)
      // When feedvar-1 in the data has a batch dimension with batch = 1, shape[0] = 1,
      // while feedvar-2 carries no batch dimension, which also forces shape[0] = 1,
      // the two cases cannot be told apart here, so set_feed_nobatch_index may miss entries.
      // Hopefully the two can be distinguished elsewhere later.
if (feedvar_batch_size(feedvar_index) != total_feed_batch) {
// which means error.
if (feedvar_batch_size(feedvar_index) != 1 && total_feed_batch != 1) {
return false;
} else {
// which means feedvar shape[0] = 1.
// shape[0] does not change with batch
set_feed_nobatch_index.insert(feedvar_index);
total_feed_batch =
std::max(feedvar_batch_size(feedvar_index), total_feed_batch);
}
}
      // Record the index of this lod feedvar.
if ((*inVectorT_ptr)[feedvar_index].lod.size() > 0 &&
(*inVectorT_ptr)[feedvar_index].lod[0].size() > 0) {
set_feed_lod_index.insert(feedvar_index);
} }
} }
/* return true;
for(int fetchvar_index = 0; fetchvar_index < outVectorT_ptr->size(); }
++fetchvar_index) {
if(fetchvar_batch_size(fetchvar_index) != batch_size_align) { size_t batch_size() { return total_feed_batch; }
return 0;
// start_batch range is 0~batch_size, end_batch range is 1~batch_size
// start_batch should not be included, end_batch > start_batch
// return is (start_batch, end_batch] = [start_batch+1,end_batch]
// for not lod, shape0_index = [(start_batch+1)-1,end_batch-1] =
// [start_batch,end_batch-1] = [start_batch,end_batch)
// for lod, shape0_index = [lod[start_batch],lod[end_batch]-1] =
// [lod[start_batch],lod[end_batch])
// for nobatch, shape0_index = [0,1)
  // For the caller: after obtaining shape0_index, iterate
  // for (size_t myindex = shape0_index[0]; myindex < shape0_index[1]; myindex++).
  // Example: original lod = [0,3,4,6], selected batches (start_batch = 1, end_batch = 3],
  // i.e. batches 2 and 3.
  // The sliced lod is [3,4,6], which becomes [1,3] after processing.
  // Handled this way, merging lods is easy: just add the last value of the previous lod.
std::vector<std::vector<size_t>> get_feature_by_batch(size_t feedvar_index,
size_t start_batch,
size_t end_batch) {
std::vector<std::vector<size_t>> feature_vector;
    // feature_vector is a two-level vector, designed so a single pass handles all features.
    // feature_vector[0] holds the shape0_index range: two elements, the minimum and maximum.
    // feature_vector[1] holds the lod information for the selected batches.
    // feature_vector[2] is a single-element vector whose value 1 marks a nobatch feedvar.
    // The `if` branch handles nobatch feedvars,
    // the `else if` branch handles lod feedvars,
    // and the `else` branch handles ordinary non-lod feedvars.
if (set_feed_nobatch_index.size() > 0 &&
set_feed_nobatch_index.find(feedvar_index) !=
set_feed_nobatch_index.end()) {
feature_vector = {{0, 1}, {}, {1}};
} else if (set_feed_lod_index.size() > 0 &&
set_feed_lod_index.find(feedvar_index) !=
set_feed_lod_index.end()) {
std::vector<size_t> feed_lod_vector(end_batch - start_batch);
for (size_t lod_index = start_batch + 1, vector_index = 0;
lod_index < end_batch + 1;
++lod_index, ++vector_index) {
feed_lod_vector[vector_index] =
(*inVectorT_ptr)[feedvar_index].lod[0][lod_index] -
(*inVectorT_ptr)[feedvar_index].lod[0][start_batch];
} }
size_t shape0_start = (*inVectorT_ptr)[feedvar_index].lod[0][start_batch];
size_t shape0_end = (*inVectorT_ptr)[feedvar_index].lod[0][end_batch];
feature_vector = {{shape0_start, shape0_end}, feed_lod_vector};
// feature_vector.push_back(feed_lod_vector);
} else {
feature_vector = {{start_batch, end_batch}};
} }
*/ return feature_vector;
return 1;
} }
size_t batch_size() { bool combine_taskmeta() {
    if (check_batch_align()) { // Only when the fetch outputs include lod types and the task was split into multiple taskmetas
      return feedvar_batch_size(0); // does the data need to be moved from outLodTensorVector back into outVectorT_ptr.
if (vector_fetch_lod_index.size() > 0 && taskmeta_num > 1) {
for (size_t index = 0; index < vector_fetch_lod_index.size(); ++index) {
size_t data_length = 0;
size_t lod_length = 0;
size_t total_shape0 = 0;
size_t feedvar_index = vector_fetch_lod_index[index];
        // PaddleTensor's Resize clears the buffer each time, so the total length must be computed first.
for (size_t taskmeta_index = 0; taskmeta_index < taskmeta_num;
++taskmeta_index) {
data_length +=
outLodTensorVector[taskmeta_index][index].data.length();
lod_length += outLodTensorVector[taskmeta_index][index].lod[0].size();
total_shape0 += outLodTensorVector[taskmeta_index][index].shape[0];
}
        // Grow the PaddleTensor's data and lod in a single allocation.
paddle::PaddleTensor& fetchVarTensor = (*outVectorT_ptr)[feedvar_index];
fetchVarTensor.data.Resize(data_length);
        // Pad the task's lod with a leading 0.
if (fetchVarTensor.lod.size() <= 0) {
fetchVarTensor.lod.push_back({0});
} else if (fetchVarTensor.lod[0].size() <= 0) {
fetchVarTensor.lod[0].push_back(0);
}
fetchVarTensor.lod[0].resize(lod_length + 1, 0);
//
size_t data_length_offset = 0;
size_t lod_length_offset = 0;
size_t once_data_length = 0;
size_t once_lod_length = 0;
size_t last_lod_value = fetchVarTensor.lod[0][lod_length_offset];
for (size_t taskmeta_index = 0; taskmeta_index < taskmeta_num;
++taskmeta_index) {
void* dst_ptr = fetchVarTensor.data.data() + data_length_offset;
void* source_ptr =
outLodTensorVector[taskmeta_index][index].data.data();
once_data_length =
outLodTensorVector[taskmeta_index][index].data.length();
memcpy(dst_ptr, source_ptr, once_data_length);
once_lod_length =
outLodTensorVector[taskmeta_index][index].lod[0].size();
for (size_t once_index = 0; once_index < once_lod_length;
++once_index) {
fetchVarTensor.lod[0][lod_length_offset + 1] =
last_lod_value +
outLodTensorVector[taskmeta_index][index].lod[0][once_index];
}
data_length_offset += once_data_length;
lod_length_offset += once_lod_length;
}
}
} }
return 0; return true;
} }
bool task_fetch_init(BatchTasks<TaskT>& batchTask);
bool task_fetch_create(BatchTasks<TaskT>& batchTask);
}; };
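// A hedged worked example of the lod bookkeeping described in the comments above:
// two tasks of batch 2 with lods [0,2,5] and [0,2,5] merge into [0,2,5,7,10] (every
// entry of the second lod is shifted by the last value of the first), and splitting
// the merged lod back out subtracts that same offset again. merge_lod_example is an
// illustrative helper, not part of the framework.
inline std::vector<size_t> merge_lod_example(const std::vector<size_t>& a,
                                             const std::vector<size_t>& b) {
  std::vector<size_t> out = a.empty() ? std::vector<size_t>{0} : a;
  const size_t base = out.back();
  for (size_t i = 1; i < b.size(); ++i) {
    out.push_back(base + b[i]);
  }
  return out;  // merge_lod_example({0,2,5}, {0,2,5}) == {0,2,5,7,10}
}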
// `Several Task` or `part of batch in Task` can be a TaskMeta. // `Several Task` or `part of batch in Task` can be a TaskMeta.
...@@ -206,61 +419,164 @@ struct Task { ...@@ -206,61 +419,164 @@ struct Task {
// TaskMeta is necessary. // TaskMeta is necessary.
// cause we need know the the corresponding relationship between // cause we need know the the corresponding relationship between
// `batch_out`(which is in BatchTasks) and `outVectorT_ptr`(which is in Task). // `_batch_out`(which is in BatchTasks) and `outVectorT_ptr`(which is in Task).
// especially when 1 Task be divided into several TaskMeta and be put into // especially when 1 Task be divided into several TaskMeta and be put into
// several different BatchTasks. // several different BatchTasks.
// begin, add, and end refer to batches, not shape[0].
// if not lod, batch == shape[0]; if lod, batch != shape[0].
// for example, lod = [0,3,4,6], shape = [6,340,340]:
// there are actually 3 batches, so add = 3, but shape[0] = 6.
template <typename TaskT> template <typename TaskT>
struct TaskMeta { struct TaskMeta {
TaskMeta(TaskT* ptr, size_t start, size_t add) TaskMeta(TaskT* ptr, size_t start, size_t add, size_t taskmeta_index)
: task(ptr), begin(start), end(start + add) {} : task(ptr),
begin(start),
end(start + add),
taskmeta_index(taskmeta_index) {
feedvar_num = ptr->inVectorT_ptr->size();
for (size_t feedvar_index = 0; feedvar_index < feedvar_num;
++feedvar_index) {
std::vector<std::vector<size_t>> feature =
ptr->get_feature_by_batch(feedvar_index, start, start + add);
feed_shape0_range.push_back(feature[0]);
feedvar_type.push_back(feature.size());
if (feature.size() == 1) {
feed_lod_vector.push_back({});
} else if (feature.size() == 2) {
feed_lod_vector.push_back(feature[1]);
} else {
feed_lod_vector.push_back({});
}
}
}
TaskT* task; TaskT* task;
size_t begin; size_t begin;
size_t end; size_t end;
size_t feedvar_num;
size_t taskmeta_index;
std::vector<std::vector<size_t>> feed_shape0_range;
std::vector<std::vector<size_t>> feed_lod_vector;
std::vector<size_t> feedvar_type;
}; };
// each TaskT is already include batch in itself // each TaskT is already include batch in itself
// BatchTasks need to combine several `small TaskMeta` into a new `big TaskT`. // BatchTasks need to combine several `small TaskMeta` into a new `big TaskT`.
// The only difference between the `big TaskT` and `small TaskT` is that // The only difference between the `big TaskT` and `small TaskT` is that
// the TaskT.inVectorT_ptr->[feedvar_index].shape[0] // the TaskT.inVectorT_ptr->[feedvar_index].shape[0] is different
// which is actually batch_size is different. // `big TaskT`.inVectorT_ptr->[feedvar_index].shape[0] is actually batch_size .
template <typename TaskT> template <typename TaskT>
class BatchTasks { class BatchTasks {
public: public:
typedef typename TaskT::InType InType; typedef typename TaskT::InType InType;
typedef typename TaskT::OutType OutType; typedef typename TaskT::OutType OutType;
typedef TaskMeta<TaskT> TaskMetaT; typedef TaskMeta<TaskT> TaskMetaT;
typedef std::vector<size_t> ShapeVector;
typedef std::vector<ShapeVector> VectorOfShapeVector;
typedef std::vector<size_t> LodVector;
typedef std::vector<LodVector> PaddleTensorLod;
friend TaskT;
explicit BatchTasks(size_t batch_size, bool batch_align = true) explicit BatchTasks(size_t batch_size,
bool overrun = false,
bool allow_split_request = true)
: _batch_size(batch_size), : _batch_size(batch_size),
_rem_size(batch_size), _rem_size(batch_size),
_batch_align(batch_align) { _overrun(overrun),
_allow_split_request(allow_split_request) {
_batch_in.clear(); _batch_in.clear();
_batch_in_offset.clear(); _batch_in_offset.clear();
_total_shape0_batch_in.clear();
_total_feed_batch = 0;
_batch_in_lod.clear();
_batch_out.clear(); _batch_out.clear();
_batch_out_offset.clear(); _batch_out_offset.clear();
_total_fetch_batch = 0;
_taskmeta_vector.clear(); _taskmeta_vector.clear();
set_fetch_nobatch_index.clear();
vector_fetch_lod_index.clear();
} }
~BatchTasks() { ~BatchTasks() {
_batch_in.clear(); _batch_in.clear();
_batch_in_offset.clear(); _batch_in_offset.clear();
_total_shape0_batch_in.clear();
_total_feed_batch = 0;
_batch_in_lod.clear();
_batch_out.clear(); _batch_out.clear();
_batch_out_offset.clear(); _batch_out_offset.clear();
_total_fetch_batch = 0;
_taskmeta_vector.clear(); _taskmeta_vector.clear();
set_fetch_nobatch_index.clear();
vector_fetch_lod_index.clear();
} }
// synchronized operation // synchronized operation
// because Upper level callers of this function have already locked. // because Upper level callers of this function have already locked.
  // Every task that reaches this function is of the same kind; that is guaranteed before the call.
size_t append_task(TaskT* task) { size_t append_task(TaskT* task) {
size_t add = std::min(task->rem, _rem_size); size_t add = std::min(task->rem, _rem_size);
if (!_batch_align) { // when _overrun == true, it means always take a whole task as TaskMeta
    // we can temporarily break through the limit of BatchTask`s capacity
// BatchTask`s capacity is _batch_size or _rem_size
if (_overrun) {
add = task->rem; add = task->rem;
} }
int start_index = task->batch_size() - task->rem; int start_index = task->batch_size() - task->rem;
TaskMetaT tm(task, start_index, add); TaskMetaT tm(task, start_index, add, task->taskmeta_num);
task->taskmeta_num += 1;
_taskmeta_vector.push_back(tm); _taskmeta_vector.push_back(tm);
if (_batch_in_offset.size() == 0) {
_batch_in_offset.resize(tm.feedvar_num, 0);
}
if (_total_shape0_batch_in.size() == 0) {
_total_shape0_batch_in.resize(tm.feedvar_num, 0);
}
if (_batch_in_lod.size() == 0) {
PaddleTensorLod null_lod;
_batch_in_lod.resize(tm.feedvar_num, null_lod);
}
_total_feed_batch += add;
for (size_t feedvar_index = 0; feedvar_index < tm.feedvar_num;
++feedvar_index) {
if (tm.feedvar_type[feedvar_index] == 1) {
        // Ordinary non-lod feedvar.
        // Accumulate the shape[0] total in preparation for initializing the PaddleTensor later.
_total_shape0_batch_in[feedvar_index] +=
tm.feed_shape0_range[feedvar_index][1] -
tm.feed_shape0_range[feedvar_index][0];
} else if (tm.feedvar_type[feedvar_index] == 2) {
        // Lod-type feedvar.
        // Accumulate the shape[0] total in preparation for initializing the PaddleTensor later.
_total_shape0_batch_in[feedvar_index] +=
tm.feed_shape0_range[feedvar_index][1] -
tm.feed_shape0_range[feedvar_index][0];
        // Prepend a 0 at the front of the lod.
if (_batch_in_lod[feedvar_index].size() <= 0) {
_batch_in_lod[feedvar_index].push_back({0});
} else if (_batch_in_lod[feedvar_index][0].size() <= 0) {
_batch_in_lod[feedvar_index][0].push_back(0);
}
        // Shift this lod by the last value of the previous group's lod and append, composing the combined lod.
size_t last_lod_value = _batch_in_lod[feedvar_index][0].back();
for (size_t lod_index = 0;
lod_index < tm.feed_lod_vector[feedvar_index].size();
++lod_index) {
_batch_in_lod[feedvar_index][0].push_back(
last_lod_value + tm.feed_lod_vector[feedvar_index][lod_index]);
}
} else {
// tm.feedvar_type[feedvar_index] == 3
        // Nobatch-type feedvar.
        // No accumulation here; the value should be 1.
_total_shape0_batch_in[feedvar_index] =
tm.feed_shape0_range[feedvar_index][1] -
tm.feed_shape0_range[feedvar_index][0];
}
}
task->rem -= add; task->rem -= add;
_rem_size -= add; _rem_size -= add;
return _rem_size; return _rem_size;
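  // Sketch of how _total_shape0_batch_in grows per feedvar type in append_task above
  // (illustrative helper only): plain (type 1) and lod (type 2) feedvars accumulate the
  // rows of each slice, while nobatch (type 3) feedvars keep the single shared row count.
  static size_t shape0_accumulate_example(size_t current, size_t slice_rows,
                                          size_t feedvar_type) {
    return (feedvar_type == 3) ? slice_rows : current + slice_rows;
  }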
...@@ -281,72 +597,56 @@ class BatchTasks { ...@@ -281,72 +597,56 @@ class BatchTasks {
// cause maybe next time we don`t need to do the extra copy. // cause maybe next time we don`t need to do the extra copy.
// directly copy the every Task into the Predictor. // directly copy the every Task into the Predictor.
// lod is not supported.
// if lod is set, we should not allow to use the bsf task.
// batch.merge_tasks() is thread-safe function // batch.merge_tasks() is thread-safe function
// cause batch is a local variable and Task is just read, not written. // cause batch is a local variable and Task is just read, not written.
void merge_tasks() { void merge_tasks() {
if (_taskmeta_vector.size() <= 0) { if (_taskmeta_vector.size() <= 0) {
return; return;
} }
// Temporarily, the batch of each feedvar is consistent
// If not consistent, use feedvar_batch_size instead of task->batch_size().
int temp_batch = 0;
for (size_t ti = 0; ti < _taskmeta_vector.size(); ++ti) {
TaskMetaT& tm = _taskmeta_vector[ti];
temp_batch += tm.task->batch_size();
}
if (temp_batch > _batch_size) {
LOG(ERROR) << "_realNumber_batch_in >_batch_size, error.";
return;
}
int feedvar_num = _taskmeta_vector[0].task->inVectorT_ptr->size();
if (_batch_in_offset.size() == 0) {
_batch_in_offset.resize(feedvar_num, 0);
_realNumber_batch_in.resize(feedvar_num, temp_batch);
}
for (size_t ti = 0; ti < _taskmeta_vector.size(); ++ti) { for (size_t ti = 0; ti < _taskmeta_vector.size(); ++ti) {
TaskMetaT& tm = _taskmeta_vector[ti]; TaskMetaT& tm = _taskmeta_vector[ti];
for (int index = 0; index < feedvar_num; ++index) { for (size_t feedvar_index = 0; feedvar_index < tm.feedvar_num;
++feedvar_index) {
const paddle::PaddleTensor& feedVarTensor = const paddle::PaddleTensor& feedVarTensor =
(*tm.task->inVectorT_ptr)[index]; (*tm.task->inVectorT_ptr)[feedvar_index];
size_t feedvar_bytesize = tm.task->feedvar_bytesize(index); size_t feedvar_bytesize = tm.task->feedvar_bytesize(feedvar_index);
if (ti == 0) { if (ti == 0) {
if (feedVarTensor.lod.size() > 0 && feedVarTensor.lod[0].size() > 0) { // Create the entire tensor at once
LOG(ERROR) << "lod Tensor is not supported now.";
return;
}
// for now, we assume that every task feedvar_bytesize is the same. // for now, we assume that every task feedvar_bytesize is the same.
// which means we dont support auto embedding. // which means we dont support auto embedding.
// but for different feedvar, it is different. // but for different feedvar, it is different.
paddle::PaddleTensor paddleTensor; paddle::PaddleTensor paddleTensor;
paddleTensor.dtype = feedVarTensor.dtype; paddleTensor.dtype = feedVarTensor.dtype;
paddleTensor.name = feedVarTensor.name; paddleTensor.name = feedVarTensor.name;
paddleTensor.lod = feedVarTensor.lod; paddleTensor.lod = _batch_in_lod[feedvar_index];
paddleTensor.shape = feedVarTensor.shape; paddleTensor.shape = feedVarTensor.shape;
paddleTensor.shape[0] = _realNumber_batch_in[index]; paddleTensor.shape[0] = _total_shape0_batch_in[feedvar_index];
paddleTensor.data.Resize(feedvar_bytesize * paddleTensor.data.Resize(feedvar_bytesize *
_realNumber_batch_in[index]); _total_shape0_batch_in[feedvar_index]);
_batch_in.push_back(paddleTensor); _batch_in.push_back(paddleTensor);
} }
void* dst_ptr = _batch_in[index].data.data() + _batch_in_offset[index]; void* dst_ptr = _batch_in[feedvar_index].data.data() +
_batch_in_offset[feedvar_index];
void* source_ptr = void* source_ptr =
feedVarTensor.data.data() + feedvar_bytesize * tm.begin; feedVarTensor.data.data() +
size_t length = feedvar_bytesize * (tm.end - tm.begin); feedvar_bytesize * tm.feed_shape0_range[feedvar_index][0];
size_t length =
feedvar_bytesize * (tm.feed_shape0_range[feedvar_index][1] -
tm.feed_shape0_range[feedvar_index][0]);
memcpy(dst_ptr, source_ptr, length); memcpy(dst_ptr, source_ptr, length);
        _batch_in_offset[index] += length; // Nobatch-type feedvars do not accumulate the offset.
if (tm.feedvar_type[feedvar_index] != 3)
_batch_in_offset[feedvar_index] += length;
} }
} }
} }
bool check_fetchvar_valid(int fetchvar_index) { bool check_fetchvar_valid(size_t fetchvar_index) {
if (fetchvar_index < 0 || _batch_out.size() <= fetchvar_index) { if (fetchvar_index < 0 || _batch_out.size() <= fetchvar_index) {
LOG(ERROR) << "fetchvar doesnt exsit or fetchvar_index error"; LOG(ERROR) << "fetchvar doesnt exsit or fetchvar_index error";
return 0; return 0;
...@@ -360,19 +660,11 @@ class BatchTasks { ...@@ -360,19 +660,11 @@ class BatchTasks {
return 1; return 1;
} }
size_t fetchvar_batch_size(int fetchvar_index) { size_t fetchvar_element_bytesize(size_t fetchvar_index) {
if (!check_fetchvar_valid(fetchvar_index)) {
return 0;
}
return _batch_out[fetchvar_index].shape[0];
}
size_t fetchvar_element_bytesize(int fetchvar_index) {
if (!check_fetchvar_valid(fetchvar_index)) { if (!check_fetchvar_valid(fetchvar_index)) {
return 0; return 0;
} }
int dtype = _batch_out[fetchvar_index].dtype; size_t dtype = _batch_out[fetchvar_index].dtype;
if (dtype == paddle::PaddleDType::INT64) { if (dtype == paddle::PaddleDType::INT64) {
return sizeof(int64_t); return sizeof(int64_t);
} }
...@@ -390,7 +682,7 @@ class BatchTasks { ...@@ -390,7 +682,7 @@ class BatchTasks {
// Now, the implementation of this function is based on assumption // Now, the implementation of this function is based on assumption
// that shape [0] = batch_size. // that shape [0] = batch_size.
size_t fetchvar_element_num(int fetchvar_index) { size_t fetchvar_element_num(size_t fetchvar_index) {
if (!check_fetchvar_valid(fetchvar_index)) { if (!check_fetchvar_valid(fetchvar_index)) {
return 0; return 0;
} }
...@@ -400,35 +692,66 @@ class BatchTasks { ...@@ -400,35 +692,66 @@ class BatchTasks {
return 1; return 1;
} }
// start from shape[1], cause shape[0] = batch_size. // start from shape[1], cause shape[0] = batch_size.
for (int i = 1; i < _batch_out[fetchvar_index].shape.size(); ++i) { for (size_t i = 1; i < _batch_out[fetchvar_index].shape.size(); ++i) {
element_num *= _batch_out[fetchvar_index].shape[i]; element_num *= _batch_out[fetchvar_index].shape[i];
} }
return element_num; return element_num;
} }
size_t fetchvar_bytesize(int fetchvar_index) { size_t fetchvar_bytesize(size_t fetchvar_index) {
return fetchvar_element_num(fetchvar_index) * return fetchvar_element_num(fetchvar_index) *
fetchvar_element_bytesize(fetchvar_index); fetchvar_element_bytesize(fetchvar_index);
} }
bool check_fetchvar_batch_align() { size_t fetchvar_batch_size(size_t fetchvar_index) {
int batch_size_align = fetchvar_batch_size(0); if (!check_fetchvar_valid(fetchvar_index)) {
return 0;
for (int fetchvar_index = 0; fetchvar_index < _batch_out.size();
++fetchvar_index) {
if (fetchvar_batch_size(fetchvar_index) != batch_size_align) {
return 0;
}
} }
// if lod, 'lod[0].size()-1' is batch.
return 1; // for PaddleTensor lod is vector<vector<size_t>>, so lod[0] is real lod.
// for example, lod = [0,3,4,6], shape = [6,340,340], batch is 3 actually.
// for lod, the batch < shape[0].
if (_batch_out[fetchvar_index].lod.size() > 0 &&
_batch_out[fetchvar_index].lod[0].size() > 0) {
return _batch_out[fetchvar_index].lod[0].size() - 1;
}
// if not lod, the first dimension of data `PaddleTensor.shape[0]` is batch.
return _batch_out[fetchvar_index].shape[0];
} }
size_t fetchvar_batch_size() { size_t fetchvar_batch_size() { return _total_fetch_batch; }
if (check_fetchvar_batch_align()) {
return fetchvar_batch_size(0); bool deal_batch_out() {
_total_fetch_batch = fetchvar_batch_size(0);
if (_total_fetch_batch <= 0) return false;
for (size_t fetchvar_index = 0; fetchvar_index < _batch_out.size();
++fetchvar_index) {
// TODO(HexToString): Distinguish between nobatch and batch =
// 1(By:HexToString)
      // When fetchvar-1 in the data has a batch dimension with batch = 1, shape[0] = 1,
      // while fetchvar-2 carries no batch dimension, which also forces shape[0] = 1,
      // the two cases cannot be told apart here, so set_fetch_nobatch_index may miss entries.
      // Hopefully the two can be distinguished elsewhere later.
if (fetchvar_batch_size(fetchvar_index) != _total_fetch_batch) {
// which means error.
if (fetchvar_batch_size(fetchvar_index) != 1 &&
_total_fetch_batch != 1) {
return false;
} else {
// which means fetchvar shape[0] = 1.
// shape[0] does not change with batch
set_fetch_nobatch_index.insert(fetchvar_index);
_total_fetch_batch =
std::max(fetchvar_batch_size(fetchvar_index), _total_fetch_batch);
}
}
      // Record the index of this lod fetchvar.
if (_batch_out[fetchvar_index].lod.size() > 0 &&
_batch_out[fetchvar_index].lod[0].size() > 0) {
vector_fetch_lod_index.push_back(fetchvar_index);
}
} }
return 0; return true;
} }
void notify_tasks() { void notify_tasks() {
...@@ -436,12 +759,16 @@ class BatchTasks { ...@@ -436,12 +759,16 @@ class BatchTasks {
LOG(ERROR) << "_taskmeta_vector.size() <=0, error."; LOG(ERROR) << "_taskmeta_vector.size() <=0, error.";
return; return;
} }
    if (_realNumber_batch_in[0] != fetchvar_batch_size()) { // Derive the overall output batch from _batch_out,
      // and record the indices of lod-type and nobatch-type fetchvars in sets for later lookup.
deal_batch_out();
      // If the output batch is neither 1 nor equal to the input batch, it is an error.
if (_total_feed_batch != _total_fetch_batch && _total_fetch_batch != 1) {
LOG(ERROR) << "_batch_out`s batch != _batch_in`s batch, error."; LOG(ERROR) << "_batch_out`s batch != _batch_in`s batch, error.";
return; return;
} }
int fetchvar_num = _batch_out.size(); size_t fetchvar_num = _batch_out.size();
if (_batch_out_offset.size() == 0) { if (_batch_out_offset.size() == 0) {
_batch_out_offset.resize(fetchvar_num, 0); _batch_out_offset.resize(fetchvar_num, 0);
} }
...@@ -451,44 +778,132 @@ class BatchTasks { ...@@ -451,44 +778,132 @@ class BatchTasks {
size_t begin = _taskmeta_vector[ti].begin; size_t begin = _taskmeta_vector[ti].begin;
size_t end = _taskmeta_vector[ti].end; size_t end = _taskmeta_vector[ti].end;
size_t add = end - begin; size_t add = end - begin;
size_t taskmeta_index = _taskmeta_vector[ti].taskmeta_index;
      for (int index = 0; index < fetchvar_num; ++index) { // Initialize the task's outVectorT_ptr.
        // the task->outVectorT_ptr is null before core->run(). // For lod outputs split across multiple taskmetas, outLodTensorVector must also be initialized.
// first time we should copy from _batch_out if (!task->task_fetch_init(*this)) {
// so we need init. LOG(ERROR) << " task_fetch_init error.";
size_t fetchvar_bytesize_index = fetchvar_bytesize(index); return;
if (task->outVectorT_ptr->size() <= index) { }
paddle::PaddleTensor tensor_out; size_t fetch_lod_index = 0;
tensor_out.name = _batch_out[index].name;
tensor_out.dtype = paddle::PaddleDType(_batch_out[index].dtype); for (size_t fetchvar_index = 0; fetchvar_index < fetchvar_num;
tensor_out.shape = _batch_out[index].shape; ++fetchvar_index) {
tensor_out.shape[0] = task->batch_size(); size_t fetchvar_bytesize_index = fetchvar_bytesize(fetchvar_index);
tensor_out.lod = _batch_out[index].lod;
// resize all batch memory at one time if (set_fetch_nobatch_index.size() > 0 &&
size_t databuf_size = task->batch_size() * fetchvar_bytesize_index; set_fetch_nobatch_index.find(fetchvar_index) !=
tensor_out.data.Resize(databuf_size); set_fetch_nobatch_index.end()) {
          task->outVectorT_ptr->push_back(tensor_out); // Nobatch fetchvar case:
        } // regardless of the input batch, this fetchvar always has shape[0] = 1.
paddle::PaddleTensor& fetchVarTensor =
paddle::PaddleTensor& fetchVarTensor = (*task->outVectorT_ptr)[index]; (*task->outVectorT_ptr)[fetchvar_index];
void* dst_ptr = fetchVarTensor.data.data();
void* dst_ptr = size_t length = fetchvar_bytesize_index * 1;
fetchVarTensor.data.data() + fetchvar_bytesize_index * begin; void* source_ptr = _batch_out[fetchvar_index].data.data();
size_t length = fetchvar_bytesize_index * add; memcpy(dst_ptr, source_ptr, length);
if (_batch_out_offset[index] + length > } else if (vector_fetch_lod_index.size() > 0 &&
fetchvar_batch_size() * fetchvar_bytesize(index)) { std::find(vector_fetch_lod_index.begin(),
LOG(ERROR) << "_batch_out is less than taskmeta, error."; vector_fetch_lod_index.end(),
return; fetchvar_index) != vector_fetch_lod_index.end()) {
          // lod fetchvar case: the total shape[0] cannot be determined yet.
          // Allocate taskmeta_num temporary buffers according to the task's taskmeta count;
          // each lod fetchvar is copied into its own temporary buffer, and once all of
          // them arrive the totals are computed and the fetchvar data and lod are merged.
size_t last_batch = _batch_out_offset[fetchvar_index];
size_t shape0_index_start =
_batch_out[fetchvar_index].lod[0][last_batch];
size_t shape0_index_end =
_batch_out[fetchvar_index].lod[0][last_batch + add];
size_t shape0_length = shape0_index_end - shape0_index_start;
          // When the task is split into several taskmetas, the data cannot be copied
          // directly into task->outVectorT_ptr; it first goes into
          // task->outLodTensorVector[taskmeta_index], and once every taskmeta of the task
          // is done it is copied back into task->outVectorT_ptr in order.
if (task->taskmeta_num > 1) {
paddle::PaddleTensor& fetchVarTensor =
task->outLodTensorVector[taskmeta_index][fetch_lod_index];
size_t length = fetchvar_bytesize_index * shape0_length;
fetchVarTensor.shape[0] = shape0_length;
fetchVarTensor.data.Resize(length);
void* dst_ptr = fetchVarTensor.data.data();
void* source_ptr = _batch_out[fetchvar_index].data.data() +
shape0_index_start * fetchvar_bytesize_index;
memcpy(dst_ptr, source_ptr, length);
            // These are the per-taskmeta lod slices; do not pad with the leading 0 here, it is added when merging back into the Task's outVectorT_ptr.
if (fetchVarTensor.lod.size() <= 0) {
fetchVarTensor.lod.push_back({});
}
fetchVarTensor.lod[0].resize(add, 0);
size_t last_lod_value =
_batch_out[fetchvar_index].lod[0][last_batch];
for (size_t lod_index = last_batch + 1, my_index = 0;
lod_index < last_batch + add + 1;
++lod_index, ++my_index) {
fetchVarTensor.lod[0][my_index] =
(_batch_out[fetchvar_index].lod[0][lod_index] -
last_lod_value);
}
} else {
          // The task was not split into several taskmetas, so only one thread's taskmeta
          // touches it and there is no contention; after resizing, the data can be written
          // directly into task->outVectorT_ptr.
paddle::PaddleTensor& fetchVarTensor =
(*task->outVectorT_ptr)[fetchvar_index];
size_t length = fetchvar_bytesize_index * shape0_length;
fetchVarTensor.shape[0] = shape0_length;
fetchVarTensor.data.Resize(length);
void* dst_ptr = fetchVarTensor.data.data();
void* source_ptr = _batch_out[fetchvar_index].data.data() +
shape0_index_start * fetchvar_bytesize_index;
memcpy(dst_ptr, source_ptr, length);
            // Pad the task's lod with a leading 0.
if (fetchVarTensor.lod.size() <= 0) {
fetchVarTensor.lod.push_back({0});
} else if (fetchVarTensor.lod[0].size() <= 0) {
fetchVarTensor.lod[0].push_back(0);
}
            // Split the merged lod back into the batches belonging to this task.
            // Note that the offset accumulated by the preceding lods must be removed.
            // Example: the merged lod [0,2,5;7,10] was predicted from two tasks of batch 2.
            // When splitting, the first group subtracts 0, giving [2,5], which together
            // with the 0 already padded in front becomes [0,2,5].
            // The second group must subtract 5, also giving [2,5]; only then is it correct.
fetchVarTensor.lod[0].resize(add + 1, 0);
size_t last_lod_value =
_batch_out[fetchvar_index].lod[0][last_batch];
for (size_t lod_index = last_batch + 1, my_index = 1;
lod_index < last_batch + add + 1;
++lod_index, ++my_index) {
fetchVarTensor.lod[0][my_index] =
(_batch_out[fetchvar_index].lod[0][lod_index] -
last_lod_value);
}
}
fetch_lod_index++;
} else {
          // Ordinary fetchvar case: the task's total fetchvar_batch equals the total
          // input batch_size(), and the output batch corresponds one-to-one to the
          // input batch.
paddle::PaddleTensor& fetchVarTensor =
(*task->outVectorT_ptr)[fetchvar_index];
void* dst_ptr =
fetchVarTensor.data.data() + fetchvar_bytesize_index * begin;
size_t length = fetchvar_bytesize_index * add;
void* source_ptr =
_batch_out[fetchvar_index].data.data() +
_batch_out_offset[fetchvar_index] * fetchvar_bytesize_index;
memcpy(dst_ptr, source_ptr, length);
} }
void* source_ptr = _batch_out_offset[fetchvar_index] += add;
_batch_out[index].data.data() + _batch_out_offset[index];
memcpy(dst_ptr, source_ptr, length);
_batch_out_offset[index] += length;
} }
// 'index' is a local variable; fetch_add is atomic and returns the previous
// value on success. Only after the last taskmeta has finished can this
// thread's index + add reach task->batch_size(), so exactly one thread can
// enter the if block and there is no multi-thread race.
size_t index = task->index.fetch_add(add); size_t index = task->index.fetch_add(add);
if ((index + add) >= task->batch_size()) { if ((index + add) >= task->batch_size()) {
task->combine_taskmeta();
char c = 0; char c = 0;
while (write(task->write_fd, &c, 1) != 1 && errno == EINTR) { while (write(task->write_fd, &c, 1) != 1 && errno == EINTR) {
} }
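The comments above walk through how a merged lod is re-based when a task has been split into several taskmetas. The following standalone sketch (not part of the framework; `split_lod` and the sample values are hypothetical and simply mirror the [0,2,5,7,10] example from the comments) reproduces that arithmetic:

```
#include <cstddef>
#include <iostream>
#include <vector>

// Rebase `count` consecutive lod intervals starting at position `first` of a
// merged lod so that they start from 0 again, re-adding the leading 0.
std::vector<std::size_t> split_lod(const std::vector<std::size_t>& merged,
                                   std::size_t first, std::size_t count) {
  std::vector<std::size_t> out(count + 1, 0);
  std::size_t base = merged[first];
  for (std::size_t i = 1; i <= count; ++i) {
    out[i] = merged[first + i] - base;
  }
  return out;
}

int main() {
  // Merged lod produced by two batch=2 sub-tasks, as in the comment above.
  std::vector<std::size_t> merged = {0, 2, 5, 7, 10};
  std::vector<std::size_t> first_task = split_lod(merged, 0, 2);   // {0, 2, 5}
  std::vector<std::size_t> second_task = split_lod(merged, 2, 2);  // {0, 2, 5}
  std::cout << first_task[2] << " " << second_task[2] << std::endl;  // "5 5"
  return 0;
}
```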
...@@ -503,17 +918,32 @@ class BatchTasks { ...@@ -503,17 +918,32 @@ class BatchTasks {
size_t task_size() { return _taskmeta_vector.size(); } size_t task_size() { return _taskmeta_vector.size(); }
const size_t get_rem_size() { return _rem_size; }
bool get_overrun() { return _overrun; }
bool get_allow_split_request() { return _allow_split_request; }
private: private:
std::vector<TaskMetaT> _taskmeta_vector; std::vector<TaskMetaT> _taskmeta_vector;
typename TaskT::InVectorT _batch_in; typename TaskT::InVectorT _batch_in;
std::vector<size_t> _batch_in_offset; std::vector<size_t> _batch_in_offset;
std::vector<size_t> _realNumber_batch_in; std::vector<size_t> _total_shape0_batch_in;
size_t _total_feed_batch;
std::vector<PaddleTensorLod> _batch_in_lod;
typename TaskT::OutVectorT _batch_out; typename TaskT::OutVectorT _batch_out;
std::vector<size_t> _batch_out_offset; std::vector<size_t> _batch_out_offset;
std::vector<size_t> _realNumber_batch_out; // std::vector<size_t> _total_shape0_batch_out;
size_t _total_fetch_batch;
// std::vector<PaddleTensorLod> _batch_out_lod;
std::set<size_t> set_fetch_nobatch_index;
std::vector<size_t> vector_fetch_lod_index;
size_t _rem_size; size_t _rem_size;
size_t _batch_size; size_t _batch_size;
bool _batch_align; bool _overrun;
bool _allow_split_request;
}; };
// BSF task handle // BSF task handle
...@@ -589,6 +1019,8 @@ class TaskExecutor { ...@@ -589,6 +1019,8 @@ class TaskExecutor {
typedef typename TaskT::OutVectorT OutVectorT; typedef typename TaskT::OutVectorT OutVectorT;
typedef std::vector<TaskT> TaskArrayT; typedef std::vector<TaskT> TaskArrayT;
typedef baidu::paddle_serving::predictor::MempoolWrapper MempoolWrapper; typedef baidu::paddle_serving::predictor::MempoolWrapper MempoolWrapper;
typedef std::vector<size_t> ShapeVector;
typedef std::vector<ShapeVector> VectorOfShapeVector;
TaskExecutor() TaskExecutor()
: _stop(false), : _stop(false),
...@@ -596,7 +1028,7 @@ class TaskExecutor { ...@@ -596,7 +1028,7 @@ class TaskExecutor {
_thread_reset_fn(NULL), _thread_reset_fn(NULL),
_user_thread_contexts(NULL), _user_thread_contexts(NULL),
_batch_size(DEFAULT_BATCH_SIZE), _batch_size(DEFAULT_BATCH_SIZE),
_batch_align(false), _overrun(false),
_fn(NULL) { _fn(NULL) {
THREAD_MUTEX_INIT(&_mut, NULL); THREAD_MUTEX_INIT(&_mut, NULL);
THREAD_COND_INIT(&_cond, NULL); THREAD_COND_INIT(&_cond, NULL);
...@@ -617,7 +1049,11 @@ class TaskExecutor { ...@@ -617,7 +1049,11 @@ class TaskExecutor {
void set_batch_size(size_t batch_size) { _batch_size = batch_size; } void set_batch_size(size_t batch_size) { _batch_size = batch_size; }
void set_batch_align(size_t batch_align) { _batch_align = batch_align; } void set_overrun(bool overrun) { _overrun = overrun; }
void set_allow_split_request(bool allow_split_request) {
_allow_split_request = allow_split_request;
}
void set_thread_init_fn(boost::function<int(void*)> init_fn, void set_thread_init_fn(boost::function<int(void*)> init_fn,
void** contexts = NULL) { void** contexts = NULL) {
...@@ -642,7 +1078,7 @@ class TaskExecutor { ...@@ -642,7 +1078,7 @@ class TaskExecutor {
TaskHandler<TaskT> schedule(const void*, void*); TaskHandler<TaskT> schedule(const void*, void*);
bool move_task_to_batch(BatchTasks<TaskT>& batch); // NOLINT bool move_task_to_batch(BatchTasks<TaskT>& batchTask); // NOLINT
private: private:
TaskExecutor(TaskExecutor<TaskT> const& other) = delete; TaskExecutor(TaskExecutor<TaskT> const& other) = delete;
...@@ -669,7 +1105,8 @@ class TaskExecutor { ...@@ -669,7 +1105,8 @@ class TaskExecutor {
std::vector<ThreadContext<TaskT>*> _thread_contexts; std::vector<ThreadContext<TaskT>*> _thread_contexts;
size_t _batch_size; size_t _batch_size;
bool _batch_align; bool _overrun;
bool _allow_split_request;
boost::function<void(const void*, void*)> _fn; boost::function<void(const void*, void*)> _fn;
}; };
...@@ -687,12 +1124,12 @@ class TaskExecutorVector { ...@@ -687,12 +1124,12 @@ class TaskExecutorVector {
void resize(int size) { _vector_executor.resize(size); } void resize(int size) { _vector_executor.resize(size); }
TaskExecutor<TaskT>& operator[](int index) { TaskExecutor<TaskT>& operator[](int task_index) {
if (_vector_executor.size() <= index || index <= -1) { if (_vector_executor.size() <= task_index || task_index <= -1) {
LOG(ERROR) << "_vector_executor.size() <= index or <= -1"; LOG(ERROR) << "_vector_executor.size() <= task_index or <= -1";
throw "_vector_executor.size() <= index or <= -1"; throw "_vector_executor.size() <= task_index or <= -1";
} }
return _vector_executor[index]; return _vector_executor[task_index];
} }
private: private:
...@@ -717,8 +1154,8 @@ class TaskManager { ...@@ -717,8 +1154,8 @@ class TaskManager {
typedef typename TaskT::InVectorT InVectorT; typedef typename TaskT::InVectorT InVectorT;
typedef typename TaskT::OutVectorT OutVectorT; typedef typename TaskT::OutVectorT OutVectorT;
explicit TaskManager(uint32_t index) // NOLINT explicit TaskManager(uint32_t model_index) // NOLINT
: _model_index(index) {} : _model_index(model_index) {}
~TaskManager() { wait(); } ~TaskManager() { wait(); }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#include "core/predictor/framework/cache.h"
#include <dirent.h>
#include <sys/stat.h>
#include <fstream>
#include <string>
#include <utility>
#include "core/cube/cube-builder/include/cube-builder/seqfile_reader.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
int CubeCache::clear() {
for (auto it = _map_cache.begin(); it != _map_cache.end(); ++it) {
if (it->second) {
delete (it->second);
it->second = nullptr;
}
}
_map_cache.clear();
return 0;
}
rec::mcube::CubeValue* CubeCache::get_data(uint64_t key) {
auto it = _map_cache.find(key);
if (it != _map_cache.end()) {
return it->second;
}
return nullptr;
}
int CubeCache::reload_data(const std::string& cache_path) {
LOG(INFO) << "cube cache is loading data, path: " << cache_path;
DIR* dp = nullptr;
struct dirent* dirp = nullptr;
struct stat st;
// clear cache data
clear();
// loading data from cache files
if (stat(cache_path.c_str(), &st) < 0 || !S_ISDIR(st.st_mode)) {
LOG(ERROR) << "invalid cache path " << cache_path;
return -1;
}
if ((dp = opendir(cache_path.c_str())) == nullptr) {
LOG(ERROR) << "opendir " << cache_path << " fail.";
return -1;
}
while ((dirp = readdir(dp)) != nullptr) {
// filtering by file type.
if (dirp->d_type != DT_REG) {
continue;
}
// Filter upper-level directories and hidden files
if ((!strncmp(dirp->d_name, ".", 1)) || (!strncmp(dirp->d_name, "..", 2))) {
continue;
}
// Match files whose names contain 'part-'
if (std::string(dirp->d_name).find("part-") != std::string::npos) {
SequenceFileRecordReader reader(cache_path + "/" + dirp->d_name);
if (reader.open() != 0) {
LOG(ERROR) << "open file failed! " << dirp->d_name;
continue;
}
if (reader.read_header() != 0) {
LOG(ERROR) << "read header error! " << dirp->d_name;
reader.close();
continue;
}
Record record(reader.get_header());
while (reader.next(&record) == 0) {
uint64_t key =
*reinterpret_cast<uint64_t*>(const_cast<char*>(record.key.data()));
auto it_find = _map_cache.find(key);
if (it_find != _map_cache.end()) {
// load duplicate key
LOG(WARNING) << "Load duplicate key:" << key
<< " from file:" << dirp->d_name;
continue;
}
rec::mcube::CubeValue* new_value = new rec::mcube::CubeValue();
new_value->error = 0;
new_value->buff.swap(record.value);
_map_cache.insert(std::make_pair(key, new_value));
}
LOG(WARNING) << "Load cube cache file " << dirp->d_name << " done.";
}
LOG(WARNING) << "Load all cube cache files done";
}
return 0;
}
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sys/types.h>
#include <numeric>
#include <string>
#include <unordered_map>
#include "core/cube/cube-api/include/cube_api.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
// Large models that use sparse parameters may use cube cache.
// When the cube cache exists, the model is required to be
// consistent with the version of the cube cache. Therefore,
// when the model is updated, the model and the cube cache are
// required to be reloaded at the same time.
// All cached data is loaded in one shot and never updated in place; switching
// between the two cube caches is lock-free.
class CubeCache {
public:
CubeCache() {}
~CubeCache() { clear(); }
// clear cache data.
int clear();
// get cache data by key
rec::mcube::CubeValue* get_data(uint64_t key);
// reload all cache files from cache_path
int reload_data(const std::string& cache_path);
private:
// lock-free switching; key type is uint64_t, value type is CubeValue*
std::unordered_map<uint64_t, rec::mcube::CubeValue*> _map_cache;
};
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
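To make the interface above concrete, here is a minimal, hypothetical usage sketch built only from what this header declares; the cache directory and the lookup key are placeholders:

```
#include <cstdint>
#include <iostream>

#include "core/predictor/framework/cache.h"

int main() {
  baidu::paddle_serving::predictor::CubeCache cache;
  // Load every "part-*" sequence file under the cache directory
  // (placeholder path; reload_data() returns 0 on success).
  if (cache.reload_data("./uci_housing_model/cube_cache") != 0) {
    std::cerr << "reload cube cache failed" << std::endl;
    return -1;
  }
  // Look up one sparse-parameter key; nullptr means a cache miss.
  uint64_t key = 12345;
  rec::mcube::CubeValue* value = cache.get_data(key);
  std::cout << (value != nullptr ? "hit" : "miss") << std::endl;
  return 0;
}
```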
...@@ -21,6 +21,15 @@ ...@@ -21,6 +21,15 @@
#include <string> #include <string>
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/op_repository.h" #include "core/predictor/framework/op_repository.h"
#ifdef BCLOUD
#include <base/atomicops.h>
#else
#include <butil/atomicops.h>
#endif
#include <errno.h>
#include "core/predictor/framework/resource.h"
using baidu::paddle_serving::predictor::Resource;
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
...@@ -238,6 +247,77 @@ const Channel* DagView::get_response_channel(const uint64_t log_id) const { ...@@ -238,6 +247,77 @@ const Channel* DagView::get_response_channel(const uint64_t log_id) const {
return last_op->mutable_channel(); return last_op->mutable_channel();
} }
void* call_back(void* ori_args) {
Resource::instance().thread_initialize();
Args* args = (Args*)ori_args;
Op* op = static_cast<Op*>(args->_op);
uint64_t log_id = static_cast<uint64_t>(args->_log_id);
bool debug = static_cast<bool>(args->_debug);
args->errcode = op->process(log_id, debug);
return nullptr;
}
int ParallelDagView::execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
butil::Timer stage_time(butil::Timer::STARTED);
uint32_t node_size = vstage->nodes.size();
std::vector<THREAD_T> tids(node_size);
Args* args = new Args[node_size];
VLOG(2) << "(logid=" << log_id << ") vstage->nodes.size(): " << node_size;
for (uint32_t ni = 0; ni < node_size; ni++) {
ViewNode* vnode = vstage->nodes[ni];
DagNode* conf = vnode->conf;
Op* op = vnode->op;
TRACEPRINTF(
"(logid=%" PRIu64 ") start to execute op[%s]", log_id, op->name());
args[ni]._op = op;
args[ni]._log_id = log_id;
args[ni]._debug = (debug_os != NULL);
int rc = THREAD_CREATE(&tids[ni], NULL, call_back, (void*)(args + ni));
if (rc != 0) {
LOG(ERROR) << "failed to create ParallelDagView worker thread: index="
<< ni << ", rc=" << rc << ", errno=" << errno << ":"
<< strerror(errno);
delete[] args;
return -1;
}
}
for (uint32_t ni = 0; ni < node_size; ni++) {
THREAD_JOIN(tids[ni], NULL);
int errcode = args[ni].errcode;
Op* op = args[ni]._op;
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute op[%s]", log_id, op->name());
if (errcode < 0) {
LOG(ERROR) << "(logid=" << log_id
<< ") Execute failed, Op:" << op->debug_string();
delete[] args;
return errcode;
}
if (errcode > 0) {
LOG(INFO) << "(logid=" << log_id
<< ") Execute ignore, Op:" << op->debug_string();
continue;
}
if (debug_os) {
(*debug_os) << "(logid=" << log_id << ") {\"op_name\": \"" << op->name()
<< "\", \"debug_str:\": \"" << op->debug_string()
<< "\", \"time_info\": \"" << op->time_info() << "\"}";
}
// LOG(DEBUG) << "Execute succ, Op:" << op->debug_string();
}
stage_time.stop();
PredictorMetric::GetInstance()->update_latency_metric(
STAGE_METRIC_PREFIX + vstage->full_name, stage_time.u_elapsed());
delete[] args;
return ERR_OK;
}
} // namespace predictor } // namespace predictor
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
...@@ -24,7 +24,7 @@ namespace baidu { ...@@ -24,7 +24,7 @@ namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
class Op; // class Op;
struct ViewNode { struct ViewNode {
Op* op; // op->full_name == service_workflow_stageindex_opname Op* op; // op->full_name == service_workflow_stageindex_opname
...@@ -75,11 +75,20 @@ class DagView { ...@@ -75,11 +75,20 @@ class DagView {
Bus* _bus; Bus* _bus;
}; };
struct Args {
Op* _op;
uint64_t _log_id;
bool _debug;
int errcode;
};
// The derived DagView supports parallel execution // The derived DagView supports parallel execution
// strategy, by implementing the execute_one_stage(). // strategy, by implementing the execute_one_stage().
class ParallelDagView : public DagView { class ParallelDagView : public DagView {
public: public:
int execute_one_stage(ViewStage* vstage, butil::IOBufBuilder*) { return 0; } virtual int execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os);
}; };
} // namespace predictor } // namespace predictor
......
...@@ -25,7 +25,8 @@ int ReloadableInferEngine::proc_initialize_impl( ...@@ -25,7 +25,8 @@ int ReloadableInferEngine::proc_initialize_impl(
_model_dir = conf.model_dir(); _model_dir = conf.model_dir();
_infer_thread_num = conf.runtime_thread_num(); _infer_thread_num = conf.runtime_thread_num();
_infer_batch_size = conf.batch_infer_size(); _infer_batch_size = conf.batch_infer_size();
_infer_batch_align = conf.enable_batch_align(); _infer_overrun = conf.enable_overrun();
_allow_split_request = conf.allow_split_request();
_conf = conf; _conf = conf;
...@@ -56,9 +57,6 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf, ...@@ -56,9 +57,6 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
} }
// init bsf framework // init bsf framework
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this));
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index] im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_thread_init_fn( .set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this)); boost::bind(&InferEngine::thrd_initialize_impl, this));
...@@ -69,8 +67,10 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf, ...@@ -69,8 +67,10 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
boost::bind(&InferEngine::task_infer_impl, this, _1, _2)); boost::bind(&InferEngine::task_infer_impl, this, _1, _2));
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_size( im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_size(
_infer_batch_size); _infer_batch_size);
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_batch_align( im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].set_overrun(
_infer_batch_align); _infer_overrun);
im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index]
.set_allow_split_request(_allow_split_request);
if (im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].start( if (im::bsf::TaskExecutorVector<TaskT>::instance()[_model_index].start(
_infer_thread_num) != 0) { _infer_thread_num) != 0) {
LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num; LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num;
...@@ -79,7 +79,8 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf, ...@@ -79,7 +79,8 @@ int ReloadableInferEngine::proc_initialize(const configure::EngineDesc& conf,
LOG(WARNING) << "Enable batch schedule framework, thread_num:" LOG(WARNING) << "Enable batch schedule framework, thread_num:"
<< _infer_thread_num << ", batch_size:" << _infer_batch_size << _infer_thread_num << ", batch_size:" << _infer_batch_size
<< ", enable_batch_align:" << _infer_batch_align; << ", enable_overrun:" << _infer_overrun
<< ", allow_split_request:" << _allow_split_request;
return 0; return 0;
} }
...@@ -348,7 +349,7 @@ T* VersionedInferEngine::get_core() { ...@@ -348,7 +349,7 @@ T* VersionedInferEngine::get_core() {
} }
template <typename T> template <typename T>
T* VersionedInferEngine::get_core(uint64_t version) { T* VersionedInferEngine::get_core(const uint64_t version) {
auto iter = _versions.find(version); auto iter = _versions.find(version);
if (iter == _versions.end()) { if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version; LOG(ERROR) << "Not found version engine: " << version;
...@@ -363,6 +364,15 @@ T* VersionedInferEngine::get_core(uint64_t version) { ...@@ -363,6 +364,15 @@ T* VersionedInferEngine::get_core(uint64_t version) {
return NULL; return NULL;
} }
CubeCache* VersionedInferEngine::get_cube_cache() {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
return nullptr;
}
return engine->get_cube_cache();
}
int VersionedInferEngine::proc_initialize_impl( int VersionedInferEngine::proc_initialize_impl(
const configure::EngineDesc& conf, bool) { const configure::EngineDesc& conf, bool) {
return -1; return -1;
...@@ -382,6 +392,11 @@ int VersionedInferEngine::task_infer_impl(const void* in, ...@@ -382,6 +392,11 @@ int VersionedInferEngine::task_infer_impl(const void* in,
return -1; return -1;
} }
int InferManager::set_taskexecutor_num(size_t total_engine_num) {
im::bsf::TaskExecutorVector<TaskT>::instance().resize(total_engine_num);
return 0;
}
int InferManager::proc_initialize(const char* path, int InferManager::proc_initialize(const char* path,
const char* file, const char* file,
std::shared_ptr<int> engine_index_ptr) { std::shared_ptr<int> engine_index_ptr) {
...@@ -391,8 +406,6 @@ int InferManager::proc_initialize(const char* path, ...@@ -391,8 +406,6 @@ int InferManager::proc_initialize(const char* path,
return -1; return -1;
} }
uint32_t engine_num = model_toolkit_conf.engines_size(); uint32_t engine_num = model_toolkit_conf.engines_size();
im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr +
engine_num);
for (uint32_t ei = 0; ei < engine_num; ++ei) { for (uint32_t ei = 0; ei < engine_num; ++ei) {
LOG(INFO) << "model_toolkit_conf.engines(" << ei LOG(INFO) << "model_toolkit_conf.engines(" << ei
<< ").name: " << model_toolkit_conf.engines(ei).name(); << ").name: " << model_toolkit_conf.engines(ei).name();
...@@ -502,6 +515,15 @@ T* InferManager::get_core(const char* model_name) { ...@@ -502,6 +515,15 @@ T* InferManager::get_core(const char* model_name) {
return NULL; return NULL;
} }
CubeCache* InferManager::get_cube_cache(const char* model_name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
return nullptr;
}
return it->second->get_cube_cache();
}
// Versioned inference interface // Versioned inference interface
int InferManager::infer(const char* model_name, int InferManager::infer(const char* model_name,
const void* in, const void* in,
...@@ -517,7 +539,7 @@ int InferManager::infer(const char* model_name, ...@@ -517,7 +539,7 @@ int InferManager::infer(const char* model_name,
} }
template <typename T> template <typename T>
T* InferManager::get_core(const char* model_name, uint64_t version) { T* InferManager::get_core(const char* model_name, const uint64_t version) {
auto it = _map.find(model_name); auto it = _map.find(model_name);
if (it == _map.end()) { if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#pragma once #pragma once
#include <pthread.h> #include <pthread.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include <functional> #include <functional>
...@@ -25,16 +26,19 @@ ...@@ -25,16 +26,19 @@
#include <vector> #include <vector>
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/bsf.h" #include "core/predictor/framework/bsf.h"
#include "core/predictor/framework/cache.h"
#include "core/predictor/framework/factory.h" #include "core/predictor/framework/factory.h"
#include "core/predictor/framework/infer_data.h" #include "core/predictor/framework/infer_data.h"
#include "core/predictor/framework/memory.h" #include "core/predictor/framework/memory.h"
#include "paddle_inference_api.h" // NOLINT #include "paddle_inference_api.h" // NOLINT
#include "experimental/float16.h"
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
using configure::ModelToolkitConf; using configure::ModelToolkitConf;
// Auto mutex lock
class AutoLock { class AutoLock {
public: public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) { explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
...@@ -46,6 +50,7 @@ class AutoLock { ...@@ -46,6 +50,7 @@ class AutoLock {
pthread_mutex_t& _mut; pthread_mutex_t& _mut;
}; };
// Global singleton mutex lock
class GlobalCreateMutex { class GlobalCreateMutex {
public: public:
pthread_mutex_t& mutex() { return _mut; } pthread_mutex_t& mutex() { return _mut; }
...@@ -60,6 +65,7 @@ class GlobalCreateMutex { ...@@ -60,6 +65,7 @@ class GlobalCreateMutex {
pthread_mutex_t _mut; pthread_mutex_t _mut;
}; };
// InferEngine
class InferEngine { class InferEngine {
public: public:
virtual ~InferEngine() {} virtual ~InferEngine() {}
...@@ -90,11 +96,13 @@ class InferEngine { ...@@ -90,11 +96,13 @@ class InferEngine {
void* out, void* out,
uint32_t batch_size = -1) = 0; uint32_t batch_size = -1) = 0;
virtual int task_infer_impl(const void* in, void* out) = 0; // NOLINT virtual int task_infer_impl(const void* in, void* out) = 0; // NOLINT
virtual CubeCache* get_cube_cache() = 0;
protected: protected:
uint32_t _model_index; uint32_t _model_index;
// end: framework inner call // end: framework inner call
}; };
typedef im::bsf::Task<paddle::PaddleTensor, paddle::PaddleTensor> TaskT; typedef im::bsf::Task<paddle::PaddleTensor, paddle::PaddleTensor> TaskT;
class ReloadableInferEngine : public InferEngine { class ReloadableInferEngine : public InferEngine {
public: public:
...@@ -163,28 +171,37 @@ class ReloadableInferEngine : public InferEngine { ...@@ -163,28 +171,37 @@ class ReloadableInferEngine : public InferEngine {
uint32_t _infer_batch_size; uint32_t _infer_batch_size;
// Need to align batch_size in inferring // Need to align batch_size in inferring
bool _infer_batch_align; bool _infer_overrun;
// allow to split request in inferring
bool _allow_split_request;
// model version // model version
uint64_t _version; uint64_t _version;
}; };
// Lock-free switching between two models // Lock-free switching between two models and cube caches
template <typename EngineCore> template <typename EngineCore>
struct ModelData { struct ModelData {
ModelData() : current_idx(1) { ModelData() : current_idx(1) {
cores[0] = NULL; cores[0] = nullptr;
cores[1] = NULL; cores[1] = nullptr;
caches[0] = nullptr;
caches[1] = nullptr;
} }
~ModelData() { ~ModelData() {
delete cores[0]; delete cores[0];
delete cores[1]; delete cores[1];
delete caches[0];
delete caches[1];
} }
void* get() { return cores[current_idx]->get(); } void* get_core() { return cores[current_idx]->get(); }
CubeCache* get_cache() { return caches[current_idx]; }
EngineCore* cores[2]; EngineCore* cores[2];
CubeCache* caches[2];
uint32_t current_idx; uint32_t current_idx;
}; };
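The struct above realizes the lock-free switch by always loading into the idle slot and then flipping `current_idx`. The toy program below illustrates just that pattern; `Engine` and `DoubleBuffer` are made-up stand-ins and deliberately ignore the thread-local bookkeeping the real code layers on top:

```
#include <cstdint>
#include <iostream>

struct Engine { int version; };  // stand-in for EngineCore / CubeCache

struct DoubleBuffer {
  Engine slots[2];
  uint32_t current_idx = 1;
  Engine* get() { return &slots[current_idx]; }
  void reload(int new_version) {
    uint32_t next_idx = (current_idx + 1) % 2;  // prepare the idle slot
    slots[next_idx].version = new_version;      // load into it
    current_idx = next_idx;                     // publish with one index flip
  }
};

int main() {
  DoubleBuffer buf;
  buf.reload(1);
  std::cout << buf.get()->version << std::endl;  // 1
  buf.reload(2);
  std::cout << buf.get()->version << std::endl;  // 2
  return 0;
}
```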
...@@ -196,7 +213,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine { ...@@ -196,7 +213,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
int proc_initialize(const configure::EngineDesc& conf, bool version) { int proc_initialize(const configure::EngineDesc& conf, bool version) {
THREAD_KEY_CREATE(&_skey, NULL); THREAD_KEY_CREATE(&_skey, NULL);
THREAD_MUTEX_INIT(&_mutex, NULL); THREAD_MUTEX_INIT(&_mutex, NULL);
gpu_index = 0; _gpu_index = 0;
return ReloadableInferEngine::proc_initialize(conf, version); return ReloadableInferEngine::proc_initialize(conf, version);
} }
...@@ -209,7 +226,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine { ...@@ -209,7 +226,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
if (_reload_vec.empty()) { if (_reload_vec.empty()) {
return 0; return 0;
} }
gpu_index = 0; _gpu_index = 0;
for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) { for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
if (load_data(_reload_vec[ti], conf) != 0) { if (load_data(_reload_vec[ti], conf) != 0) {
LOG(ERROR) << "Failed reload engine model: " << ti; LOG(ERROR) << "Failed reload engine model: " << ti;
...@@ -224,26 +241,56 @@ class DBReloadableInferEngine : public ReloadableInferEngine { ...@@ -224,26 +241,56 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
virtual int load_data(ModelData<EngineCore>* md, virtual int load_data(ModelData<EngineCore>* md,
const configure::EngineDesc& conf) { const configure::EngineDesc& conf) {
uint32_t next_idx = (md->current_idx + 1) % 2; uint32_t next_idx = (md->current_idx + 1) % 2;
// reload engine core
if (md->cores[next_idx]) { if (md->cores[next_idx]) {
delete md->cores[next_idx]; delete md->cores[next_idx];
} }
md->cores[next_idx] = new (std::nothrow) EngineCore; md->cores[next_idx] = new (std::nothrow) EngineCore;
if (nullptr == md->cores[next_idx]) {
// params.dump(); LOG(ERROR) << "Allocating memory failed. ";
return -1;
}
size_t gpu_ids_num = conf.gpu_ids_size(); size_t gpu_ids_num = conf.gpu_ids_size();
im::bsf::AutoMutex lock(_mutex); im::bsf::AutoMutex lock(_mutex);
int gpu_id = -1; int gpu_id = -1;
if (gpu_ids_num > 0) { if (gpu_ids_num > 0) {
gpu_id = conf.gpu_ids(gpu_index % gpu_ids_num); gpu_id = conf.gpu_ids(_gpu_index % gpu_ids_num);
} }
LOG(WARNING) << "Loading EngineCore[" << next_idx << "] ...";
if (!md->cores[next_idx] || if (!md->cores[next_idx] ||
md->cores[next_idx]->create(conf, gpu_id) != 0) { md->cores[next_idx]->create(conf, gpu_id) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir(); LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1; return -1;
} }
gpu_index++; _gpu_index++;
LOG(WARNING) << "Loading EngineCore[" << next_idx << "] done.";
// reload cube cache
if (nullptr == md->caches[next_idx]) {
md->caches[next_idx] = new (std::nothrow) CubeCache;
}
if (nullptr == md->caches[next_idx]) {
LOG(ERROR) << "Allocating memory failed.";
return -1;
}
LOG(WARNING) << "Loading cube cache[" << next_idx << "] ...";
std::string model_path = conf.model_dir();
if (access(model_path.c_str(), F_OK) == 0) {
std::string cube_cache_path = model_path + "/cube_cache";
int reload_cache_ret = md->caches[next_idx]->reload_data(cube_cache_path);
LOG(WARNING) << "Loading cube cache[" << next_idx << "] done.";
} else {
LOG(ERROR) << "model_path " << model_path
<< " is not exits. Ignore cube cache!";
}
// switch current_idx
md->current_idx = next_idx; md->current_idx = next_idx;
LOG(WARNING)
<< "Reload model and cube cache done. switching to current_idx["
<< next_idx << "]";
return 0; return 0;
} }
...@@ -309,11 +356,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine { ...@@ -309,11 +356,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
return md->cores[md->current_idx]; return md->cores[md->current_idx];
} }
CubeCache* get_cube_cache() {
ModelData<EngineCore>* md =
(ModelData<EngineCore>*)THREAD_GETSPECIFIC(_skey);
if (!md) {
LOG(ERROR) << "Failed get thread specific data";
return NULL;
}
return md->get_cache();
}
protected: protected:
THREAD_KEY_T _skey; THREAD_KEY_T _skey;
THREAD_MUTEX_T _mutex; THREAD_MUTEX_T _mutex;
// vector of all model engines
std::vector<ModelData<EngineCore>*> _reload_vec; std::vector<ModelData<EngineCore>*> _reload_vec;
int gpu_index = 0;
// gpu card id
int _gpu_index = 0;
}; };
// Multiple EngineCores share the same model data // Multiple EngineCores share the same model data
...@@ -331,12 +392,20 @@ class CloneDBReloadableInferEngine ...@@ -331,12 +392,20 @@ class CloneDBReloadableInferEngine
virtual int load_data(ModelData<EngineCore>* md, virtual int load_data(ModelData<EngineCore>* md,
const configure::EngineDesc& conf) { const configure::EngineDesc& conf) {
int tid = syscall(SYS_gettid);
uint32_t next_idx = (md->current_idx + 1) % 2; uint32_t next_idx = (md->current_idx + 1) % 2;
if (md->cores[next_idx]) { if (md->cores[next_idx]) {
delete md->cores[next_idx]; delete md->cores[next_idx];
} }
md->cores[next_idx] = new (std::nothrow) EngineCore; md->cores[next_idx] = new (std::nothrow) EngineCore;
if (nullptr == md->caches[next_idx]) {
md->caches[next_idx] = new (std::nothrow) CubeCache;
}
if (nullptr == md->cores[next_idx] || nullptr == md->caches[next_idx]) {
LOG(ERROR) << "Allocating memory fail.";
return -1;
}
// params.dump(); // params.dump();
// gpu_ids_num > 0 is always true. // gpu_ids_num > 0 is always true.
// if use CPU, gpu_ids = [-1]. // if use CPU, gpu_ids = [-1].
...@@ -347,46 +416,70 @@ class CloneDBReloadableInferEngine ...@@ -347,46 +416,70 @@ class CloneDBReloadableInferEngine
im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex); im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
int gpu_id = -1; int gpu_id = -1;
if (gpu_ids_num > 0) { if (gpu_ids_num > 0) {
gpu_id = conf.gpu_ids(DBReloadableInferEngine<EngineCore>::gpu_index % gpu_id = conf.gpu_ids(DBReloadableInferEngine<EngineCore>::_gpu_index %
gpu_ids_num); gpu_ids_num);
} else { } else {
gpu_ids_num = 1; gpu_ids_num = 1;
} }
// gpu_index will be set to 0 when load() or proc_initialize() is called.
// gpu_index < gpu_ids_num means there are predictors still not created // _gpu_index will be set to 0 when load() or proc_initialize() is called.
// _gpu_index < gpu_ids_num means there are predictors still not created
// on some GPU card. // on some GPU card.
// so we need to create the predictor. // so we need to create the predictor.
// gpu_index >= gpu_ids_num means each GPU card has already created one. // _gpu_index >= gpu_ids_num means each GPU card has already created one.
// so we need to clone the predictor. // so we need to clone the predictor.
if (DBReloadableInferEngine<EngineCore>::gpu_index < gpu_ids_num) { LOG(WARNING) << "tid:" << tid << " Loading clone model ...";
if (!md->cores[next_idx] || if (DBReloadableInferEngine<EngineCore>::_gpu_index < gpu_ids_num) {
md->cores[next_idx]->create(conf, gpu_id) != 0) { // create cores
if (md->cores[next_idx]->create(conf, gpu_id) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir(); LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1; return -1;
} }
DBReloadableInferEngine<EngineCore>::gpu_index++; // create caches
md->current_idx = next_idx; std::string model_path = conf.model_dir();
if (access(model_path.c_str(), F_OK) == 0) {
std::string cube_cache_path = model_path + "/cube_cache";
int reload_cache_ret =
md->caches[next_idx]->reload_data(cube_cache_path);
LOG(WARNING) << "create cube cache[" << next_idx << "] done.";
} else {
LOG(WARNING) << "model_path " << model_path
<< " is not exits. Ignore cube cache!";
}
DBReloadableInferEngine<EngineCore>::_gpu_index++;
// md->current_idx = next_idx;
if (_cloneTemplate.size() < if (_cloneTemplate.size() <
DBReloadableInferEngine<EngineCore>::gpu_index) { DBReloadableInferEngine<EngineCore>::_gpu_index) {
_cloneTemplate.push_back(md); _cloneTemplate.push_back(md);
} else { } else {
_cloneTemplate[DBReloadableInferEngine<EngineCore>::gpu_index - 1] = md; _cloneTemplate[DBReloadableInferEngine<EngineCore>::_gpu_index - 1] =
md;
} }
} else { } else {
int template_index = DBReloadableInferEngine<EngineCore>::gpu_index % int template_index = DBReloadableInferEngine<EngineCore>::_gpu_index %
_cloneTemplate.size(); _cloneTemplate.size();
if (!md->cores[next_idx] ||
md->cores[next_idx]->clone(_cloneTemplate[template_index]->get()) != // clone cores
0) { if (md->cores[next_idx]->clone(
_cloneTemplate[template_index]->get_core()) != 0) {
LOG(ERROR) << "Failed clone model from core"; LOG(ERROR) << "Failed clone model from core";
return -1; return -1;
} }
DBReloadableInferEngine<EngineCore>::gpu_index++; // clone caches
md->current_idx = next_idx; md->caches[next_idx] = _cloneTemplate[template_index]->get_cache();
LOG(WARNING) << "core clone model succ, cur_idx[" << md->current_idx LOG(WARNING) << "tid:" << tid << " clone caches done";
<< "].";
DBReloadableInferEngine<EngineCore>::_gpu_index++;
} }
// switch current_idx
md->current_idx = next_idx;
LOG(WARNING)
<< "[" << tid
<< "] Reload clone model and cube cache done. switching to current_idx["
<< next_idx << "]";
return 0; return 0;
} }
...@@ -441,7 +534,28 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> { ...@@ -441,7 +534,28 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
paddle::PaddleDType::INT32) { paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data); int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data); lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::UINT8) {
uint8_t* data = static_cast<uint8_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT8) {
int8_t* data = static_cast<int8_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::FLOAT16) {
paddle::platform::float16* data =
static_cast<paddle::platform::float16*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else {
LOG(ERROR) << "Inference not support type["
<< (*tensorVector_in_pointer)[i].dtype << "],name["
<< (*tensorVector_in_pointer)[i].name << "]"
<< " copy into core failed!";
} }
VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
<< ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
<< ";tensor_dtype=" << lod_tensor_in->type();
} }
// After the input data is passed in, // After the input data is passed in,
// call 'core->Run()' perform the prediction process. // call 'core->Run()' perform the prediction process.
...@@ -506,7 +620,39 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> { ...@@ -506,7 +620,39 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
int32_t* data_out = reinterpret_cast<int32_t*>(databuf_data); int32_t* data_out = reinterpret_cast<int32_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out); lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out); databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::UINT8) {
databuf_size = out_num * sizeof(uint8_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
uint8_t* data_out = reinterpret_cast<uint8_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::INT8) {
databuf_size = out_num * sizeof(int8_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::FLOAT16) {
databuf_size = out_num * sizeof(paddle::platform::float16);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
paddle::platform::float16* data_out =
reinterpret_cast<paddle::platform::float16*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} }
// Because task scheduling requires OPs to use 'Channel' // Because task scheduling requires OPs to use 'Channel'
// (which is a data structure) to transfer data between OPs. // (which is a data structure) to transfer data between OPs.
// We need to copy the processed data to the 'Channel' for the next OP. // We need to copy the processed data to the 'Channel' for the next OP.
...@@ -532,6 +678,10 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> { ...@@ -532,6 +678,10 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
int task_infer_impl(const void* in, void* out) { // NOLINT int task_infer_impl(const void* in, void* out) { // NOLINT
return infer_impl(in, out); return infer_impl(in, out);
} }
CubeCache* get_cube_cache() {
return DBReloadableInferEngine<EngineCore>::get_cube_cache();
}
}; };
typedef FactoryPool<InferEngine> StaticInferFactory; typedef FactoryPool<InferEngine> StaticInferFactory;
...@@ -565,11 +715,13 @@ class VersionedInferEngine : public InferEngine { ...@@ -565,11 +715,13 @@ class VersionedInferEngine : public InferEngine {
template <typename T> template <typename T>
T* get_core(); T* get_core();
CubeCache* get_cube_cache();
// versioned inference interface // versioned inference interface
int infer(const void* in, void* out, uint32_t batch_size, uint64_t version); int infer(const void* in, void* out, uint32_t batch_size, uint64_t version);
template <typename T> template <typename T>
T* get_core(uint64_t version); T* get_core(const uint64_t version);
int proc_initialize_impl(const configure::EngineDesc& conf, bool); int proc_initialize_impl(const configure::EngineDesc& conf, bool);
...@@ -600,6 +752,8 @@ class InferManager { ...@@ -600,6 +752,8 @@ class InferManager {
const char* file, const char* file,
std::shared_ptr<int> engine_index_ptr); std::shared_ptr<int> engine_index_ptr);
int set_taskexecutor_num(size_t total_engine_num);
int thrd_initialize(); int thrd_initialize();
int thrd_clear(); int thrd_clear();
...@@ -616,9 +770,13 @@ class InferManager { ...@@ -616,9 +770,13 @@ class InferManager {
void* out, void* out,
uint32_t batch_size = -1); uint32_t batch_size = -1);
// get engine core
template <typename T> template <typename T>
T* get_core(const char* model_name); T* get_core(const char* model_name);
// get cube cache
CubeCache* get_cube_cache(const char* model_name);
// Versioned inference interface // Versioned inference interface
int infer(const char* model_name, int infer(const char* model_name,
const void* in, const void* in,
...@@ -626,9 +784,11 @@ class InferManager { ...@@ -626,9 +784,11 @@ class InferManager {
uint32_t batch_size, uint32_t batch_size,
uint64_t version); uint64_t version);
// Versioned get engine core
template <typename T> template <typename T>
T* get_core(const char* model_name, uint64_t version); T* get_core(const char* model_name, const uint64_t version);
// query model version
int query_version(const std::string& model, uint64_t& version); int query_version(const std::string& model, uint64_t& version);
private: private:
......
...@@ -135,6 +135,17 @@ int Resource::initialize(const std::string& path, const std::string& file) { ...@@ -135,6 +135,17 @@ int Resource::initialize(const std::string& path, const std::string& file) {
if (FLAGS_enable_model_toolkit) { if (FLAGS_enable_model_toolkit) {
size_t model_toolkit_num = resource_conf.model_toolkit_path_size(); size_t model_toolkit_num = resource_conf.model_toolkit_path_size();
// For now we assume each model_toolkit contains exactly one engine,
// so model_toolkit_num == the total number of engines.
// If a model_toolkit may contain multiple engines later, first loop over the
// toolkits to count the total number of engines, then call set_taskexecutor_num.
// Never resize im::bsf::TaskExecutorVector<TaskT>::instance() dynamically:
// TaskExecutor is a thread pool that holds a lock, and its workers already run
// their locked loop once the engine process is initialized. A later resize
// moves the memory, so the workers keep using the old lock while the lock of
// the relocated TaskExecutor lives at a different address.
if (InferManager::instance().set_taskexecutor_num(model_toolkit_num) != 0) {
LOG(ERROR) << "failed set_taskexecutor_num";
return -1;
}
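As an illustration of the hazard spelled out in the comment above, the toy program below (entirely separate from the codebase; `FakeExecutor` is a made-up stand-in for a lock-holding TaskExecutor) shows why the executor vector must be sized exactly once, before any worker starts:

```
#include <pthread.h>
#include <thread>
#include <vector>

struct FakeExecutor {
  pthread_mutex_t mu;  // stands in for the mutex inside TaskExecutor
  FakeExecutor() { pthread_mutex_init(&mu, nullptr); }
};

int main() {
  std::vector<FakeExecutor> pool(2);    // sized exactly once, up front
  pthread_mutex_t* held = &pool[0].mu;  // a worker keeps this address in its loop
  std::thread worker([held] {
    pthread_mutex_lock(held);
    pthread_mutex_unlock(held);
  });
  // pool.resize(64);  // WRONG: reallocation moves the elements, so `held`
  //                   // would point at freed memory while the worker still
  //                   // locks through it; hence the one-shot sizing above.
  worker.join();
  return 0;
}
```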
std::shared_ptr<int> engine_index_ptr(new int(0)); std::shared_ptr<int> engine_index_ptr(new int(0));
for (size_t mi = 0; mi < model_toolkit_num; ++mi) { for (size_t mi = 0; mi < model_toolkit_num; ++mi) {
std::string model_toolkit_path = resource_conf.model_toolkit_path(mi); std::string model_toolkit_path = resource_conf.model_toolkit_path(mi);
...@@ -165,18 +176,18 @@ int Resource::initialize(const std::string& path, const std::string& file) { ...@@ -165,18 +176,18 @@ int Resource::initialize(const std::string& path, const std::string& file) {
rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance(); rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance();
std::string cube_config_fullpath = "./" + resource_conf.cube_config_path() + std::string cube_config_fullpath = "./" + resource_conf.cube_config_path() +
"/" + resource_conf.cube_config_file(); "/" + resource_conf.cube_config_file();
this->cube_config_fullpath = cube_config_fullpath; this->_cube_config_fullpath = cube_config_fullpath;
this->cube_quant_bits = resource_conf.has_cube_quant_bits() this->_cube_quant_bits = resource_conf.has_cube_quant_bits()
? resource_conf.cube_quant_bits() ? resource_conf.cube_quant_bits()
: 0; : 0;
if (this->cube_quant_bits != 0 && this->cube_quant_bits != 8) { if (this->_cube_quant_bits != 0 && this->_cube_quant_bits != 8) {
LOG(ERROR) << "Cube quant bits illegal! should be 0 or 8."; LOG(ERROR) << "Cube quant bits illegal! should be 0 or 8.";
return -1; return -1;
} }
if (this->cube_quant_bits == 0) { if (this->_cube_quant_bits == 0) {
LOG(INFO) << "cube quant mode OFF"; LOG(INFO) << "cube quant mode OFF";
} else { } else {
LOG(INFO) << "cube quant mode ON, quant bits: " << this->cube_quant_bits; LOG(INFO) << "cube quant mode ON, quant bits: " << this->_cube_quant_bits;
} }
} }
...@@ -187,10 +198,10 @@ int Resource::initialize(const std::string& path, const std::string& file) { ...@@ -187,10 +198,10 @@ int Resource::initialize(const std::string& path, const std::string& file) {
// model config // model config
int Resource::general_model_initialize(const std::string& path, int Resource::general_model_initialize(const std::string& path,
const std::string& file) { const std::string& file) {
if (this->cube_config_fullpath.size() != 0) { if (this->_cube_config_fullpath.size() != 0) {
LOG(INFO) << "init cube by config file : " << this->cube_config_fullpath; LOG(INFO) << "init cube by config file : " << this->_cube_config_fullpath;
rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance(); rec::mcube::CubeAPI* cube = rec::mcube::CubeAPI::instance();
int ret = cube->init(this->cube_config_fullpath.c_str()); int ret = cube->init(this->_cube_config_fullpath.c_str());
if (ret != 0) { if (ret != 0) {
LOG(ERROR) << "cube init error"; LOG(ERROR) << "cube init error";
return -1; return -1;
...@@ -315,7 +326,7 @@ int Resource::thread_clear() { ...@@ -315,7 +326,7 @@ int Resource::thread_clear() {
} }
return 0; return 0;
} }
size_t Resource::get_cube_quant_bits() { return this->cube_quant_bits; } size_t Resource::get_cube_quant_bits() { return this->_cube_quant_bits; }
int Resource::reload() { int Resource::reload() {
if (FLAGS_enable_model_toolkit && InferManager::instance().reload() != 0) { if (FLAGS_enable_model_toolkit && InferManager::instance().reload() != 0) {
......
...@@ -16,8 +16,10 @@ ...@@ -16,8 +16,10 @@
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "core/cube/cube-api/include/cube_api.h" #include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
...@@ -27,6 +29,8 @@ namespace baidu { ...@@ -27,6 +29,8 @@ namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
// Paddle general model configuration; reads the model configuration
// information from the general_model_config.proto file
class PaddleGeneralModelConfig { class PaddleGeneralModelConfig {
public: public:
PaddleGeneralModelConfig() {} PaddleGeneralModelConfig() {}
...@@ -34,23 +38,47 @@ class PaddleGeneralModelConfig { ...@@ -34,23 +38,47 @@ class PaddleGeneralModelConfig {
~PaddleGeneralModelConfig() {} ~PaddleGeneralModelConfig() {}
public: public:
// feed/fetch name and alias_name
std::vector<std::string> _feed_name; std::vector<std::string> _feed_name;
std::vector<std::string> _feed_alias_name; std::vector<std::string> _feed_alias_name;
std::vector<int> _feed_type; // 0 int64, 1 float
std::vector<bool> _is_lod_feed; // true lod tensor
std::vector<bool> _is_lod_fetch; // whether a fetch var is lod_tensor
std::vector<int> _capacity; // capacity for each tensor
/*
feed_shape_ for feeded variable
feed_shape_[i][j] represents the jth dim for ith input Tensor
if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
*/
std::vector<std::vector<int>> _feed_shape;
std::vector<std::string> _fetch_name; std::vector<std::string> _fetch_name;
std::vector<std::string> _fetch_alias_name; std::vector<std::string> _fetch_alias_name;
// Be consistent with model saving interface var type conversion
// (python/paddle serving client/io/__init__)
// int64 => 0;
// float32 => 1;
// int32 => 2;
// float64 => 3;
// int16 => 4;
// float16 => 5;
// bfloat16 => 6;
// uint8 => 7;
// int8 => 8;
// bool => 9;
// complex64 => 10,
// complex128 => 11;
std::vector<int> _feed_type;
// whether a feed or fetch var is lod_tensor.
std::vector<bool> _is_lod_feed;
std::vector<bool> _is_lod_fetch;
// capacity for each tensor
std::vector<int> _capacity;
// _feed_shape and _fetch_shape are used to represent the dimensional
// information of tensor.
// for example, feed_shape_[i][j] represents the j(th) dim of the i(th) input
// tensor.
// if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
std::vector<std::vector<int>> _feed_shape;
std::vector<std::vector<int>> _fetch_shape; std::vector<std::vector<int>> _fetch_shape;
// fetch name -> index of fetch_name vector.
std::map<std::string, int> _fetch_name_to_index; std::map<std::string, int> _fetch_name_to_index;
// fetch alias name -> index of fetch_alias_name vector.
std::map<std::string, int> _fetch_alias_name_to_index; std::map<std::string, int> _fetch_alias_name_to_index;
}; };
...@@ -73,33 +101,50 @@ class Resource { ...@@ -73,33 +101,50 @@ class Resource {
return ins; return ins;
} }
// initialize resource
int initialize(const std::string& path, const std::string& file); int initialize(const std::string& path, const std::string& file);
// load all model configurations from prototxt
int general_model_initialize(const std::string& path, int general_model_initialize(const std::string& path,
const std::string& file); const std::string& file);
// initialize thread local data
int thread_initialize(); int thread_initialize();
// clear thread local data
int thread_clear(); int thread_clear();
// reload resources
int reload(); int reload();
// finalize
int finalize(); int finalize();
// get all model configs
std::vector<std::shared_ptr<PaddleGeneralModelConfig>> std::vector<std::shared_ptr<PaddleGeneralModelConfig>>
get_general_model_config(); get_general_model_config();
// print all configurations of all models
void print_general_model_config( void print_general_model_config(
const std::shared_ptr<PaddleGeneralModelConfig>& config); const std::shared_ptr<PaddleGeneralModelConfig>& config);
// get cube quantity bit size
size_t get_cube_quant_bits(); size_t get_cube_quant_bits();
private: private:
int thread_finalize() { return 0; } int thread_finalize() { return 0; }
private:
// configuration information of all models, loaded from prototxt files
std::vector<std::shared_ptr<PaddleGeneralModelConfig>> _configs; std::vector<std::shared_ptr<PaddleGeneralModelConfig>> _configs;
std::string cube_config_fullpath;
int cube_quant_bits; // 0 if no empty
// full path of cube configuration file.
std::string _cube_config_fullpath;
// cube quantization bit size; supports 0/8, set 0 for no quantization.
size_t _cube_quant_bits;
// bthread local key
THREAD_KEY_T _tls_bspec_key; THREAD_KEY_T _tls_bspec_key;
}; };
......
...@@ -82,14 +82,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, ...@@ -82,14 +82,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
else if (resize_h / 32 < 1 + 1e-5) else if (resize_h / 32 < 1 + 1e-5)
resize_h = 32; resize_h = 32;
else else
resize_h = (resize_h / 32) * 32; resize_h = (resize_h / 32 - 1) * 32;
if (resize_w % 32 == 0) if (resize_w % 32 == 0)
resize_w = resize_w; resize_w = resize_w;
else if (resize_w / 32 < 1 + 1e-5) else if (resize_w / 32 < 1 + 1e-5)
resize_w = 32; resize_w = 32;
else else
resize_w = (resize_w / 32) * 32; resize_w = (resize_w / 32 - 1) * 32;
if (!use_tensorrt) { if (!use_tensorrt) {
cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
ratio_h = float(resize_h) / float(h); ratio_h = float(resize_h) / float(h);
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
syntax = "proto2"; syntax = "proto3";
import "pds_option.proto"; import "pds_option.proto";
import "builtin_format.proto"; import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model; package baidu.paddle_serving.predictor.general_model;
...@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model; ...@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true; option cc_generic_services = true;
message Tensor { message Tensor {
repeated string data = 1; // VarType: INT64
repeated int32 int_data = 2; repeated int64 int64_data = 1;
repeated int64 int64_data = 3;
repeated float float_data = 4; // VarType: FP32
optional int32 elem_type = repeated float float_data = 2;
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch // VarType: INT32
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 int_data = 3;
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt // VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1
// represents the imaginary part
repeated float complex64_data = 7;
// (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1
// represents the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of data(LOD), support variable length data, only for fetch tensor
// currently.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14; // get from the Model prototxt
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
}; };
message Request { message Request {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; bool profile_server = 3;
required uint64 log_id = 4 [ default = 0 ]; uint64 log_id = 4;
}; };
message Response { message Response {
repeated ModelOutput outputs = 1; repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2; repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
}; };
message ModelOutput { message ModelOutput {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; string engine_name = 2;
} }
service GeneralModelService { service GeneralModelService {
......
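As a sketch of how the revised Tensor/Request layout above might be populated from C++, assuming the protoc-generated header is named `general_model_service.pb.h` (the accessors follow the standard protobuf naming for the fields shown in this diff; values are placeholders):

```
#include <iostream>

#include "general_model_service.pb.h"  // assumed name of the generated header

int main() {
  baidu::paddle_serving::predictor::general_model::Request req;
  auto* tensor = req.add_tensor();
  tensor->set_name("x");            // variable name from the model prototxt
  tensor->set_alias_name("x");
  tensor->set_elem_type(1);         // 1 => FP32, per the mapping above
  tensor->add_shape(1);             // shape includes the batch dimension
  tensor->add_shape(13);
  for (int i = 0; i < 13; ++i) {
    tensor->add_float_data(0.5f);   // payload goes into the typed repeated field
  }
  req.add_fetch_var_names("price");
  req.set_log_id(0);
  std::cout << req.DebugString();
  return 0;
}
```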
...@@ -12,7 +12,7 @@ BRPC-Server会尝试去JSON字符串中再去反序列化出Proto格式的数据 ...@@ -12,7 +12,7 @@ BRPC-Server会尝试去JSON字符串中再去反序列化出Proto格式的数据
### Http+protobuf method ### Http+protobuf method
ProtoBuf is supported by every major language. If you are familiar with it, you can serialize the data with ProtoBuf first, put the serialized bytes into the HTTP request body, and set Content-Type: application/proto, so that the service is accessed with an http/h2+protobuf binary payload. ProtoBuf is supported by every major language. If you are familiar with it, you can serialize the data with ProtoBuf first, put the serialized bytes into the HTTP request body, and set Content-Type: application/proto, so that the service is accessed with an http/h2+protobuf binary payload.
In practice, as the data volume grows, the payload size and deserialization time of JSON-over-HTTP increase sharply, so the Http+protobuf method is recommended when your data volume is large; we will add this capability to the framework's HttpClient later, it is not supported yet. In practice, as the data volume grows, the payload size and deserialization time of JSON-over-HTTP increase sharply, so the Http+protobuf method is recommended when your data volume is large; it is already supported in the Java and Python clients.
**In theory, serialization/deserialization performance ranks from highest to lowest as: protobuf > http/h2+protobuf > http** **In theory, serialization/deserialization performance ranks from highest to lowest as: protobuf > http/h2+protobuf > http**
...@@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 - ...@@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 -
To let users quickly request the server-side prediction service over HTTP, the common HTTP request-body construction, compression, and request-encryption features have been wrapped into an HttpClient class for convenient use. To let users quickly request the server-side prediction service over HTTP, the common HTTP request-body construction, compression, and request-encryption features have been wrapped into an HttpClient class for convenient use.
In the simplest case, using HttpClient takes only three steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example uci_housing_client/serving_client_conf.prototxt under the python/examples/fit_a_line/ directory); 3. call the Predict function to request the prediction service over HTTP. In the simplest case, using HttpClient takes only four steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example uci_housing_client/serving_client_conf.prototxt under the python/examples/fit_a_line/ directory); 3. call the connect function; 4. call the Predict function to request the prediction service over HTTP.
In addition, you can configure the server IP, port, and service name as needed (the service name must match the Service name and rpc method name in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), i.e. the `GeneralModelService` and `inference` fields), enable compression of the Request body, enable compressed Response transfer, use encrypted-model prediction (the server must be configured for model encryption), set the response timeout, and so on. In addition, you can configure the server IP, port, and service name as needed (the service name must match the Service name and rpc method name in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), i.e. the `GeneralModelService` and `inference` fields), enable compression of the Request body, enable compressed Response transfer, use encrypted-model prediction (the server must be configured for model encryption), set the response timeout, and so on.
...@@ -52,7 +52,9 @@ Java的HttpClient使用示例见[`java/examples/src/main/java/PaddleServingClien ...@@ -52,7 +52,9 @@ Java的HttpClient使用示例见[`java/examples/src/main/java/PaddleServingClien
If this does not meet your needs, you can also add features on top of it. If this does not meet your needs, you can also add features on top of it.
To support https or customize the Status Code of the Response, some secondary development of the C++ brpc server is required; please refer to https://github.com/apache/incubator-brpc/blob/master/docs/cn/http_service.md, and if demand turns out to be strong we will also add this functionality to the Server, so stay tuned. To support https or customize the Status Code of the Response, some secondary development of the C++ brpc server is required; please refer to https://github.com/apache/incubator-brpc/blob/master/docs/cn/http_service.md
If demand turns out to be strong, we will also add this functionality to the Server, so stay tuned.
### Sending HTTP requests with curl (basic principle) ### Sending HTTP requests with curl (basic principle)
...@@ -101,7 +103,7 @@ repeated int32 numbers = 1; ...@@ -101,7 +103,7 @@ repeated int32 numbers = 1;
``` ```
#### elem_type #### elem_type
表示数据类型,0 means int64, 1 means float32, 2 means int32, 3 means bytes(string) 表示数据类型,0 means int64, 1 means float32, 2 means int32, 20 means bytes(string)
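For the curl/JSON route, the request body mirrors the `Request`/`Tensor` proto fields; a sketch of such a body follows (the feed name, shape, and fetch name are placeholders, not values mandated by the framework):

```python
# Sketch of a JSON request body for the curl-based route; field names follow the
# Request/Tensor proto, values are placeholders.
import json

request_body = {
    "tensor": [{
        "name": "x",
        "alias_name": "x",
        "elem_type": 1,          # 1 means float32, per the mapping above
        "shape": [1, 13],
        "float_data": [0.0] * 13,
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}
print(json.dumps(request_body))
```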
#### fetch_var_names #### fetch_var_names
......
...@@ -7,8 +7,8 @@ ...@@ -7,8 +7,8 @@
为了方便用户使用java进行开发,我们提供了编译好的Serving工程放置在java镜像当中,获取镜像并进入开发环境的方式是 为了方便用户使用java进行开发,我们提供了编译好的Serving工程放置在java镜像当中,获取镜像并进入开发环境的方式是
``` ```
docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-java docker pull registry.baidubce.com/paddlepaddle/serving:0.6.0-java
docker run --rm -dit --name java_serving registry.baidubce.com/paddlepaddle/serving:0.5.0-java docker run --rm -dit --name java_serving registry.baidubce.com/paddlepaddle/serving:0.6.0-java
docker exec -it java_serving bash docker exec -it java_serving bash
cd Serving/java cd Serving/java
``` ```
...@@ -29,7 +29,7 @@ mvn install ...@@ -29,7 +29,7 @@ mvn install
## 请求BRPC-Server ## 请求BRPC-Server
###服务端启动 ### 服务端启动
以fit_a_line模型为例,服务端启动与常规BRPC-Server端启动命令一样。 以fit_a_line模型为例,服务端启动与常规BRPC-Server端启动命令一样。
...@@ -39,7 +39,7 @@ sh get_data.sh ...@@ -39,7 +39,7 @@ sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
``` ```
###客户端预测 ### 客户端预测
客户端目前支持多种请求方式,目前支持HTTP(数据为JSON格式)、HTTP(数据为PROTO格式)、GRPC 客户端目前支持多种请求方式,目前支持HTTP(数据为JSON格式)、HTTP(数据为PROTO格式)、GRPC
推荐您使用HTTP(数据为PROTO格式),此时数据体为PROTO格式,传输的数据量小,速度快,目前已经帮用户实现了HTTP/GRPC的数据体(JSON/PROTO)的封装函数,详见[Client.java](./src/main/java/io/paddle/serving/client/Client.java) 推荐您使用HTTP(数据为PROTO格式),此时数据体为PROTO格式,传输的数据量小,速度快,目前已经帮用户实现了HTTP/GRPC的数据体(JSON/PROTO)的封装函数,详见[Client.java](./src/main/java/io/paddle/serving/client/Client.java)
...@@ -47,14 +47,14 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -47,14 +47,14 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
cd ../../../java/examples/target cd ../../../java/examples/target
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample http_proto <configPath> java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample http_proto <configPath>
``` ```
**注意 <configPath>为客户端配置文件,一般是名为serving_client_conf.prototxt的文件。** **注意 `<configPath>`为客户端配置文件,一般是名为serving_client_conf.prototxt的文件。**
更多示例详见[PaddleServingClientExample.java](./examples/src/main/java/PaddleServingClientExample.java) 更多示例详见[PaddleServingClientExample.java](./examples/src/main/java/PaddleServingClientExample.java)
## 请求Pipeline-Server ## 请求Pipeline-Server
###服务端启动 ### 服务端启动
对于input data type = string类型,以IMDB model ensemble模型为例,服务端启动 对于input data type = string类型,以IMDB model ensemble模型为例,服务端启动
...@@ -66,14 +66,14 @@ python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow. ...@@ -66,14 +66,14 @@ python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow.
python test_pipeline_server.py &>pipeline.log & python test_pipeline_server.py &>pipeline.log &
``` ```
客户端预测(同步) ### 客户端预测(同步)
``` ```
cd ../../../java/examples/target cd ../../../java/examples/target
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PipelineClientExample string_imdb_predict java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PipelineClientExample string_imdb_predict
``` ```
客户端预测(异步) ### 客户端预测(异步)
``` ```
cd ../../../java/examples/target cd ../../../java/examples/target
...@@ -81,7 +81,7 @@ java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar Pipeli ...@@ -81,7 +81,7 @@ java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar Pipeli
``` ```
对于input data type = INDArray类型,以Simple Pipeline WebService中的uci_housing_model模型为例,服务端启动 ### 对于input data type = INDArray类型,以Simple Pipeline WebService中的uci_housing_model模型为例,服务端启动
``` ```
cd ../../python/examples/pipeline/simple_web_service cd ../../python/examples/pipeline/simple_web_service
...@@ -89,7 +89,7 @@ sh get_data.sh ...@@ -89,7 +89,7 @@ sh get_data.sh
python web_service_java.py &>log.txt & python web_service_java.py &>log.txt &
``` ```
客户端预测(同步) ### 客户端预测(同步)
``` ```
cd ../../../java/examples/target cd ../../../java/examples/target
...@@ -98,7 +98,7 @@ java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar Pipeli ...@@ -98,7 +98,7 @@ java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar Pipeli
### 注意事项 ### 注意事项
1.在示例中,端口号都是9393,ip默认设置为了0.0.0.0表示本机,注意ip和port需要与Server端对应。 1.在示例中,端口号都是9393,ip默认设置为了127.0.0.1表示本机,注意ip和port需要与Server端对应。
2.目前Serving已推出Pipeline模式(原理详见[Pipeline Serving](../doc/PIPELINE_SERVING_CN.md)),面向Java的Pipeline Serving Client已发布。 2.目前Serving已推出Pipeline模式(原理详见[Pipeline Serving](../doc/PIPELINE_SERVING_CN.md)),面向Java的Pipeline Serving Client已发布。
......
...@@ -25,7 +25,7 @@ public class PaddleServingClientExample { ...@@ -25,7 +25,7 @@ public class PaddleServingClientExample {
List<String> fetch = Arrays.asList("price"); List<String> fetch = Arrays.asList("price");
Client client = new Client(); Client client = new Client();
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
String result = client.predict(feed_data, fetch, true, 0); String result = client.predict(feed_data, fetch, true, 0);
...@@ -49,7 +49,7 @@ public class PaddleServingClientExample { ...@@ -49,7 +49,7 @@ public class PaddleServingClientExample {
Client client = new Client(); Client client = new Client();
//注意:跨docker,需要设置--net-host或直接访问另一个docker的ip //注意:跨docker,需要设置--net-host或直接访问另一个docker的ip
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.set_http_proto(false); client.set_http_proto(false);
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
...@@ -73,7 +73,7 @@ public class PaddleServingClientExample { ...@@ -73,7 +73,7 @@ public class PaddleServingClientExample {
List<String> fetch = Arrays.asList("price"); List<String> fetch = Arrays.asList("price");
Client client = new Client(); Client client = new Client();
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
client.set_use_grpc_client(true); client.set_use_grpc_client(true);
...@@ -97,7 +97,7 @@ public class PaddleServingClientExample { ...@@ -97,7 +97,7 @@ public class PaddleServingClientExample {
List<String> fetch = Arrays.asList("price"); List<String> fetch = Arrays.asList("price");
Client client = new Client(); Client client = new Client();
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
client.use_key(keyFilePath); client.use_key(keyFilePath);
...@@ -125,7 +125,7 @@ public class PaddleServingClientExample { ...@@ -125,7 +125,7 @@ public class PaddleServingClientExample {
List<String> fetch = Arrays.asList("price"); List<String> fetch = Arrays.asList("price");
Client client = new Client(); Client client = new Client();
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
client.set_request_compress(true); client.set_request_compress(true);
...@@ -176,7 +176,7 @@ public class PaddleServingClientExample { ...@@ -176,7 +176,7 @@ public class PaddleServingClientExample {
}}; }};
List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0"); List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0");
Client client = new Client(); Client client = new Client();
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
String result = client.predict(feed_data, fetch, true, 0); String result = client.predict(feed_data, fetch, true, 0);
...@@ -198,7 +198,7 @@ public class PaddleServingClientExample { ...@@ -198,7 +198,7 @@ public class PaddleServingClientExample {
}}; }};
List<String> fetch = Arrays.asList("pooled_output"); List<String> fetch = Arrays.asList("pooled_output");
Client client = new Client(); Client client = new Client();
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
String result = client.predict(feed_data, fetch, true, 0); String result = client.predict(feed_data, fetch, true, 0);
...@@ -268,7 +268,7 @@ public class PaddleServingClientExample { ...@@ -268,7 +268,7 @@ public class PaddleServingClientExample {
}}; }};
List<String> fetch = Arrays.asList("prob"); List<String> fetch = Arrays.asList("prob");
Client client = new Client(); Client client = new Client();
client.setIP("0.0.0.0"); client.setIP("127.0.0.1");
client.setPort("9393"); client.setPort("9393");
client.loadClientConfig(model_config_path); client.loadClientConfig(model_config_path);
String result = client.predict(feed_data, fetch, true, 0); String result = client.predict(feed_data, fetch, true, 0);
......
...@@ -59,9 +59,20 @@ import java.util.zip.GZIPInputStream; ...@@ -59,9 +59,20 @@ import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
enum ElementType class ElementType {
{ public static final int Int64_type = 0;
Int64_type, Float32_type, Int32_type, Bytes_type; public static final int Float32_type = 1;
public static final int Int32_type = 2;
public static final int String_type = 20;
public static final Map<Integer, String> feedTypeToDataKey_;
static
{
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(ElementType.Int64_type, "int64_data");
feedTypeToDataKey_.put(ElementType.Float32_type, "float_data");
feedTypeToDataKey_.put(ElementType.Int32_type, "int_data");
feedTypeToDataKey_.put(ElementType.String_type, "data");
}
} }
class Profiler { class Profiler {
...@@ -104,7 +115,6 @@ public class Client { ...@@ -104,7 +115,6 @@ public class Client {
private Map<String, Integer> feedTypes_; private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_; private Map<String, List<Integer>> feedShapes_;
private Map<String, Integer> feedNameToIndex_; private Map<String, Integer> feedNameToIndex_;
private Map<Integer, String> feedTypeToDataKey_;
private List<String> fetchNames_; private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_; private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_; private Set<String> lodTensorSet_;
...@@ -134,7 +144,7 @@ public class Client { ...@@ -134,7 +144,7 @@ public class Client {
feedTensorLen_ = null; feedTensorLen_ = null;
feedNameToIndex_ = null; feedNameToIndex_ = null;
timeoutS_ = 200000; timeoutS_ = 200000;
ip = "0.0.0.0"; ip = "127.0.0.1";
port = "9393"; port = "9393";
serverPort = "9393"; serverPort = "9393";
serviceName = "/GeneralModelService/inference"; serviceName = "/GeneralModelService/inference";
...@@ -147,12 +157,6 @@ public class Client { ...@@ -147,12 +157,6 @@ public class Client {
channel_ = null; channel_ = null;
blockingStub_ = null; blockingStub_ = null;
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(0, "int64_data");
feedTypeToDataKey_.put(1, "float_data");
feedTypeToDataKey_.put(2, "int_data");
feedTypeToDataKey_.put(3, "data");
profiler_ = new Profiler(); profiler_ = new Profiler();
boolean is_profile = false; boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client"); String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
...@@ -525,7 +529,7 @@ public class Client { ...@@ -525,7 +529,7 @@ public class Client {
jsonTensor.put("elem_type", element_type); jsonTensor.put("elem_type", element_type);
// 处理数据与shape // 处理数据与shape
String protoDataKey = feedTypeToDataKey_.get(element_type); String protoDataKey = ElementType.feedTypeToDataKey_.get(element_type);
// 如果是INDArray类型,先转为一维. // 如果是INDArray类型,先转为一维.
// 此时shape为INDArray的shape // 此时shape为INDArray的shape
if(objectValue instanceof INDArray){ if(objectValue instanceof INDArray){
...@@ -535,11 +539,11 @@ public class Client { ...@@ -535,11 +539,11 @@ public class Client {
for(long dim:indarrayShape){ for(long dim:indarrayShape){
shape.add((int)dim); shape.add((int)dim);
} }
if(element_type == ElementType.Int64_type.ordinal()){ if(element_type == ElementType.Int64_type){
objectValue = tempIndArray.data().asLong(); objectValue = tempIndArray.data().asLong();
}else if(element_type == ElementType.Int32_type.ordinal()){ }else if(element_type == ElementType.Int32_type){
objectValue = tempIndArray.data().asInt(); objectValue = tempIndArray.data().asInt();
}else if(element_type == ElementType.Float32_type.ordinal()){ }else if(element_type == ElementType.Float32_type){
objectValue = tempIndArray.data().asFloat(); objectValue = tempIndArray.data().asFloat();
}else{ }else{
throw new Exception("INDArray 类型不支持"); throw new Exception("INDArray 类型不支持");
...@@ -564,11 +568,11 @@ public class Client { ...@@ -564,11 +568,11 @@ public class Client {
// 此时无法获取batch信息,故对shape不处理 // 此时无法获取batch信息,故对shape不处理
// 由于Proto中为Repeated,需要把数据包装成list // 由于Proto中为Repeated,需要把数据包装成list
if(objectValue instanceof String){ if(objectValue instanceof String){
if(feedTypes_.get(protoDataKey)!= ElementType.Bytes_type.ordinal()){ if(feedTypes_.get(protoDataKey)!= ElementType.String_type){
throw new Exception("feedvar is not string-type,feed can`t be a single string."); throw new Exception("feedvar is not string-type,feed can`t be a single string.");
} }
}else{ }else{
if(feedTypes_.get(protoDataKey)== ElementType.Bytes_type.ordinal()){ if(feedTypes_.get(protoDataKey)== ElementType.String_type){
throw new Exception("feedvar is string-type,feed, feed can`t be a single int or others."); throw new Exception("feedvar is string-type,feed, feed can`t be a single int or others.");
} }
} }
...@@ -662,17 +666,17 @@ public class Client { ...@@ -662,17 +666,17 @@ public class Client {
for(long dim:indarrayShape){ for(long dim:indarrayShape){
shape.add((int)dim); shape.add((int)dim);
} }
if(element_type == ElementType.Int64_type.ordinal()){ if(element_type == ElementType.Int64_type){
List<Long> iter = Arrays.stream(tempIndArray.data().asLong()).boxed().collect(Collectors.toList()); List<Long> iter = Arrays.stream(tempIndArray.data().asLong()).boxed().collect(Collectors.toList());
tensor_builder.addAllInt64Data(iter); tensor_builder.addAllInt64Data(iter);
}else if(element_type == ElementType.Int32_type.ordinal()){ }else if(element_type == ElementType.Int32_type){
List<Integer> iter = Arrays.stream(tempIndArray.data().asInt()).boxed().collect(Collectors.toList()); List<Integer> iter = Arrays.stream(tempIndArray.data().asInt()).boxed().collect(Collectors.toList());
tensor_builder.addAllIntData(iter); tensor_builder.addAllIntData(iter);
}else if(element_type == ElementType.Float32_type.ordinal()){ }else if(element_type == ElementType.Float32_type){
List<Float> iter = Arrays.asList(ArrayUtils.toObject(tempIndArray.data().asFloat())); List<Float> iter = Arrays.asList(ArrayUtils.toObject(tempIndArray.data().asFloat()));
tensor_builder.addAllFloatData(iter); tensor_builder.addAllFloatData(iter);
...@@ -684,13 +688,13 @@ public class Client { ...@@ -684,13 +688,13 @@ public class Client {
// 如果是数组类型,则无须处理,直接使用即可。 // 如果是数组类型,则无须处理,直接使用即可。
// 且数组无法嵌套,此时batch无法从数据中获取 // 且数组无法嵌套,此时batch无法从数据中获取
// 默认batch维度为1,或者feedVar的shape信息中已包含batch // 默认batch维度为1,或者feedVar的shape信息中已包含batch
if(element_type == ElementType.Int64_type.ordinal()){ if(element_type == ElementType.Int64_type){
List<Long> iter = Arrays.stream((long[])objectValue).boxed().collect(Collectors.toList()); List<Long> iter = Arrays.stream((long[])objectValue).boxed().collect(Collectors.toList());
tensor_builder.addAllInt64Data(iter); tensor_builder.addAllInt64Data(iter);
}else if(element_type == ElementType.Int32_type.ordinal()){ }else if(element_type == ElementType.Int32_type){
List<Integer> iter = Arrays.stream((int[])objectValue).boxed().collect(Collectors.toList()); List<Integer> iter = Arrays.stream((int[])objectValue).boxed().collect(Collectors.toList());
tensor_builder.addAllIntData(iter); tensor_builder.addAllIntData(iter);
}else if(element_type == ElementType.Float32_type.ordinal()){ }else if(element_type == ElementType.Float32_type){
List<Float> iter = Arrays.asList(ArrayUtils.toObject((float[])objectValue)); List<Float> iter = Arrays.asList(ArrayUtils.toObject((float[])objectValue));
tensor_builder.addAllFloatData(iter); tensor_builder.addAllFloatData(iter);
}else{ }else{
...@@ -707,11 +711,11 @@ public class Client { ...@@ -707,11 +711,11 @@ public class Client {
// 在index=0处,加上batch // 在index=0处,加上batch
shape.add(0, list.size()); shape.add(0, list.size());
} }
if(element_type == ElementType.Int64_type.ordinal()){ if(element_type == ElementType.Int64_type){
tensor_builder.addAllInt64Data((List<Long>)(List)recursiveExtract(objectValue)); tensor_builder.addAllInt64Data((List<Long>)(List)recursiveExtract(objectValue));
}else if(element_type == ElementType.Int32_type.ordinal()){ }else if(element_type == ElementType.Int32_type){
tensor_builder.addAllIntData((List<Integer>)(List)recursiveExtract(objectValue)); tensor_builder.addAllIntData((List<Integer>)(List)recursiveExtract(objectValue));
}else if(element_type == ElementType.Float32_type.ordinal()){ }else if(element_type == ElementType.Float32_type){
tensor_builder.addAllFloatData((List<Float>)(List)recursiveExtract(objectValue)); tensor_builder.addAllFloatData((List<Float>)(List)recursiveExtract(objectValue));
}else{ }else{
// 看接口是String还是Bytes // 看接口是String还是Bytes
...@@ -723,11 +727,11 @@ public class Client { ...@@ -723,11 +727,11 @@ public class Client {
// 由于Proto中为Repeated,需要把数据包装成list // 由于Proto中为Repeated,需要把数据包装成list
List<Object> tempList = new ArrayList<>(); List<Object> tempList = new ArrayList<>();
tempList.add(objectValue); tempList.add(objectValue);
if(element_type == ElementType.Int64_type.ordinal()){ if(element_type == ElementType.Int64_type){
tensor_builder.addAllInt64Data((List<Long>)(List)tempList); tensor_builder.addAllInt64Data((List<Long>)(List)tempList);
}else if(element_type == ElementType.Int32_type.ordinal()){ }else if(element_type == ElementType.Int32_type){
tensor_builder.addAllIntData((List<Integer>)(List)tempList); tensor_builder.addAllIntData((List<Integer>)(List)tempList);
}else if(element_type == ElementType.Float32_type.ordinal()){ }else if(element_type == ElementType.Float32_type){
tensor_builder.addAllFloatData((List<Float>)(List)tempList); tensor_builder.addAllFloatData((List<Float>)(List)tempList);
}else{ }else{
// 看接口是String还是Bytes // 看接口是String还是Bytes
......
...@@ -12,41 +12,96 @@ ...@@ -12,41 +12,96 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
syntax = "proto2"; syntax = "proto3";
package baidu.paddle_serving.predictor.general_model; package baidu.paddle_serving.predictor.general_model;
option java_multiple_files = true; option java_multiple_files = true;
message Tensor { message Tensor {
repeated string data = 1; // VarType: INT64
repeated int32 int_data = 2; repeated int64 int64_data = 1;
repeated int64 int64_data = 3;
repeated float float_data = 4; // VarType: FP32
optional int32 elem_type = repeated float float_data = 2;
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch // VarType: INT32
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 int_data = 3;
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt // VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1
// represents the imaginary part
repeated float complex64_data = 7;
// (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1
// represents the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of data(LOD), support variable length data, only for fetch tensor
// currently.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14; // get from the Model prototxt
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
}; };
message Request { message Request {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; bool profile_server = 3;
required uint64 log_id = 4 [ default = 0 ]; uint64 log_id = 4;
}; };
message Response { message Response {
repeated ModelOutput outputs = 1; repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2; repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
}; };
message ModelOutput { message ModelOutput {
repeated Tensor tensor = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; string engine_name = 2;
} }
service GeneralModelService { service GeneralModelService {
rpc inference(Request) returns (Response) {} rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response) {} rpc debug(Request) returns (Response);
}; };
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include <dirent.h>
#include <pthread.h> #include <pthread.h>
#include <fstream> #include <fstream>
#include <map> #include <map>
...@@ -69,7 +70,33 @@ PrecisionType GetPrecision(const std::string& precision_data) { ...@@ -69,7 +70,33 @@ PrecisionType GetPrecision(const std::string& precision_data) {
return PrecisionType::kFloat32; return PrecisionType::kFloat32;
} }
// Engine Base const std::string getFileBySuffix(
const std::string& path, const std::vector<std::string>& suffixVector) {
DIR* dp = nullptr;
std::string fileName = "";
struct dirent* dirp = nullptr;
if ((dp = opendir(path.c_str())) == nullptr) {
return fileName;
}
while ((dirp = readdir(dp)) != nullptr) {
if (dirp->d_type == DT_REG) {
for (int idx = 0; idx < suffixVector.size(); ++idx) {
if (std::string(dirp->d_name).find(suffixVector[idx]) !=
std::string::npos) {
fileName = static_cast<std::string>(dirp->d_name);
break;
}
}
}
if (fileName.length() != 0) break;
}
closedir(dp);
return fileName;
}
// Engine Core is the base class of inference engines, which can be derived from
// paddle Inference Engine, or inference engines of other machine learning
// platforms
class EngineCore { class EngineCore {
public: public:
virtual ~EngineCore() {} virtual ~EngineCore() {}
...@@ -116,6 +143,11 @@ class EngineCore { ...@@ -116,6 +143,11 @@ class EngineCore {
virtual void* get() { return _predictor.get(); } virtual void* get() { return _predictor.get(); }
protected: protected:
// _predictor is a prediction instance of Paddle Inference.
// when inferring on the CPU, _predictor is bound to a model.
// when inferring on the GPU, _predictor is bound to a model and a GPU card.
// Therefore, when using GPU multi-card inference, you need to create multiple
// EngineCore.
std::shared_ptr<Predictor> _predictor; std::shared_ptr<Predictor> _predictor;
}; };
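To illustrate the comment above in Python terms: with the Paddle Inference Python API, binding one predictor per GPU card would look roughly like this (model file names, directory, and memory pool size are placeholders, not the server's actual configuration):

```python
# Sketch: one Paddle Inference predictor per GPU card, mirroring the comment that a
# GPU-bound predictor is tied to both a model and a card.
import paddle.inference as paddle_infer

def create_predictor_for_card(model_dir, gpu_id):
    config = paddle_infer.Config(model_dir + "/model.pdmodel",
                                 model_dir + "/model.pdiparams")  # combined-model layout assumed
    config.enable_use_gpu(100, gpu_id)  # 100 MB initial GPU memory pool on card `gpu_id`
    return paddle_infer.create_predictor(config)

# One predictor per card, so multi-card inference needs multiple EngineCore-like objects.
predictors = [create_predictor_for_card("serving_server", gpu_id) for gpu_id in (0, 1)]
```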
...@@ -131,9 +163,21 @@ class PaddleInferenceEngine : public EngineCore { ...@@ -131,9 +163,21 @@ class PaddleInferenceEngine : public EngineCore {
} }
Config config; Config config;
// todo, auto config(zhangjun) std::vector<std::string> suffixParaVector = {".pdiparams", "__params__"};
if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) { std::vector<std::string> suffixModelVector = {".pdmodel", "__model__"};
std::string paraFileName = getFileBySuffix(model_path, suffixParaVector);
std::string modelFileName = getFileBySuffix(model_path, suffixModelVector);
std::string encryParaPath = model_path + "/encrypt_model";
std::string encryModelPath = model_path + "/encrypt_params";
std::string encryKeyPath = model_path + "/key";
// encrypt model
if (access(encryParaPath.c_str(), F_OK) != -1 &&
access(encryModelPath.c_str(), F_OK) != -1 &&
access(encryKeyPath.c_str(), F_OK) != -1) {
// decrypt model // decrypt model
std::string model_buffer, params_buffer, key_buffer; std::string model_buffer, params_buffer, key_buffer;
predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer); predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer); predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
...@@ -147,16 +191,11 @@ class PaddleInferenceEngine : public EngineCore { ...@@ -147,16 +191,11 @@ class PaddleInferenceEngine : public EngineCore {
real_model_buffer.size(), real_model_buffer.size(),
&real_params_buffer[0], &real_params_buffer[0],
real_params_buffer.size()); real_params_buffer.size());
} else if (engine_conf.has_combined_model()) { } else if (paraFileName.length() != 0 && modelFileName.length() != 0) {
if (!engine_conf.combined_model()) { config.SetParamsFile(model_path + "/" + paraFileName);
config.SetModel(model_path); config.SetProgFile(model_path + "/" + modelFileName);
} else {
config.SetParamsFile(model_path + "/__params__");
config.SetProgFile(model_path + "/__model__");
}
} else { } else {
config.SetParamsFile(model_path + "/__params__"); config.SetModel(model_path);
config.SetProgFile(model_path + "/__model__");
} }
config.SwitchSpecifyInputNames(true); config.SwitchSpecifyInputNames(true);
......
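A small Python rendering of the auto-detection logic above, for readers who skip the C++: it looks up parameter and model files by suffix (`.pdiparams`/`__params__` and `.pdmodel`/`__model__`); the directory name is a placeholder.

```python
# Python sketch of the suffix-based model/parameter file lookup shown above.
import os

def get_file_by_suffix(path, suffixes):
    """Return the first regular file in `path` whose name contains one of `suffixes`, else ""."""
    for entry in sorted(os.listdir(path)):
        if os.path.isfile(os.path.join(path, entry)) and any(s in entry for s in suffixes):
            return entry
    return ""

model_path = "uci_housing_model"  # placeholder directory
param_file = get_file_by_suffix(model_path, [".pdiparams", "__params__"])
model_file = get_file_by_suffix(model_path, [".pdmodel", "__model__"])
# If both are found, the combined-model branch sets the files explicitly;
# otherwise the directory is passed to SetModel as a whole.
print(param_file, model_file)
```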
## Bert as service Http## Bert as service
([简体中文](./README_CN.md)|English) ([简体中文](./README_CN.md)|English)
...@@ -42,48 +42,36 @@ sh get_data.sh ...@@ -42,48 +42,36 @@ sh get_data.sh
``` ```
This script will download the Chinese dictionary file vocab.txt and the Chinese sample data data-c.txt This script will download the Chinese dictionary file vocab.txt and the Chinese sample data data-c.txt
### RPC Inference Service ### Inference Service (supports BRPC-Client, GRPC-Client, and HTTP-Client)
To start the CPU inference service, run To start the CPU inference service, run
``` ```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #cpu inference service python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #cpu inference service
``` ```
Or, to start the GPU inference service, run Or, to start the GPU inference service, run
``` ```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0 python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
``` ```
### RPC Inference ### BRPC-Client Inference
Before prediction we should install paddle_serving_app, which provides data preprocessing for the BERT model. Before prediction we should install paddle_serving_app, which provides data preprocessing for the BERT model.
``` ```
pip install paddle_serving_app pip3 install paddle_serving_app
``` ```
Run Run
``` ```
head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt head data-c.txt | python3 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
``` ```
The client reads data from data-c.txt and sends prediction requests; the prediction is returned as a word vector. (Due to the large amount of data in the word vector, we do not print it.) The client reads data from data-c.txt and sends prediction requests; the prediction is returned as a word vector. (Due to the large amount of data in the word vector, we do not print it.)
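A minimal sketch of what such a BRPC client does (a simplification, not the exact `bert_client.py`): read lines from stdin, preprocess them with `ChineseBertReader`, and fetch `pooled_output`.

```python
# Minimal BRPC-client sketch for the BERT demo (simplified from bert_client.py).
import sys
import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import ChineseBertReader

reader = ChineseBertReader({"max_seq_len": 128})
client = Client()
client.load_client_config("bert_seq128_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

for line in sys.stdin:
    feed_dict = reader.process(line)
    for key in feed_dict:
        # each feed slot is reshaped to (seq_len, 1), matching the HTTP client example in this commit
        feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
    result = client.predict(feed=feed_dict, fetch=["pooled_output"], batch=False)
    print(result)
```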
### HTTP Inference Service #### GRPC-Client/HTTP-Client
start cpu HTTP inference service,Run Run
```
python bert_web_service.py bert_seq128_model/ 9292 #launch cpu inference service
``` ```
head data-c.txt | python3 bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt
Or,start gpu HTTP inference service,Run
```
export CUDA_VISIBLE_DEVICES=0,1
```
set environmental variable to specify which gpus are used, the command above means gpu 0 and gpu 1 is used.
``` ```
python bert_web_service_gpu.py bert_seq128_model/ 9292 #launch gpu inference service
```
### HTTP Inference
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
```
## Benchmark ## Benchmark
``` shell ``` shell
......
...@@ -40,15 +40,15 @@ sh get_data.sh ...@@ -40,15 +40,15 @@ sh get_data.sh
``` ```
脚本将下载中文词典vocab.txt和中文样例数据data-c.txt 脚本将下载中文词典vocab.txt和中文样例数据data-c.txt
### 启动RPC预测服务 ### 启动预测服务(支持BRPC-Client、GRPC-Client、HTTP-Client三种方式访问)
启动cpu预测服务,执行 启动cpu预测服务,执行
``` ```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #启动cpu预测服务 python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #启动cpu预测服务
``` ```
或者,启动gpu预测服务,执行 或者,启动gpu预测服务,执行
``` ```
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务 python3 -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务
``` ```
...@@ -56,37 +56,22 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --g ...@@ -56,37 +56,22 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --g
执行预测前需要安装paddle_serving_app,模块中提供了BERT模型的数据预处理方法。 执行预测前需要安装paddle_serving_app,模块中提供了BERT模型的数据预处理方法。
``` ```
pip install paddle_serving_app pip3 install paddle_serving_app
``` ```
#### BRPC-Client
执行 执行
``` ```
head data-c.txt | python bert_client.py --model bert_seq128_client/serving_client_conf.prototxt head data-c.txt | python3 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
``` ```
启动client读取data-c.txt中的数据进行预测,预测结果为文本的向量表示(由于数据较多,脚本中没有将输出进行打印),server端的地址在脚本中修改。 启动client读取data-c.txt中的数据进行预测,预测结果为文本的向量表示(由于数据较多,脚本中没有将输出进行打印),server端的地址在脚本中修改。
#### GRPC-Client/HTTP-Client
执行
### 启动HTTP预测服务
启动cpu HTTP预测服务,执行
```
python bert_web_service.py bert_seq128_model/ 9292 #启动CPU预测服务
```
或者,启动gpu HTTP预测服务,执行
```
export CUDA_VISIBLE_DEVICES=0,1
```
通过环境变量指定gpu预测服务使用的gpu,示例中指定索引为0和1的两块gpu
```
python bert_web_service_gpu.py bert_seq128_model/ 9292 #启动gpu预测服务
``` ```
head data-c.txt | python3 bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt
### 执行预测
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:9292/bert/prediction
``` ```
## 性能测试 ## 性能测试
......
# coding=utf-8 # coding:utf-8
# pylint: disable=doc-string-missing
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -12,37 +13,46 @@ ...@@ -12,37 +13,46 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys import sys
import os from paddle_serving_client import HttpClient
from paddle_serving_client.utils import benchmark_args
from paddle_serving_app.reader import ChineseBertReader
import numpy as np import numpy as np
args = benchmark_args()
reader = ChineseBertReader({"max_seq_len": 128})
fetch = ["pooled_output"]
endpoint_list = ['127.0.0.1:9292']
client = HttpClient()
client.load_client_config(args.model)
'''
if you want use GRPC-client, set_use_grpc_client(True)
or you can directly use client.grpc_client_predict(...)
as for HTTP-client,set_use_grpc_client(False)(which is default)
or you can directly use client.http_client_predict(...)
'''
#client.set_use_grpc_client(True)
'''
if you want to enable Encrypt Module,uncommenting the following line
'''
#client.use_key("./key")
'''
if you want to compress,uncommenting the following line
'''
#client.set_response_compress(True)
#client.set_request_compress(True)
'''
we recommend use Proto data format in HTTP-body, set True(which is default)
if you want use JSON data format in HTTP-body, set False
'''
#client.set_http_proto(True)
client.connect(endpoint_list)
class BertService(WebService): for line in sys.stdin:
def load(self): feed_dict = reader.process(line)
self.reader = ChineseBertReader({ for key in feed_dict.keys():
"vocab_file": "vocab.txt", feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
"max_seq_len": 128 #print(feed_dict)
}) result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
print(result)
def preprocess(self, feed=[], fetch=[]):
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), device="cpu")
bert_service.run_rpc_service()
bert_service.run_web_service()
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
def load(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, feed=[], fetch=[]):
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.set_gpus("0")
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), device="gpu")
bert_service.run_rpc_service()
bert_service.run_web_service()
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model blazeface python3 -m paddle_serving_app.package --get_model blazeface
tar -xf blazeface.tar.gz tar -xf blazeface.tar.gz
``` ```
...@@ -11,13 +11,13 @@ tar -xf blazeface.tar.gz ...@@ -11,13 +11,13 @@ tar -xf blazeface.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model serving_server --port 9494 python3 -m paddle_serving_server.serve --model serving_server --port 9494
``` ```
### Client Prediction ### Client Prediction
``` ```
python test_client.py serving_client/serving_client_conf.prototxt test.jpg python3 test_client.py serving_client/serving_client_conf.prototxt test.jpg
``` ```
The result is in the `output` folder, including a JSON file and an image file with bounding boxes. The result is in the `output` folder, including a JSON file and an image file with bounding boxes.
...@@ -10,12 +10,12 @@ If you want to have more detection models, please refer to [Paddle Detection Mod ...@@ -10,12 +10,12 @@ If you want to have more detection models, please refer to [Paddle Detection Mod
### Start the service ### Start the service
``` ```
python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 python3 -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
``` ```
### Perform prediction ### Perform prediction
``` ```
python test_client.py python3 test_client.py 000000570688.jpg
``` ```
The image with bounding boxes and the JSON result will be saved in the `output` folder. The image with bounding boxes and the JSON result will be saved in the `output` folder.
...@@ -10,12 +10,12 @@ sh get_data.sh ...@@ -10,12 +10,12 @@ sh get_data.sh
### 启动服务 ### 启动服务
``` ```
python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0 python3 -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
``` ```
### 执行预测 ### 执行预测
``` ```
python test_client.py python3 test_client.py 000000570688.jpg
``` ```
客户端已经为图片做好了后处理,在`output`文件夹下存放各个框的json格式信息还有后处理结果图片。 客户端已经为图片做好了后处理,在`output`文件夹下存放各个框的json格式信息还有后处理结果图片。
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz tar xf cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
background
person person
bicycle bicycle
car car
......
...@@ -12,29 +12,35 @@ ...@@ -12,29 +12,35 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import sys
import numpy as np
from paddle_serving_client import Client from paddle_serving_client import Client
from paddle_serving_app.reader import * from paddle_serving_app.reader import *
import numpy as np import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), Div(255.0), DetectionFile2Image(),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False), DetectionResize((800, 1333), True, interpolation=2),
Resize(800, 1333), Transpose((2, 0, 1)), PadStride(32) DetectionNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True),
DetectionTranspose((2,0,1)),
DetectionPadStride(32)
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
client = Client() client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9292']) client.connect(['127.0.0.1:9292'])
im = preprocess('000000570688.jpg')
im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"im_info": np.array(list(im.shape[1:]) + [1.0]), "im_shape": np.array(list(im.shape[1:])).reshape(-1),
"im_shape": np.array(list(im.shape[1:]) + [1.0]) "scale_factor": im_info['scale_factor'],
}, },
fetch=["multiclass_nms_0.tmp_0"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
fetch_map["image"] = '000000570688.jpg'
print(fetch_map) print(fetch_map)
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map) postprocess(fetch_map)
print(fetch_map)
...@@ -19,13 +19,13 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear. ...@@ -19,13 +19,13 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear.
### Start RPC Inference Service ### Start RPC Inference Service
``` ```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0 python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0
``` ```
### RPC Infer ### RPC Infer
``` ```
python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 python3 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0
``` ```
The latency will be displayed at the end. The latency will be displayed at the end.
...@@ -19,13 +19,13 @@ mv models/ctr_serving_model . ...@@ -19,13 +19,13 @@ mv models/ctr_serving_model .
### 启动RPC预测服务 ### 启动RPC预测服务
``` ```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务 python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务 python3 -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务
``` ```
### 执行预测 ### 执行预测
``` ```
python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0 python3 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0
``` ```
预测完毕会输出预测过程的耗时。 预测完毕会输出预测过程的耗时。
...@@ -32,13 +32,13 @@ Here, the sparse parameter is loaded by cube sparse parameter indexing service C ...@@ -32,13 +32,13 @@ Here, the sparse parameter is loaded by cube sparse parameter indexing service C
### Start the RPC Predictor; the number of serving threads is 4 (configurable in test_server.py) ### Start the RPC Predictor; the number of serving threads is 4 (configurable in test_server.py)
``` ```
python test_server.py ctr_serving_model_kv python3 test_server.py ctr_serving_model_kv
``` ```
### Run Prediction ### Run Prediction
``` ```
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data python3 test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
``` ```
### Benchmark ### Benchmark
......
...@@ -30,13 +30,13 @@ sh cube_prepare.sh & ...@@ -30,13 +30,13 @@ sh cube_prepare.sh &
### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置) ### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置)
``` ```
python test_server.py ctr_serving_model_kv python3 test_server.py ctr_serving_model_kv
``` ```
### 执行预测 ### 执行预测
``` ```
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data python3 test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
``` ```
### Benchmark ### Benchmark
......
[{
"dict_name": "test_dict",
"shard": 1,
"dup": 1,
"timeout": 200,
"retry": 3,
"backup_request": 100,
"type": "ipport_list",
"load_balancer": "rr",
"nodes": [{
"ipport_list": "list://127.0.0.1:8027"
}]
}]
--port=8027
--dict_split=1
--in_mem=true
--log_dir=./log/
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
from paddle_serving_client import Client from paddle_serving_client import Client
import sys import sys
import os import os
import criteo as criteo import criteo_reader as criteo
import time import time
from paddle_serving_client.metric import auc from paddle_serving_client.metric import auc
import numpy as np import numpy as np
...@@ -35,22 +35,23 @@ reader = dataset.infer_reader(test_filelists, batch, buf_size) ...@@ -35,22 +35,23 @@ reader = dataset.infer_reader(test_filelists, batch, buf_size)
label_list = [] label_list = []
prob_list = [] prob_list = []
start = time.time() start = time.time()
for ei in range(10000): for ei in range(100):
if py_version == 2: if py_version == 2:
data = reader().next() data = reader().next()
else: else:
data = reader().__next__() data = reader().__next__()
feed_dict = {} feed_dict = {}
feed_dict['dense_input'] = data[0][0] feed_dict['dense_input'] = np.array(data[0][0]).reshape(1, len(data[0][0]))
for i in range(1, 27): for i in range(1, 27):
feed_dict["embedding_{}.tmp_0".format(i - 1)] = np.array(data[0][i]).reshape(-1) feed_dict["embedding_{}.tmp_0".format(i - 1)] = np.array(data[0][i]).reshape(len(data[0][i]))
feed_dict["embedding_{}.tmp_0.lod".format(i - 1)] = [0, len(data[0][i])] feed_dict["embedding_{}.tmp_0.lod".format(i - 1)] = [0, len(data[0][i])]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"]) fetch_map = client.predict(feed=feed_dict, fetch=["prob"],batch=True)
print(fetch_map) print(fetch_map)
prob_list.append(fetch_map['prob'][0][1]) prob_list.append(fetch_map['prob'][0][1])
label_list.append(data[0][-1][0]) label_list.append(data[0][-1][0])
print(auc(label_list, prob_list))
end = time.time() end = time.time()
print(end - start) print(end - start)
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model deeplabv3 python3 -m paddle_serving_app.package --get_model deeplabv3
tar -xzvf deeplabv3.tar.gz tar -xzvf deeplabv3.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf deeplabv3.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf deeplabv3.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494 python3 -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
``` ```
### Client Prediction ### Client Prediction
``` ```
python deeplabv3_client.py python3 deeplabv3_client.py
``` ```
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## 获取模型 ## 获取模型
``` ```
python -m paddle_serving_app.package --get_model deeplabv3 python3 -m paddle_serving_app.package --get_model deeplabv3
tar -xzvf deeplabv3.tar.gz tar -xzvf deeplabv3.tar.gz
``` ```
...@@ -12,10 +12,10 @@ tar -xzvf deeplabv3.tar.gz ...@@ -12,10 +12,10 @@ tar -xzvf deeplabv3.tar.gz
### 启动服务端 ### 启动服务端
``` ```
python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494 python3 -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
``` ```
### 客户端预测 ### 客户端预测
``` ```
python deeplabv3_client.py python3 deeplabv3_client.py
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
### Get The Faster RCNN HRNet Model ### Get The Faster RCNN HRNet Model
``` ```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/faster_rcnn_hrnetv2p_w18_1x.tar wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/faster_rcnn_hrnetv2p_w18_1x.tar.gz
``` ```
### Start the service ### Start the service
``` ```
tar xf faster_rcnn_hrnetv2p_w18_1x.tar tar xf faster_rcnn_hrnetv2p_w18_1x.tar.gz
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, please use `--use_trt`, but you need to do some extra work. This model supports TensorRT; if you want faster inference, please use `--use_trt`, but you need to do some extra work.
...@@ -19,5 +19,5 @@ Please reference to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/m ...@@ -19,5 +19,5 @@ Please reference to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/m
### Prediction ### Prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
...@@ -4,19 +4,19 @@ ...@@ -4,19 +4,19 @@
## 获得Faster RCNN HRNet模型 ## 获得Faster RCNN HRNet模型
``` ```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/faster_rcnn_hrnetv2p_w18_1x.tar wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/faster_rcnn_hrnetv2p_w18_1x.tar.gz
``` ```
### 启动服务 ### 启动服务
``` ```
tar xf faster_rcnn_hrnetv2p_w18_1x.tar tar xf faster_rcnn_hrnetv2p_w18_1x.tar.gz
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项,但此时需要额外设置子图的TRT变长最大最小最优shape. 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项,但此时需要额外设置子图的TRT变长最大最小最优shape.
请参考https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40 请参考https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### 执行预测 ### 执行预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
from paddle_serving_client import Client # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
from paddle_serving_app.reader import * #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import numpy as np import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), Div(255.0), DetectionFile2Image(),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False), DetectionResize((800, 1333), True, interpolation=2),
Resize(640, 640), Transpose((2, 0, 1)) DetectionNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True),
DetectionTranspose((2,0,1)),
DetectionPadStride(32)
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
...@@ -15,13 +32,15 @@ client = Client() ...@@ -15,13 +32,15 @@ client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9494']) client.connect(['127.0.0.1:9494'])
im = preprocess(sys.argv[1]) im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"im_info": np.array(list(im.shape[1:]) + [1.0]), "im_shape": np.array(list(im.shape[1:])).reshape(-1),
"im_shape": np.array(list(im.shape[1:]) + [1.0]) "scale_factor": im_info['scale_factor'],
}, },
fetch=["multiclass_nms_0.tmp_0"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
print(fetch_map) print(fetch_map)
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf faster_rcnn_r50_fpn_1x_coco.tar tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, please use `--use_trt`, but you need to do some extra work. This model supports TensorRT; if you want faster inference, please use `--use_trt`, but you need to do some extra work.
...@@ -19,7 +19,7 @@ Please reference to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/m ...@@ -19,7 +19,7 @@ Please reference to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/m
### Perform prediction ### Perform prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
## 3. Result analysis ## 3. Result analysis
......
...@@ -11,14 +11,14 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,14 +11,14 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf faster_rcnn_r50_fpn_1x_coco.tar tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项,但此时需要额外设置子图的TRT变长最大最小最优shape. 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项,但此时需要额外设置子图的TRT变长最大最小最优shape.
请参考https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40 请参考https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### 执行预测 ### 执行预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
## 3. 结果分析 ## 3. 结果分析
......
...@@ -12,15 +12,19 @@ ...@@ -12,15 +12,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys import sys
import numpy as np import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), Div(255.0), DetectionFile2Image(),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False), DetectionNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True),
Resize(640, 640), Transpose((2, 0, 1)) DetectionResize(
(800, 1333), True, interpolation=cv2.INTER_LINEAR),
DetectionTranspose((2,0,1)),
DetectionPadStride(128)
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
...@@ -29,15 +33,14 @@ client = Client() ...@@ -29,15 +33,14 @@ client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9494']) client.connect(['127.0.0.1:9494'])
im = preprocess(sys.argv[1]) im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"im_shape": np.array(list(im.shape[1:])).reshape(-1), "im_shape": np.array(list(im.shape[1:])).reshape(-1),
"scale_factor": np.array([1.0, 1.0]).reshape(-1), "scale_factor": im_info['scale_factor'],
}, },
fetch=["save_infer_model/scale_0.tmp_1"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
print(fetch_map)
fetch_map["image"] = sys.argv[1] fetch_map["image"] = sys.argv[1]
postprocess(fetch_map) postprocess(fetch_map)
...@@ -10,11 +10,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,11 +10,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf fcos_dcn_r50_fpn_1x_coco.tar tar xf fcos_dcn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, please use `--use_trt`. This model supports TensorRT; if you want faster inference, please use `--use_trt`.
### Perform prediction ### Perform prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000014439.jpg
``` ```
...@@ -11,12 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,12 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf fcos_dcn_r50_fpn_1x_coco.tar tar xf fcos_dcn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
### 执行预测 ### 执行预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000014439.jpg
``` ```
...@@ -12,15 +12,19 @@ ...@@ -12,15 +12,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys import sys
import numpy as np import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), Div(255.0), DetectionFile2Image(),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False), DetectionNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True),
Resize(640, 640), Transpose((2, 0, 1)) DetectionResize(
(800, 1333), True, interpolation=cv2.INTER_LINEAR),
DetectionTranspose((2,0,1)),
DetectionPadStride(128)
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
...@@ -29,12 +33,14 @@ client = Client() ...@@ -29,12 +33,14 @@ client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9494']) client.connect(['127.0.0.1:9494'])
im = preprocess(sys.argv[1]) im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"scale_factor": np.array([1.0, 1.0]).reshape(-1), "scale_factor": im_info['scale_factor'],
}, },
fetch=["save_infer_model/scale_0.tmp_1"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
print(fetch_map) print(fetch_map)
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
...@@ -10,13 +10,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,13 +10,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf ppyolo_r50vd_dcn_1x_coco.tar tar xf ppyolo_r50vd_dcn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, please use `--use_trt`. This model supports TensorRT; if you want faster inference, please use `--use_trt`.
### Perform prediction ### Perform prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
...@@ -11,13 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,13 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf ppyolo_r50vd_dcn_1x_coco.tar tar xf ppyolo_r50vd_dcn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
### 执行预测 ### 执行预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
...@@ -12,15 +12,18 @@ ...@@ -12,15 +12,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys import sys
import numpy as np import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), Div(255.0), DetectionFile2Image(),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False), DetectionNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True),
Resize((608, 608)), Transpose((2, 0, 1)) DetectionResize(
(608, 608), False, interpolation=2),
DetectionTranspose((2,0,1))
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
...@@ -29,15 +32,14 @@ client = Client() ...@@ -29,15 +32,14 @@ client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9494']) client.connect(['127.0.0.1:9494'])
im = preprocess(sys.argv[1]) im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"im_shape": np.array(list(im.shape[1:])).reshape(-1), "im_shape": np.array(list(im.shape[1:])).reshape(-1),
"scale_factor": np.array([1.0, 1.0]).reshape(-1), "scale_factor": im_info['scale_factor'],
}, },
fetch=["save_infer_model/scale_0.tmp_1"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
print(fetch_map)
fetch_map["image"] = sys.argv[1] fetch_map["image"] = sys.argv[1]
postprocess(fetch_map) postprocess(fetch_map)
...@@ -10,11 +10,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,11 +10,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf ssd_vgg16_300_240e_voc.tar tar xf ssd_vgg16_300_240e_voc.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, please use `--use_trt`. This model supports TensorRT; if you want faster inference, please use `--use_trt`.
### Perform prediction ### Perform prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000014439.jpg
``` ```
...@@ -11,12 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,12 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf ssd_vgg16_300_240e_voc.tar tar xf ssd_vgg16_300_240e_voc.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
### 执行预测 ### 执行预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000014439.jpg
``` ```
person aeroplane
bicycle bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird bird
boat
bottle
bus
car
cat cat
chair
cow
diningtable
dog dog
horse horse
motorbike
person
pottedplant
sheep sheep
cow sofa
elephant train
bear tvmonitor
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
...@@ -12,15 +12,18 @@ ...@@ -12,15 +12,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys import sys
import numpy as np import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), DetectionFile2Image(),
Normalize([123.675, 116.28, 103.53], [58.395, 57.12, 57.375], False), DetectionResize(
Resize((512, 512)), Transpose((2, 0, 1)) (300, 300), False, interpolation=cv2.INTER_LINEAR),
DetectionNormalize([104.0, 117.0, 123.0], [1.0, 1.0, 1.0], False),
DetectionTranspose((2,0,1)),
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
...@@ -29,13 +32,15 @@ client = Client() ...@@ -29,13 +32,15 @@ client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9494']) client.connect(['127.0.0.1:9494'])
im = preprocess(sys.argv[1]) im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"im_shape": np.array([512, 512]), "im_shape": np.array(list(im.shape[1:])).reshape(-1),
"scale_factor": np.array([1.0, 1.0]).reshape(-1), "scale_factor": im_info['scale_factor'],
}, },
fetch=["save_infer_model/scale_0.tmp_1"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
print(fetch_map) print(fetch_map)
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
...@@ -4,18 +4,17 @@ ...@@ -4,18 +4,17 @@
### Get Model ### Get Model
``` ```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ttfnet_darknet53_1x_coco.tar wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/ttfnet_darknet53_1x_coco.tar
``` ```
### Start the service ### Start the service
``` ```
tar xf ttfnet_darknet53_1x_coco.tar tar xf ttfnet_darknet53_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, please use `--use_trt`. This model supports TensorRT; if you want faster inference, please use `--use_trt`.
### Perform prediction ### Perform prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
...@@ -4,20 +4,19 @@ ...@@ -4,20 +4,19 @@
## 获得模型 ## 获得模型
``` ```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ttfnet_darknet53_1x_coco.tar wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/ttfnet_darknet53_1x_coco.tar
``` ```
### 启动服务 ### 启动服务
``` ```
tar xf ttfnet_darknet53_1x_coco.tar tar xf ttfnet_darknet53_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
### 执行预测 ### 执行预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
...@@ -11,16 +11,18 @@ ...@@ -11,16 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys import sys
import numpy as np import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), DetectionFile2Image(),
Normalize([123.675, 116.28, 103.53], [58.395, 57.12, 57.375], False), DetectionResize(
Resize((512, 512)), Transpose((2, 0, 1)) (512, 512), False, interpolation=cv2.INTER_LINEAR),
DetectionNormalize([123.675, 116.28, 103.53], [58.395, 57.12, 57.375], False),
DetectionTranspose((2,0,1))
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
...@@ -29,11 +31,14 @@ client = Client() ...@@ -29,11 +31,14 @@ client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9494']) client.connect(['127.0.0.1:9494'])
im = preprocess(sys.argv[1]) im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"scale_factor": np.array([1.0, 1.0]).reshape(-1), "im_shape": np.array(list(im.shape[1:])).reshape(-1),
"scale_factor": im_info['scale_factor'],
}, },
fetch=["save_infer_model/scale_0.tmp_1"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
......
...@@ -10,13 +10,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,13 +10,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf yolov3_darknet53_270e_coco.tar tar xf yolov3_darknet53_270e_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT; if you want faster inference, please use `--use_trt`. This model supports TensorRT; if you want faster inference, please use `--use_trt`.
### Perform prediction ### Perform prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
...@@ -11,13 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,13 +11,12 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf yolov3_darknet53_270e_coco.tar tar xf yolov3_darknet53_270e_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0 python3 -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
### 执行预测 ### 执行预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
...@@ -12,15 +12,18 @@ ...@@ -12,15 +12,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys import sys
import numpy as np import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([ preprocess = DetectionSequential([
File2Image(), BGR2RGB(), Div(255.0), DetectionFile2Image(),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False), DetectionResize(
Resize((608, 608)), Transpose((2, 0, 1)) (608, 608), False, interpolation=2),
DetectionNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True),
DetectionTranspose((2,0,1)),
]) ])
postprocess = RCNNPostprocess("label_list.txt", "output") postprocess = RCNNPostprocess("label_list.txt", "output")
...@@ -29,15 +32,14 @@ client = Client() ...@@ -29,15 +32,14 @@ client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt") client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9494']) client.connect(['127.0.0.1:9494'])
im = preprocess(sys.argv[1]) im, im_info = preprocess(sys.argv[1])
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
"im_shape": np.array(list(im.shape[1:])).reshape(-1), "im_shape": np.array(list(im.shape[1:])).reshape(-1),
"scale_factor": np.array([1.0, 1.0]).reshape(-1), "scale_factor": im_info['scale_factor'],
}, },
fetch=["save_infer_model/scale_0.tmp_1"], fetch=["save_infer_model/scale_0.tmp_1"],
batch=False) batch=False)
print(fetch_map)
fetch_map["image"] = sys.argv[1] fetch_map["image"] = sys.argv[1]
postprocess(fetch_map) postprocess(fetch_map)
...@@ -12,9 +12,9 @@ sh get_data.sh ...@@ -12,9 +12,9 @@ sh get_data.sh
## Encrypt Model ## Encrypt Model
The `paddlepaddle` package is used in this example; you may need to install it first (`pip install paddlepaddle`). The `paddlepaddle` package is used in this example; you may need to install it first (`pip3 install paddlepaddle`).
[python encrypt.py](./encrypt.py) [python3 encrypt.py](./encrypt.py)
[//file]:#encrypt.py [//file]:#encrypt.py
``` python ``` python
...@@ -35,14 +35,14 @@ client-side configuration file are stored in the `encrypt_client` directory. ...@@ -35,14 +35,14 @@ client-side configuration file are stored in the `encrypt_client` directory.
## Start Encryption Service ## Start Encryption Service
CPU Service CPU Service
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
``` ```
GPU Service GPU Service
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0 python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
``` ```
## Prediction ## Prediction
``` ```
python test_client.py encrypt_client/serving_client_conf.prototxt python3 test_client.py encrypt_client/serving_client_conf.prototxt
``` ```
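On the client side, the key file produced during encryption has to be supplied before connecting. A minimal sketch of the idea, reusing the `HttpClient` switches shown elsewhere in this commit (the `./key` path, and the assumption that `use_key` alone is enough for the encrypted service, are illustrative rather than taken from this example's client script):
```python
from paddle_serving_client.httpclient import HttpClient

client = HttpClient()
client.load_client_config("encrypt_client/serving_client_conf.prototxt")
client.use_key("./key")  # assumption: the key file written when the model was encrypted
client.connect(["127.0.0.1:9393"])
```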
...@@ -11,9 +11,9 @@ sh get_data.sh ...@@ -11,9 +11,9 @@ sh get_data.sh
``` ```
## 模型加密 ## 模型加密
本示例中使用了`paddlepaddle`包中的模块,需要进行下载(`pip install paddlepaddle`)。 本示例中使用了`paddlepaddle`包中的模块,需要进行下载(`pip3 install paddlepaddle`)。
运行[python encrypt.py](./encrypt.py)进行模型加密 运行[python3 encrypt.py](./encrypt.py)进行模型加密
[//file]:#encrypt.py [//file]:#encrypt.py
``` python ``` python
...@@ -36,14 +36,14 @@ def serving_encryption(): ...@@ -36,14 +36,14 @@ def serving_encryption():
## 启动加密预测服务 ## 启动加密预测服务
CPU预测服务 CPU预测服务
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
``` ```
GPU预测服务 GPU预测服务
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0 python3 -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
``` ```
## 预测 ## 预测
``` ```
python test_client.py encrypt_client/serving_client_conf.prototxt python3 test_client.py encrypt_client/serving_client_conf.prototxt
``` ```
...@@ -15,22 +15,22 @@ sh get_data.sh ...@@ -15,22 +15,22 @@ sh get_data.sh
### Start server ### Start server
```shell ```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
``` ```
## Client prediction ## Client prediction
### RPC Client ### RPC Client
The `paddlepaddle` package is used in `test_client.py`; you may need to install it first (`pip install paddlepaddle`). The `paddlepaddle` package is used in `test_client.py`; you may need to install it first (`pip3 install paddlepaddle`).
``` shell ``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt python3 test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
### Http Client ### Http Client
``` shell ``` shell
python test_httpclient.py uci_housing_client/serving_client_conf.prototxt python3 test_httpclient.py uci_housing_client/serving_client_conf.prototxt
``` ```
......
...@@ -9,28 +9,26 @@ sh get_data.sh ...@@ -9,28 +9,26 @@ sh get_data.sh
``` ```
## 开启服务端(支持BRPC-Client/GRPC Client/Http-Client)
## 开启服务端
```shell ```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
``` ```
## 客户端预测 ## 客户端预测
### 客户端RPC ### BRPC-Client
`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。 `test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip3 install paddlepaddle`)。
``` shell ``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt python3 test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
### 客户端Http预测 ### GRPC-Client/Http-Client
``` shell ``` shell
python test_httpclient.py uci_housing_client/serving_client_conf.prototxt python3 test_httpclient.py uci_housing_client/serving_client_conf.prototxt
``` ```
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client.httpclient import GeneralClient from paddle_serving_client.httpclient import HttpClient
import sys import sys
import numpy as np import numpy as np
import time import time
client = GeneralClient() client = HttpClient()
client.load_client_config(sys.argv[1]) client.load_client_config(sys.argv[1])
''' '''
if you want use GRPC-client, set_use_grpc_client(True) if you want use GRPC-client, set_use_grpc_client(True)
...@@ -41,13 +41,14 @@ we recommend use Proto data format in HTTP-body, set True(which is default) ...@@ -41,13 +41,14 @@ we recommend use Proto data format in HTTP-body, set True(which is default)
if you want use JSON data format in HTTP-body, set False if you want use JSON data format in HTTP-body, set False
''' '''
#client.set_http_proto(True) #client.set_http_proto(True)
client.connect(["127.0.0.1:9393"])
fetch_list = client.get_fetch_names()
import paddle import paddle
test_reader = paddle.batch( test_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500), paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=1) batch_size=1)
fetch_list = client.get_fetch_names()
for data in test_reader(): for data in test_reader():
new_data = np.zeros((1, 13)).astype("float32") new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0] new_data[0] = data[0][0]
......
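The switches described in the comments of `test_httpclient.py` above are plain setter calls on the client; a minimal sketch with the defaults spelled out explicitly (the values are illustrative):
```python
from paddle_serving_client.httpclient import HttpClient

client = HttpClient()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.set_use_grpc_client(False)    # False (default) = HTTP transport, True = GRPC
client.set_http_proto(True)          # True (default) = Proto body, False = JSON body
client.set_request_compress(False)   # compress the request body if True
client.set_response_compress(False)  # ask the server to compress the response if True
client.connect(["127.0.0.1:9393"])
fetch_list = client.get_fetch_names()
```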
...@@ -12,38 +12,30 @@ sh get_model.sh ...@@ -12,38 +12,30 @@ sh get_model.sh
### Install preprocess module ### Install preprocess module
``` ```
pip install paddle_serving_app pip3 install paddle_serving_app
``` ```
### HTTP Service
launch server side
```
python resnet50_web_service.py ResNet50_vd_model cpu 9696 #cpu inference service
```
```
python resnet50_web_service.py ResNet50_vd_model gpu 9696 #gpu inference service
```
### Inference Service(Support BRPC-Client/GRPC-Client/Http-Client)
client send inference request launch server side
``` ```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu inference service
``` ```
### RPC Service
launch server side
``` ```
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu inference service python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
``` ```
### BRPC-Client
The client sends an inference request
``` ```
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service python3 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
``` ```
*The server-side port in this example is 9696
### GRPC-Client/Http-Client
The client sends an inference request The client sends an inference request
``` ```
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt python3 resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
``` ```
*The server-side port in this example is 9696
...@@ -12,38 +12,30 @@ sh get_model.sh ...@@ -12,38 +12,30 @@ sh get_model.sh
### 安装数据预处理模块 ### 安装数据预处理模块
``` ```
pip install paddle_serving_app pip3 install paddle_serving_app
``` ```
### HTTP服务 ### 启动服务端(支持BRPC-Client、GRPC-Client、Http-Client)
启动server端 启动server端
``` ```
python resnet50_web_service.py ResNet50_vd_model cpu 9696 #cpu预测服务 python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu预测服务
``` ```
```
python resnet50_web_service.py ResNet50_vd_model gpu 9696 #gpu预测服务
```
发送HTTP POST请求
``` ```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:9696/image/prediction python3 -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务
``` ```
### RPC服务 ### BRPC-Client预测
client端进行预测
启动server端
``` ```
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu预测服务 python3 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
``` ```
*server端示例中服务端口为9696端口
```
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务
```
### GRPC-Client/Http-Client预测
client端进行预测 client端进行预测
``` ```
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt python3 resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
``` ```
*server端示例中服务端口为9696端口
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import HttpClient
from paddle_serving_app.reader import Sequential, URL2Image, Resize
from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
import time
client = HttpClient()
client.load_client_config(sys.argv[1])
'''
if you want use GRPC-client, set_use_grpc_client(True)
or you can directly use client.grpc_client_predict(...)
as for HTTP-client,set_use_grpc_client(False)(which is default)
or you can directly use client.http_client_predict(...)
'''
#client.set_use_grpc_client(True)
'''
if you want to enable Encrypt Module,uncommenting the following line
'''
#client.use_key("./key")
'''
if you want to compress,uncommenting the following line
'''
#client.set_response_compress(True)
#client.set_request_compress(True)
'''
we recommend use Proto data format in HTTP-body, set True(which is default)
if you want use JSON data format in HTTP-body, set False
'''
#client.set_http_proto(True)
client.connect(["127.0.0.1:9696"])
label_dict = {}
label_idx = 0
with open("imagenet.label") as fin:
for line in fin:
label_dict[label_idx] = line.strip()
label_idx += 1
seq = Sequential([
URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
start = time.time()
image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"
for i in range(10):
img = seq(image_file)
fetch_map = client.predict(
feed={"image": img}, fetch=["score"], batch=False)
print(fetch_map)
end = time.time()
print(end - start)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import Client
import numpy as np
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
if len(sys.argv) != 4:
print("python resnet50_web_service.py model device port")
sys.exit(-1)
device = sys.argv[2]
if device == "cpu":
from paddle_serving_server.web_service import WebService
else:
from paddle_serving_server.web_service import WebService
class ImageService(WebService):
def init_imagenet_setting(self):
self.seq = Sequential([
URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose(
(2, 0, 1)), Div(255), Normalize([0.485, 0.456, 0.406],
[0.229, 0.224, 0.225], True)
])
self.label_dict = {}
label_idx = 0
with open("imagenet.label") as fin:
for line in fin:
self.label_dict[label_idx] = line.strip()
label_idx += 1
def preprocess(self, feed=[], fetch=[]):
feed_batch = []
is_batch = True
for ins in feed:
if "image" not in ins:
raise ("feed data error!")
img = self.seq(ins["image"])
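# img[np.newaxis, :] adds a leading batch dimension, so each feed entry is shaped (1, C, H, W)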
feed_batch.append({"image": img[np.newaxis, :]})
return feed_batch, fetch, is_batch
def postprocess(self, feed=[], fetch=[], fetch_map={}):
score_list = fetch_map["score"]
result = {"label": [], "prob": []}
for score in score_list:
score = score.tolist()
max_score = max(score)
result["label"].append(self.label_dict[score.index(max_score)]
.strip().replace(",", ""))
result["prob"].append(max_score)
return result
image_service = ImageService(name="image")
image_service.load_model_config(sys.argv[1])
image_service.init_imagenet_setting()
if device == "gpu":
image_service.set_gpus("0")
image_service.prepare_server(
workdir="workdir", port=int(sys.argv[3]), device=device)
image_service.run_rpc_service()
image_service.run_web_service()
...@@ -9,24 +9,20 @@ sh get_data.sh ...@@ -9,24 +9,20 @@ sh get_data.sh
``` ```
The downloaded package contains the cnn, lstm and bow model configs along with their test_data and train_data. The downloaded package contains the cnn, lstm and bow model configs along with their test_data and train_data.
### Start RPC inference service ### Start inference service(Support BRPC-Client/GRPC-Client/Http-Client)
``` ```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292
``` ```
### RPC Infer ### BRPC-Client Infer
``` ```
head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab head test_data/part-0 | python3 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
``` ```
It will print the prediction results of the first 10 test cases. It will print the prediction results of the first 10 test cases.
### Start HTTP inference service
```
python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
```
### HTTP Infer
### GRPC-Client/Http-Client Infer
``` ```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction head test_data/part-0 | python3 test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
``` ```
...@@ -9,23 +9,18 @@ sh get_data.sh ...@@ -9,23 +9,18 @@ sh get_data.sh
``` ```
脚本会下载和解压出cnn、lstm和bow三种模型的配置文文件以及test_data和train_data。 脚本会下载和解压出cnn、lstm和bow三种模型的配置文文件以及test_data和train_data。
### 启动RPC预测服务 ### 启动预测服务(支持BRPC-Client/GRPC-Client/Http-Client)
``` ```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292
``` ```
### 执行预测 ### BRPC-Client预测
``` ```
head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab head test_data/part-0 | python3 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
``` ```
预测test_data/part-0的前十个样例。 预测test_data/part-0的前十个样例。
### 启动HTTP预测服务 ### GRPC-Client/Http-Client预测
``` ```
python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab head test_data/part-0 | python3 test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
```
### 执行预测
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
``` ```
wget https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_service.tar.gz
tar -xzf imdb_service.tar.gz
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
tar -zxvf text_classification_data.tar.gz
python text_classify_service.py serving_server_model/ workdir imdb.vocab
...@@ -12,37 +12,50 @@ ...@@ -12,37 +12,50 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client import HttpClient
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader.imdb_reader import IMDBDataset from paddle_serving_app.reader.imdb_reader import IMDBDataset
import sys import sys
import numpy as np import numpy as np
client = HttpClient()
client.load_client_config(sys.argv[1])
'''
if you want use GRPC-client, set_use_grpc_client(True)
or you can directly use client.grpc_client_predict(...)
as for HTTP-client,set_use_grpc_client(False)(which is default)
or you can directly use client.http_client_predict(...)
'''
#client.set_use_grpc_client(True)
'''
if you want to enable Encrypt Module,uncommenting the following line
'''
#client.use_key("./key")
'''
if you want to compress,uncommenting the following line
'''
#client.set_response_compress(True)
#client.set_request_compress(True)
'''
we recommend use Proto data format in HTTP-body, set True(which is default)
if you want use JSON data format in HTTP-body, set False
'''
#client.set_http_proto(True)
client.connect(["127.0.0.1:9292"])
class IMDBService(WebService): # you can define any english sentence or dataset here
def prepare_dict(self, args={}): # This example reuses imdb reader in training, you
if len(args) == 0: # can define your own data preprocessing easily.
exit(-1) imdb_dataset = IMDBDataset()
self.dataset = IMDBDataset() imdb_dataset.load_resource(sys.argv[2])
self.dataset.load_resource(args["dict_file_path"])
def preprocess(self, feed={}, fetch=[]):
feed_batch = []
words_lod = [0]
is_batch = True
for ins in feed:
words = self.dataset.get_words_only(ins["words"])
words = np.array(words).reshape(len(words), 1)
words_lod.append(words_lod[-1] + len(words))
feed_batch.append(words)
feed = {"words": np.concatenate(feed_batch), "words.lod": words_lod}
return feed, fetch, is_batch
imdb_service = IMDBService(name="imdb") for line in sys.stdin:
imdb_service.load_model_config(sys.argv[1]) word_ids, label = imdb_dataset.get_words_and_label(line)
imdb_service.prepare_server( word_len = len(word_ids)
workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") feed = {
imdb_service.prepare_dict({"dict_file_path": sys.argv[4]}) "words": np.array(word_ids).reshape(word_len, 1),
imdb_service.run_rpc_service() "words.lod": [0, word_len]
imdb_service.run_web_service() }
#print(feed)
fetch = ["prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch, batch=True)
print(fetch_map)
...@@ -4,28 +4,23 @@ ...@@ -4,28 +4,23 @@
### Get Model ### Get Model
``` ```
python -m paddle_serving_app.package --get_model lac python3 -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz tar -xzvf lac.tar.gz
``` ```
#### Start RPC inference service #### Start inference service(Support BRPC-Client/GRPC-Client/Http-Client)
``` ```
python -m paddle_serving_server.serve --model lac_model/ --port 9292 python3 -m paddle_serving_server.serve --model lac_model/ --port 9292
``` ```
### RPC Infer ### BRPC Infer
``` ```
echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt echo "我爱北京天安门" | python3 lac_client.py lac_client/serving_client_conf.prototxt
``` ```
It will get the segmentation result. It will get the segmentation result.
### Start HTTP inference service ### GRPC/Http Infer
``` ```
python lac_web_service.py lac_model/ lac_workdir 9292 echo "我爱北京天安门" | python3 lac_http_client.py lac_client/serving_client_conf.prototxt
```
### HTTP Infer
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
``` ```
...@@ -4,28 +4,23 @@ ...@@ -4,28 +4,23 @@
### 获取模型 ### 获取模型
``` ```
python -m paddle_serving_app.package --get_model lac python3 -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz tar -xzvf lac.tar.gz
``` ```
#### 开启RPC预测服务 #### 开启预测服务(支持BRPC-Client/GRPC-Client/Http-Client)
``` ```
python -m paddle_serving_server.serve --model lac_model/ --port 9292 python3 -m paddle_serving_server.serve --model lac_model/ --port 9292
``` ```
### 执行RPC预测 ### 执行BRPC预测
``` ```
echo "我爱北京天安门" | python lac_client.py lac_client/serving_client_conf.prototxt echo "我爱北京天安门" | python3 lac_client.py lac_client/serving_client_conf.prototxt
``` ```
我们就能得到分词结果 我们就能得到分词结果
### 开启HTTP预测服务 ### 执行GRPC/Http预测
``` ```
python lac_web_service.py lac_model/ lac_workdir 9292 echo "我爱北京天安门" | python3 lac_http_client.py lac_client/serving_client_conf.prototxt
```
### 执行HTTP预测
```
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
``` ```
# encoding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -11,17 +12,55 @@ ...@@ -11,17 +12,55 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#coding=utf-8 # pylint: disable=doc-string-missing
import requests
import json
import time
if __name__ == "__main__": from paddle_serving_client import HttpClient
server = "http://127.0.0.1:9280/lac/prediction" from paddle_serving_app.reader import LACReader
fin = open("jieba_test.txt", "r") import sys
start = time.time() import os
for line in fin: import io
req_data = {"words": line.strip(), "fetch": ["crf_decode"]} import numpy as np
r = requests.post(server, json=req_data)
end = time.time() client = HttpClient()
print(end - start) client.load_client_config(sys.argv[1])
'''
if you want use GRPC-client, set_use_grpc_client(True)
or you can directly use client.grpc_client_predict(...)
as for HTTP-client,set_use_grpc_client(False)(which is default)
or you can directly use client.http_client_predict(...)
'''
#client.set_use_grpc_client(True)
'''
if you want to enable Encrypt Module,uncommenting the following line
'''
#client.use_key("./key")
'''
if you want to compress,uncommenting the following line
'''
#client.set_response_compress(True)
#client.set_request_compress(True)
'''
we recommend use Proto data format in HTTP-body, set True(which is default)
if you want use JSON data format in HTTP-body, set False
'''
#client.set_http_proto(True)
client.connect(["127.0.0.1:9292"])
reader = LACReader()
for line in sys.stdin:
if len(line) <= 0:
continue
feed_data = reader.process(line)
if len(feed_data) <= 0:
continue
print(feed_data)
#fetch_map = client.predict(feed={"words": np.array(feed_data).reshape(len(feed_data), 1), "words.lod": [0, len(feed_data)]}, fetch=["crf_decode"], batch=True)
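# Note: the request below stacks the same sentence twice to form a batch of two;
# "words.lod" then holds the cumulative token offsets of each sample: [0, len, 2*len].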
fetch_map = client.predict(
feed={
"words": np.array(feed_data + feed_data).reshape(
len(feed_data) * 2, 1),
"words.lod": [0, len(feed_data), 2 * len(feed_data)]
},
fetch=["crf_decode"],
batch=True)
print(fetch_map)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server.web_service import WebService
import sys
from paddle_serving_app.reader import LACReader
import numpy as np
class LACService(WebService):
def load_reader(self):
self.reader = LACReader()
def preprocess(self, feed={}, fetch=[]):
feed_batch = []
fetch = ["crf_decode"]
lod_info = [0]
is_batch = True
for ins in feed:
if "words" not in ins:
raise ("feed data error!")
feed_data = self.reader.process(ins["words"])
feed_batch.append(np.array(feed_data).reshape(len(feed_data), 1))
lod_info.append(lod_info[-1] + len(feed_data))
feed_dict = {
"words": np.concatenate(
feed_batch, axis=0),
"words.lod": lod_info
}
return feed_dict, fetch, is_batch
def postprocess(self, feed={}, fetch=[], fetch_map={}):
batch_ret = []
for idx, ins in enumerate(feed):
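# 'crf_decode.lod' holds cumulative offsets per sample, so [begin, end) selects the tags belonging to sample idx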
begin = fetch_map['crf_decode.lod'][idx]
end = fetch_map['crf_decode.lod'][idx + 1]
segs = self.reader.parse_result(ins["words"],
fetch_map["crf_decode"][begin:end])
batch_ret.append({"word_seg": "|".join(segs)})
return batch_ret
lac_service = LACService(name="lac")
lac_service.load_model_config(sys.argv[1])
lac_service.load_reader()
lac_service.prepare_server(
workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
lac_service.run_rpc_service()
lac_service.run_web_service()
...@@ -11,15 +11,15 @@ Firstly, download the [Resnet50 int8 model](https://paddle-inference-dist.bj.bce ...@@ -11,15 +11,15 @@ Firstly, download the [Resnet50 int8 model](https://paddle-inference-dist.bj.bce
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar zxvf ResNet50_quant.tar.gz tar zxvf ResNet50_quant.tar.gz
python -m paddle_serving_client.convert --dirname ResNet50_quant python3 -m paddle_serving_client.convert --dirname ResNet50_quant
``` ```
Start RPC service, specify the GPU id and precision mode Start RPC service, specify the GPU id and precision mode
``` ```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 python3 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8
``` ```
Request the serving service with Client Request the serving service with Client
``` ```
python resnet50_client.py python3 resnet50_client.py
``` ```
## Reference ## Reference
......
...@@ -10,15 +10,15 @@ ...@@ -10,15 +10,15 @@
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar zxvf ResNet50_quant.tar.gz tar zxvf ResNet50_quant.tar.gz
python -m paddle_serving_client.convert --dirname ResNet50_quant python3 -m paddle_serving_client.convert --dirname ResNet50_quant
``` ```
启动rpc服务, 设定所选GPU id、部署模型精度 启动rpc服务, 设定所选GPU id、部署模型精度
``` ```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 python3 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8
``` ```
使用client进行请求 使用client进行请求
``` ```
python resnet50_client.py python3 resnet50_client.py
``` ```
## 参考文档 ## 参考文档
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model mobilenet_v2_imagenet python3 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet
tar -xzvf mobilenet_v2_imagenet.tar.gz tar -xzvf mobilenet_v2_imagenet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 python3 -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
``` ```
### Client Prediction ### Client Prediction
``` ```
python mobilenet_tutorial.py python3 mobilenet_tutorial.py
``` ```
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## 获取模型 ## 获取模型
``` ```
python -m paddle_serving_app.package --get_model mobilenet_v2_imagenet python3 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet
tar -xzvf mobilenet_v2_imagenet.tar.gz tar -xzvf mobilenet_v2_imagenet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### 启动服务端 ### 启动服务端
``` ```
python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 python3 -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
``` ```
### 客户端预测 ### 客户端预测
``` ```
python mobilenet_tutorial.py python3 mobilenet_tutorial.py
``` ```
...@@ -4,9 +4,9 @@ ...@@ -4,9 +4,9 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model ocr_rec python3 -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz tar -xzvf ocr_rec.tar.gz
python -m paddle_serving_app.package --get_model ocr_det python3 -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz tar -xzvf ocr_det.tar.gz
``` ```
...@@ -23,16 +23,16 @@ tar xf test_imgs.tar ...@@ -23,16 +23,16 @@ tar xf test_imgs.tar
``` ```
#choose one of cpu/gpu commands as following #choose one of cpu/gpu commands as following
#for cpu user #for cpu user
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu python3 ocr_web_server.py cpu
#for gpu user #for gpu user
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0 python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0
python ocr_web_server.py gpu python3 ocr_web_server.py gpu
``` ```
### Client Prediction ### Client Prediction
``` ```
python ocr_web_client.py python3 ocr_web_client.py
``` ```
If you want a faster web service, please try Web LocalPredictor Service If you want a faster web service, please try Web LocalPredictor Service
...@@ -40,14 +40,14 @@ If you want a faster web service, please try Web LocalPredictor Service ...@@ -40,14 +40,14 @@ If you want a faster web service, please try Web LocalPredictor Service
``` ```
#choose one of cpu/gpu commands as following #choose one of cpu/gpu commands as following
#for cpu user #for cpu user
python ocr_debugger_server.py cpu python3 ocr_debugger_server.py cpu
#for gpu user #for gpu user
python ocr_debugger_server.py gpu python3 ocr_debugger_server.py gpu
``` ```
## Web LocalPredictor Client Prediction ## Web LocalPredictor Client Prediction
``` ```
python ocr_web_client.py python3 ocr_web_client.py
``` ```
## Benchmark ## Benchmark
...@@ -69,34 +69,34 @@ if you are going to detect images not recognize it or directly recognize the wor ...@@ -69,34 +69,34 @@ if you are going to detect images not recognize it or directly recognize the wor
### Det Server ### Det Server
``` ```
python det_web_server.py cpu #for cpu user python3 det_web_server.py cpu #for cpu user
python det_web_server.py gpu #for gpu user python3 det_web_server.py gpu #for gpu user
#or #or
python det_debugger_server.py cpu #for cpu user python3 det_debugger_server.py cpu #for cpu user
python det_debugger_server.py gpu #for gpu user python3 det_debugger_server.py gpu #for gpu user
``` ```
### Det Client ### Det Client
``` ```
# also use ocr_web_client.py # also use ocr_web_client.py
python ocr_web_client.py python3 ocr_web_client.py
``` ```
### Rec Server ### Rec Server
``` ```
python rec_web_server.py cpu #for cpu user python3 rec_web_server.py cpu #for cpu user
python rec_web_server.py gpu #for gpu user python3 rec_web_server.py gpu #for gpu user
#or #or
python rec_debugger_server.py cpu #for cpu user python3 rec_debugger_server.py cpu #for cpu user
python rec_debugger_server.py gpu #for gpu user python3 rec_debugger_server.py gpu #for gpu user
``` ```
### Rec Client ### Rec Client
``` ```
python rec_web_client.py python3 rec_web_client.py
``` ```
## C++ OCR Service ## C++ OCR Service
...@@ -109,9 +109,9 @@ Select a startup mode according to CPU / GPU device ...@@ -109,9 +109,9 @@ Select a startup mode according to CPU / GPU device
Pass the folder paths of multiple models after the --model parameter to start a prediction service that chains the models. Pass the folder paths of multiple models after the --model parameter to start a prediction service that chains the models.
``` ```
#for cpu user #for cpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293
#for gpu user #for gpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0 python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0
``` ```
### Client Prediction ### Client Prediction
...@@ -119,9 +119,9 @@ The pre-processing and post-processing is in the C + + server part, the image's ...@@ -119,9 +119,9 @@ The pre-processing and post-processing is in the C + + server part, the image's
so the `feed_var` entry in the file `ocr_det_client/serving_client_conf.prototxt` needs to be changed. so the `feed_var` entry in the file `ocr_det_client/serving_client_conf.prototxt` needs to be changed.
For this case, `feed_type` should be 3 (which means the data type is string) and `shape` should be 1. For this case, `feed_type` should be 20 (which means the data type is string) and `shape` should be 1.
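In other words, after the change the corresponding `feed_var` entry in `ocr_det_client/serving_client_conf.prototxt` would look roughly like the sketch below (field names follow the usual client config layout; keep the variable name your generated config already contains):
```
feed_var {
  name: "image"
  alias_name: "image"
  is_lod_tensor: false
  feed_type: 20
  shape: 1
}
```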
By passing in multiple client folder paths, the client can be started for multi model prediction. By passing in multiple client folder paths, the client can be started for multi model prediction.
``` ```
python ocr_cpp_client.py ocr_det_client ocr_rec_client python3 ocr_cpp_client.py ocr_det_client ocr_rec_client
``` ```
...@@ -4,9 +4,9 @@ ...@@ -4,9 +4,9 @@
## 获取模型 ## 获取模型
``` ```
python -m paddle_serving_app.package --get_model ocr_rec python3 -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz tar -xzvf ocr_rec.tar.gz
python -m paddle_serving_app.package --get_model ocr_det python3 -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz tar -xzvf ocr_det.tar.gz
``` ```
## 获取数据集(可选) ## 获取数据集(可选)
...@@ -22,16 +22,16 @@ tar xf test_imgs.tar ...@@ -22,16 +22,16 @@ tar xf test_imgs.tar
``` ```
#根据CPU/GPU设备选择一种启动方式 #根据CPU/GPU设备选择一种启动方式
#for cpu user #for cpu user
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu python3 ocr_web_server.py cpu
#for gpu user #for gpu user
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0 python3 -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_ids 0
python ocr_web_server.py gpu python3 ocr_web_server.py gpu
``` ```
### 启动客户端 ### 启动客户端
``` ```
python ocr_web_client.py python3 ocr_web_client.py
``` ```
如果用户需要更快的执行速度,请尝试LocalPredictor版Web服务 如果用户需要更快的执行速度,请尝试LocalPredictor版Web服务
...@@ -39,14 +39,14 @@ python ocr_web_client.py ...@@ -39,14 +39,14 @@ python ocr_web_client.py
``` ```
#根据CPU/GPU设备选择一种启动方式 #根据CPU/GPU设备选择一种启动方式
#for cpu user #for cpu user
python ocr_debugger_server.py cpu python3 ocr_debugger_server.py cpu
#for gpu user #for gpu user
python ocr_debugger_server.py gpu python3 ocr_debugger_server.py gpu
``` ```
## 启动客户端 ## 启动客户端
``` ```
python ocr_web_client.py python3 ocr_web_client.py
``` ```
## 性能指标 ## 性能指标
...@@ -69,34 +69,34 @@ GPU: Nvidia Tesla V100单卡 ...@@ -69,34 +69,34 @@ GPU: Nvidia Tesla V100单卡
### 启动检测服务 ### 启动检测服务
``` ```
python det_web_server.py cpu #for cpu user python3 det_web_server.py cpu #for cpu user
python det_web_server.py gpu #for gpu user python3 det_web_server.py gpu #for gpu user
#or #or
python det_debugger_server.py cpu #for cpu user python3 det_debugger_server.py cpu #for cpu user
python det_debugger_server.py gpu #for gpu user python3 det_debugger_server.py gpu #for gpu user
``` ```
### 检测服务客户端 ### 检测服务客户端
``` ```
# also use ocr_web_client.py # also use ocr_web_client.py
python ocr_web_client.py python3 ocr_web_client.py
``` ```
### 启动识别服务 ### 启动识别服务
``` ```
python rec_web_server.py cpu #for cpu user python3 rec_web_server.py cpu #for cpu user
python rec_web_server.py gpu #for gpu user python3 rec_web_server.py gpu #for gpu user
#or #or
python rec_debugger_server.py cpu #for cpu user python3 rec_debugger_server.py cpu #for cpu user
python rec_debugger_server.py gpu #for gpu user python3 rec_debugger_server.py gpu #for gpu user
``` ```
### 识别服务客户端 ### 识别服务客户端
``` ```
python rec_web_client.py python3 rec_web_client.py
``` ```
## C++ OCR Service服务 ## C++ OCR Service服务
...@@ -108,9 +108,9 @@ python rec_web_client.py ...@@ -108,9 +108,9 @@ python rec_web_client.py
通过--model后,指定多个模型文件的文件夹路径来启动多模型串联的预测服务。 通过--model后,指定多个模型文件的文件夹路径来启动多模型串联的预测服务。
``` ```
#for cpu user #for cpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293
#for gpu user #for gpu user
python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0 python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_ids 0
``` ```
### 启动客户端 ### 启动客户端
...@@ -118,9 +118,9 @@ python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port ...@@ -118,9 +118,9 @@ python -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port
`ocr_det_client/serving_client_conf.prototxt``feed_var`字段 `ocr_det_client/serving_client_conf.prototxt``feed_var`字段
对于本示例而言,`feed_type`应修改为3(数据类型为string),`shape`为1. 对于本示例而言,`feed_type`应修改为20(数据类型为string),`shape`为1.
通过在客户端启动后加入多个client模型的client配置文件夹路径,启动client进行预测。 通过在客户端启动后加入多个client模型的client配置文件夹路径,启动client进行预测。
``` ```
python ocr_cpp_client.py ocr_det_client ocr_rec_client python3 ocr_cpp_client.py ocr_det_client ocr_rec_client
``` ```
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
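Besides the RPC client, the pipeline service also exposes an HTTP endpoint; the URL below is the one used by the benchmark script later in this commit. A sketch of a raw HTTP request (the `key`/`value` field names follow the usual pipeline HTTP format and are an assumption here, as the request body is not shown in this commit):
```python
import base64
import json

import requests

with open("daisy.jpg", "rb") as f:
    image = base64.b64encode(f.read()).decode("utf8")

# one input variable named "image", carried as a Base64 string
data = {"key": ["image"], "value": [image]}
resp = requests.post("http://127.0.0.1:18080/imagenet/prediction",
                     data=json.dumps(data))
print(resp.json())
```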
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## 启动服务 ## 启动服务
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## 测试 ## 测试
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
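The `yaml.load(fin, yaml.FullLoader)` change above follows PyYAML 5.1+, where calling `yaml.load` without an explicit `Loader` warns and the full constructor set is unsafe for untrusted input. A short sketch of the pattern, assuming a `config.yml` like the one the benchmark edits:

```python
import yaml

# Explicit loader: parses standard YAML without constructing arbitrary Python
# objects (yaml.safe_load would be the stricter equivalent).
with open("config.yml", "r") as fin:
    config = yaml.load(fin, Loader=yaml.FullLoader)

config.setdefault("dag", {})["tracer"] = {"interval_s": 10}

with open("config2.yml", "w") as fout:
    yaml.dump(config, fout, default_flow_style=False)
```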
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
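`run_http` above posts a base64-encoded image to the pipeline's HTTP endpoint. A hedged single-request sketch, assuming the `key`/`value` JSON layout the pipeline web service expects and a local `daisy.jpg`:

```python
import base64
import json

import requests

url = "http://127.0.0.1:18080/imagenet/prediction"  # same URL as run_http

with open("daisy.jpg", "rb") as f:
    image = base64.b64encode(f.read()).decode("utf8")

# Parallel key/value lists; the benchmark repeats the pair batch_size times.
payload = {"key": ["image"], "value": [image]}
resp = requests.post(url, data=json.dumps(payload))
print(resp.json())
```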
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
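`gen_yml` above only rewrites a few keys of `config.yml` before dumping `config2.yml`. A hedged sketch of the GPU-side result, expressed as the dict it dumps; the `device_type` value of 1 for GPU is an assumption (the CPU branch explicitly sets 0), and every other field of `config.yml` passes through unchanged:

```python
# Keys touched by gen_yml("gpu", "0"); all other config.yml fields pass through.
config_gpu = {
    "dag": {"tracer": {"interval_s": 10}},
    "op": {
        "imagenet": {
            "local_service_conf": {
                "device_type": 1,  # assumed GPU value; gen_yml sets 0 for CPU
                "devices": "0",    # the gpu_id argument
            },
        },
    },
}
```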
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
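The benchmark's `run_http` path (partially elided in the hunk above) base64-encodes `daisy.jpg` and POSTs it to the imagenet pipeline endpoint. A minimal standalone sketch of that request pattern, assuming the pipeline web service is listening on 127.0.0.1:18080 and accepts the usual `{"key": [...], "value": [...]}` JSON payload:

```python
# Sketch of the HTTP request pattern used by run_http above.
# Assumptions: the imagenet pipeline serves HTTP on 127.0.0.1:18080 and
# takes the standard key/value JSON payload; daisy.jpg is in the working dir.
import base64
import json

import requests


def cv2_to_base64(image_bytes):
    # The service expects the raw image bytes as a base64 string.
    return base64.b64encode(image_bytes).decode("utf8")


if __name__ == "__main__":
    url = "http://127.0.0.1:18080/imagenet/prediction"
    with open("daisy.jpg", "rb") as f:
        image = cv2_to_base64(f.read())
    # One image per request; larger batches repeat the key/value pairs.
    payload = {"key": ["image"], "value": [image]}
    resp = requests.post(url=url, data=json.dumps(payload))
    print(resp.json())
```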
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
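`pipeline_rpc_client.py` itself is not reproduced in this diff. The following is a minimal sketch of such a client, assuming the pipeline's RPC port is 18080 (the address the benchmark code above connects to) and that the imagenet op returns `label`/`prob` keys; the feed and fetch names are assumptions, not taken from the file.

```python
# Minimal sketch of a pipeline RPC client (roughly what
# pipeline_rpc_client.py does). Feed/fetch key names are assumptions.
import base64

from paddle_serving_server.pipeline import PipelineClient

client = PipelineClient()
client.connect(['127.0.0.1:18080'])  # rpc_port from config.yml (assumed 18080)

with open("daisy.jpg", "rb") as f:
    image = base64.b64encode(f.read()).decode("utf8")

# feed_dict keys must match what the service's preprocess expects.
ret = client.predict(feed_dict={"image": image}, fetch=["label", "prob"])
print(ret)
```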
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server

``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
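A change that recurs throughout these benchmark diffs is passing an explicit loader to `yaml.load`: PyYAML 5.1+ warns about a bare `yaml.load`, and PyYAML 6.0 makes the loader argument mandatory. A small sketch of the safe pattern the updated scripts use:

```python
# Safe YAML loading, as in the updated benchmark scripts.
# FullLoader handles standard YAML tags without constructing arbitrary
# Python objects; yaml.safe_load is a stricter alternative for plain
# config files like config.yml.
import yaml

with open("config.yml", "r") as fin:
    config = yaml.load(fin, yaml.FullLoader)
    # config = yaml.safe_load(fin)  # stricter option for untrusted input

print(config.get("dag", {}))
```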
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
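The `resnet50_web_service.py` hunks above show only the import change. For orientation, a rough sketch of how a pipeline web service is assembled from `WebService` and `Op`; method signatures vary between Paddle Serving releases and the fetch name used here is an assumption, so treat this as an illustration rather than the verbatim example file.

```python
# Illustrative pipeline web service built from WebService and Op.
# NOTE: preprocess/postprocess signatures differ across releases and the
# "score" fetch key is assumed; this is not the verbatim example file.
import numpy as np
from paddle_serving_server.web_service import WebService, Op


class ImagenetOp(Op):
    def preprocess(self, input_dicts, data_id, log_id):
        (_, input_dict), = input_dicts.items()
        # Real code decodes and resizes the base64 image from input_dict;
        # a dummy tensor keeps the sketch short.
        img = np.zeros((1, 3, 224, 224), dtype="float32")
        return {"image": img}, False, None, ""

    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
        score = fetch_dict["score"]  # fetch name is an assumption
        return {"label": str(np.argmax(score))}, None, ""


class ImageService(WebService):
    def get_pipeline_response(self, read_op):
        return ImagenetOp(name="imagenet", input_ops=[read_op])


service = ImageService(name="imagenet")
service.prepare_pipeline_config("config2.yml")
service.run_service()
```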
...@@ -4,17 +4,17 @@ This document will takes Imagenet service as an example to introduce how to use ...@@ -4,17 +4,17 @@ This document will takes Imagenet service as an example to introduce how to use
## Get model ## Get model
``` ```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz tar -xzvf resnet_v2_50_imagenet.tar.gz
``` ```
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -4,18 +4,17 @@ ...@@ -4,18 +4,17 @@
## Get model ## Get model
``` ```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz tar -xzvf resnet_v2_50_imagenet.tar.gz
``` ```
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18000/imagenet/prediction" url = "http://127.0.0.1:18000/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -11,10 +11,8 @@ ...@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -10,10 +10,10 @@ sh get_model.sh ...@@ -10,10 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## RPC test ## RPC test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
...@@ -10,11 +10,10 @@ sh get_model.sh ...@@ -10,11 +10,10 @@ sh get_model.sh
## Start server ## Start server
``` ```
python resnet50_web_service.py &>log.txt & python3 resnet50_web_service.py &>log.txt &
``` ```
## Test ## Test
``` ```
python pipeline_rpc_client.py python3 pipeline_rpc_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import base64 import base64
...@@ -5,16 +19,16 @@ import yaml ...@@ -5,16 +19,16 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout): ...@@ -24,9 +38,10 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id): ...@@ -34,15 +49,17 @@ def gen_yml(device, gpu_id):
config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id
else: else:
config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18080/imagenet/prediction" url = "http://127.0.0.1:18080/imagenet/prediction"
start = time.time() start = time.time()
with open(os.path.join(".", "daisy.jpg"), 'rb') as file: with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
...@@ -68,6 +85,7 @@ def run_http(idx, batch_size): ...@@ -68,6 +85,7 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size): ...@@ -87,6 +105,7 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
client.connect(['127.0.0.1:18080']) client.connect(['127.0.0.1:18080'])
...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size): ...@@ -107,11 +126,12 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
if device == "gpu": if device == "gpu":
...@@ -120,7 +140,7 @@ if __name__ == "__main__": ...@@ -120,7 +140,7 @@ if __name__ == "__main__":
gpu_id = None gpu_id = None
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -131,4 +151,3 @@ if __name__ == "__main__": ...@@ -131,4 +151,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -11,10 +11,7 @@ ...@@ -11,10 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: from paddle_serving_server.pipeline import PipelineClient
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
import requests import requests
import json import json
......
...@@ -13,10 +13,8 @@ ...@@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import base64, cv2 import base64, cv2
......
...@@ -8,11 +8,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -8,11 +8,11 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf faster_rcnn_r50_fpn_1x_coco.tar tar xf faster_rcnn_r50_fpn_1x_coco.tar
python web_service.py python3 web_service.py
``` ```
### Perform prediction ### Perform prediction
``` ```
python pipeline_http_client.py python3 pipeline_http_client.py
``` ```
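`pipeline_http_client.py` for the detection example is referenced but not shown. A minimal sketch, assuming the faster_rcnn pipeline serves HTTP on port 18082 (the port used by the benchmark script further below) and the usual key/value payload; the test image name is a placeholder:

```python
# Sketch of an HTTP client for the faster_rcnn pipeline service.
# Assumptions: HTTP port 18082 (as in the benchmark below) and a local
# JPEG named test.jpg; the filename is only a placeholder.
import base64
import json

import requests

url = "http://127.0.0.1:18082/faster_rcnn/prediction"
with open("test.jpg", "rb") as f:
    image = base64.b64encode(f.read()).decode("utf8")

payload = {"key": ["image"], "value": [image]}
resp = requests.post(url=url, data=json.dumps(payload))
print(resp.json())
```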
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import yaml import yaml
...@@ -6,20 +20,20 @@ import time ...@@ -6,20 +20,20 @@ import time
import json import json
import cv2 import cv2
import base64 import base64
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -29,17 +43,19 @@ def parse_benchmark(filein, fileout): ...@@ -29,17 +43,19 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 30} config["dag"]["tracer"] = {"interval_s": 30}
if device == "gpu": if device == "gpu":
config["op"]["faster_rcnn"]["local_service_conf"]["device_type"] = 1 config["op"]["faster_rcnn"]["local_service_conf"]["device_type"] = 1
config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/faster_rcnn/prediction" url = "http://127.0.0.1:18082/faster_rcnn/prediction"
...@@ -65,6 +81,7 @@ def run_http(idx, batch_size): ...@@ -65,6 +81,7 @@ def run_http(idx, batch_size):
break break
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -84,22 +101,25 @@ def multithread_http(thread, batch_size): ...@@ -84,22 +101,25 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
pass pass
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
gpu_id = sys.argv[5] gpu_id = sys.argv[5]
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -110,4 +130,3 @@ if __name__ == "__main__": ...@@ -110,4 +130,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
...@@ -25,7 +25,7 @@ class FasterRCNNOp(Op): ...@@ -25,7 +25,7 @@ class FasterRCNNOp(Op):
self.img_preprocess = Sequential([ self.img_preprocess = Sequential([
BGR2RGB(), Div(255.0), BGR2RGB(), Div(255.0),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
Resize((640, 640)), Transpose((2, 0, 1)) Resize(640, 640), Transpose((2, 0, 1))
]) ])
self.img_postprocess = RCNNPostprocess("label_list.txt", "output") self.img_postprocess = RCNNPostprocess("label_list.txt", "output")
......
...@@ -10,11 +10,10 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,11 +10,10 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf ppyolo_mbv3_large_coco.tar tar xf ppyolo_mbv3_large_coco.tar
python web_service.py python3 web_service.py
``` ```
### Perform prediction ### Perform prediction
``` ```
python pipeline_http_client.py python3 pipeline_http_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import yaml import yaml
...@@ -6,20 +20,20 @@ import time ...@@ -6,20 +20,20 @@ import time
import json import json
import cv2 import cv2
import base64 import base64
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -29,17 +43,19 @@ def parse_benchmark(filein, fileout): ...@@ -29,17 +43,19 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 30} config["dag"]["tracer"] = {"interval_s": 30}
if device == "gpu": if device == "gpu":
config["op"]["ppyolo_mbv3"]["local_service_conf"]["device_type"] = 1 config["op"]["ppyolo_mbv3"]["local_service_conf"]["device_type"] = 1
config["op"]["ppyolo_mbv3"]["local_service_conf"]["devices"] = gpu_id config["op"]["ppyolo_mbv3"]["local_service_conf"]["devices"] = gpu_id
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/ppyolo_mbv3/prediction" url = "http://127.0.0.1:18082/ppyolo_mbv3/prediction"
...@@ -65,6 +81,7 @@ def run_http(idx, batch_size): ...@@ -65,6 +81,7 @@ def run_http(idx, batch_size):
break break
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -84,22 +101,25 @@ def multithread_http(thread, batch_size): ...@@ -84,22 +101,25 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
pass pass
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
gpu_id = sys.argv[5] gpu_id = sys.argv[5]
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -110,4 +130,3 @@ if __name__ == "__main__": ...@@ -110,4 +130,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
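Each of these benchmarks carries a `gen_yml` helper that rewrites `config.yml` into `config2.yml`, switching the target op onto GPU and pinning the card list. A compact standalone version of that pattern, assuming the same `op -> local_service_conf` layout shown in the hunks above:

```python
# Standalone version of the gen_yml pattern used by these benchmarks:
# read config.yml, flip the target op to GPU, pin the device list, and
# write config2.yml for the server to load.
import yaml


def gen_yml(op_name, device, gpu_ids="0", src="config.yml", dst="config2.yml"):
    with open(src, "r") as fin:
        config = yaml.load(fin, yaml.FullLoader)
    config["dag"]["tracer"] = {"interval_s": 30}
    if device == "gpu":
        # device_type 1 selects GPU in the local_service_conf schema
        # used by these examples; leaving it at 0 keeps the op on CPU.
        config["op"][op_name]["local_service_conf"]["device_type"] = 1
        config["op"][op_name]["local_service_conf"]["devices"] = gpu_ids
    with open(dst, "w") as fout:
        yaml.dump(config, fout, default_flow_style=False)


if __name__ == "__main__":
    gen_yml("ppyolo_mbv3", "gpu", "0,1")
```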
...@@ -10,11 +10,10 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,11 +10,10 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf yolov3_darknet53_270e_coco.tar tar xf yolov3_darknet53_270e_coco.tar
python web_service.py python3 web_service.py
``` ```
### Perform prediction ### Perform prediction
``` ```
python pipeline_http_client.py python3 pipeline_http_client.py
``` ```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import yaml import yaml
...@@ -6,20 +20,20 @@ import time ...@@ -6,20 +20,20 @@ import time
import json import json
import cv2 import cv2
import base64 import base64
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def cv2_to_base64(image): def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8') return base64.b64encode(image).decode('utf8')
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -29,17 +43,19 @@ def parse_benchmark(filein, fileout): ...@@ -29,17 +43,19 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id): def gen_yml(device, gpu_id):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 30} config["dag"]["tracer"] = {"interval_s": 30}
if device == "gpu": if device == "gpu":
config["op"]["yolov3"]["local_service_conf"]["device_type"] = 1 config["op"]["yolov3"]["local_service_conf"]["device_type"] = 1
config["op"]["yolov3"]["local_service_conf"]["devices"] = gpu_id config["op"]["yolov3"]["local_service_conf"]["devices"] = gpu_id
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/yolov3/prediction" url = "http://127.0.0.1:18082/yolov3/prediction"
...@@ -65,6 +81,7 @@ def run_http(idx, batch_size): ...@@ -65,6 +81,7 @@ def run_http(idx, batch_size):
break break
return [[end - start], latency_list, [total_num]] return [[end - start], latency_list, [total_num]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
start = time.time() start = time.time()
...@@ -84,22 +101,25 @@ def multithread_http(thread, batch_size): ...@@ -84,22 +101,25 @@ def multithread_http(thread, batch_size):
total_cost)) total_cost))
show_latency(result[1]) show_latency(result[1])
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
pass pass
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
gpu_id = sys.argv[5] gpu_id = sys.argv[5]
gen_yml(device, gpu_id) gen_yml(device, gpu_id)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -110,4 +130,3 @@ if __name__ == "__main__": ...@@ -110,4 +130,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import yaml import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
...@@ -38,9 +50,11 @@ from paddle_serving_client.utils import benchmark_args, show_latency ...@@ -38,9 +50,11 @@ from paddle_serving_client.utils import benchmark_args, show_latency
2021-03-16 10:26:01,840 chl0(In: ['@DAGExecutor'], Out: ['bert']) size[0/0] 2021-03-16 10:26:01,840 chl0(In: ['@DAGExecutor'], Out: ['bert']) size[0/0]
2021-03-16 10:26:01,841 chl1(In: ['bert'], Out: ['@DAGExecutor']) size[0/0] 2021-03-16 10:26:01,841 chl1(In: ['bert'], Out: ['@DAGExecutor']) size[0/0]
''' '''
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -50,20 +64,22 @@ def parse_benchmark(filein, fileout): ...@@ -50,20 +64,22 @@ def parse_benchmark(filein, fileout):
with open(fileout, "w") as fout: with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False) yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device): def gen_yml(device):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
config["op"]["bert"]["local_service_conf"]["device_type"] = 1 config["op"]["bert"]["local_service_conf"]["device_type"] = 1
config["op"]["bert"]["local_service_conf"]["devices"] = "2" config["op"]["bert"]["local_service_conf"]["devices"] = "2"
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/bert/prediction" url = "http://127.0.0.1:18082/bert/prediction"
start = time.time() start = time.time()
with open("data-c.txt", 'r') as fin: with open("data-c.txt", 'r') as fin:
start = time.time() start = time.time()
...@@ -84,9 +100,11 @@ def run_http(idx, batch_size): ...@@ -84,9 +100,11 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start]] return [[end - start]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size) result = multi_thread_runner.run(run_http, thread, batch_size)
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
...@@ -110,16 +128,17 @@ def run_rpc(thread, batch_size): ...@@ -110,16 +128,17 @@ def run_rpc(thread, batch_size):
def multithread_rpc(thraed, batch_size): def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
device = sys.argv[4] device = sys.argv[4]
gen_yml(device) gen_yml(device)
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
...@@ -130,4 +149,3 @@ if __name__ == "__main__": ...@@ -130,4 +149,3 @@ if __name__ == "__main__":
filein = sys.argv[2] filein = sys.argv[2]
fileout = sys.argv[3] fileout = sys.argv[3]
parse_benchmark(filein, fileout) parse_benchmark(filein, fileout)
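The bert benchmark drives `run_http`/`run_rpc` through `MultiThreadRunner`, which launches N workers as `worker(idx, arg)` and merges their returned lists position-wise (which is why `run_http` returns `[[elapsed], latency_list, [count]]`). A small sketch of that driver with a stand-in worker so it runs without a serving endpoint:

```python
# Sketch of the MultiThreadRunner driver used by these benchmarks.
# The worker here is a stand-in for run_http: it fakes 5 requests of
# ~10 ms each and returns the same [[elapsed], latencies, [count]] shape.
import time

from paddle_serving_client.utils import MultiThreadRunner, show_latency


def worker(idx, batch_size):
    latency_list = []
    start = time.time()
    for _ in range(5):
        t0 = time.time()
        time.sleep(0.01)  # pretend each request takes about 10 ms
        latency_list.append((time.time() - t0) * 1000)
    return [[time.time() - start], latency_list, [5 * batch_size]]


if __name__ == "__main__":
    runner = MultiThreadRunner()
    result = runner.run(worker, 2, 1)  # 2 workers, batch_size 1
    show_latency(result[1])            # merged per-request latencies
```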
...@@ -19,10 +19,8 @@ import yaml ...@@ -19,10 +19,8 @@ import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
...@@ -30,7 +28,7 @@ from paddle_serving_client.utils import benchmark_args, show_latency ...@@ -30,7 +28,7 @@ from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout): def parse_benchmark(filein, fileout):
with open(filein, "r") as fin: with open(filein, "r") as fin:
res = yaml.load(fin) res = yaml.load(fin, yaml.FullLoader)
del_list = [] del_list = []
for key in res["DAG"].keys(): for key in res["DAG"].keys():
if "call" in key: if "call" in key:
...@@ -43,7 +41,7 @@ def parse_benchmark(filein, fileout): ...@@ -43,7 +41,7 @@ def parse_benchmark(filein, fileout):
def gen_yml(device): def gen_yml(device):
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 10} config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu": if device == "gpu":
......
...@@ -10,7 +10,7 @@ sh get_data.sh ...@@ -10,7 +10,7 @@ sh get_data.sh
## Start server ## Start server
``` ```
python web_service.py &>log.txt & python3 web_service.py &>log.txt &
``` ```
## Http test ## Http test
......
...@@ -10,7 +10,7 @@ sh get_data.sh ...@@ -10,7 +10,7 @@ sh get_data.sh
## Start server ## Start server
``` ```
python web_service.py &>log.txt & python3 web_service.py &>log.txt &
``` ```
## Test ## Test
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys import sys
import os import os
import yaml import yaml
import requests import requests
import time import time
import json import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_client.utils import benchmark_args, show_latency
def gen_yml(): def gen_yml():
fin = open("config.yml", "r") fin = open("config.yml", "r")
config = yaml.load(fin) config = yaml.load(fin, yaml.FullLoader)
fin.close() fin.close()
config["dag"]["tracer"] = {"interval_s": 5} config["dag"]["tracer"] = {"interval_s": 5}
with open("config2.yml", "w") as fout: with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False) yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size): def run_http(idx, batch_size):
print("start thread ({})".format(idx)) print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/uci/prediction" url = "http://127.0.0.1:18082/uci/prediction"
start = time.time() start = time.time()
value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332" value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"
all_value = ";".join([value for i in range(batch_size)]) all_value = ";".join([value for i in range(batch_size)])
...@@ -33,9 +47,11 @@ def run_http(idx, batch_size): ...@@ -33,9 +47,11 @@ def run_http(idx, batch_size):
end = time.time() end = time.time()
return [[end - start]] return [[end - start]]
def multithread_http(thread, batch_size): def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size) result = multi_thread_runner.run(run_http, thread, batch_size)
def run_rpc(thread, batch_size): def run_rpc(thread, batch_size):
client = PipelineClient() client = PipelineClient()
...@@ -44,25 +60,26 @@ def run_rpc(thread, batch_size): ...@@ -44,25 +60,26 @@ def run_rpc(thread, batch_size):
all_value = ";".join([value for i in range(batch_size)]) all_value = ";".join([value for i in range(batch_size)])
data = {"key": "x", "value": all_value} data = {"key": "x", "value": all_value}
for i in range(1000): for i in range(1000):
ret = client.predict(feed_dict={data["key"]: data["value"]}, fetch=["res"]) ret = client.predict(
feed_dict={data["key"]: data["value"]}, fetch=["res"])
print(ret) print(ret)
def multithread_rpc(thread, batch_size): def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner() multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size) result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__": if __name__ == "__main__":
if sys.argv[1] == "yaml": if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3]) thread = int(sys.argv[3])
gen_yml() gen_yml()
elif sys.argv[1] == "run": elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3]) thread = int(sys.argv[3])
batch_size = int(sys.argv[4]) batch_size = int(sys.argv[4])
if mode == "http": if mode == "http":
multithread_http(thread, batch_size) multithread_http(thread, batch_size)
elif mode == "rpc": elif mode == "rpc":
multithread_rpc(thread, batch_size) multithread_rpc(thread, batch_size)
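Assuming this benchmark script is saved as `benchmark.py` (the filename and the concrete argument values below are illustrative, not taken from this commit), a typical invocation sketch would be:

```
# Regenerate config2.yml from config.yml (mode and thread count are positional)
python3 benchmark.py yaml local_predictor 1
# Benchmark the HTTP endpoint with 4 threads and batch size 1
python3 benchmark.py run http 4 1
# Or benchmark the pipeline RPC endpoint
python3 benchmark.py run rpc 4 1
```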
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz tar -xzvf resnet_v2_50_imagenet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
``` ```
### Client Prediction ### Client Prediction
``` ```
python resnet50_v2_tutorial.py python3 resnet50_v2_tutorial.py
``` ```
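A minimal sketch of what `resnet50_v2_tutorial.py` might contain, assuming the usual ImageNet preprocessing and that the feed/fetch variable names are `image`/`score` and the sample file is `daisy.jpg` (all assumptions; check `serving_client_conf.prototxt` for the real names):

```
# Hypothetical client sketch; feed/fetch names and the image file are assumptions.
from paddle_serving_client import Client
from paddle_serving_app.reader import (Sequential, File2Image, Resize, CenterCrop,
                                       RGB2BGR, Transpose, Div, Normalize)

client = Client()
client.load_client_config("resnet_v2_50_imagenet_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

# Standard ImageNet-style preprocessing chain; normalization is channel-first
# because it runs after Transpose.
seq = Sequential([
    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
img = seq("daisy.jpg")
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
print(fetch_map["score"])
```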
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## 获取模型 ## 获取模型
``` ```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz tar -xzvf resnet_v2_50_imagenet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### 启动服务端 ### 启动服务端
``` ```
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
``` ```
### 客户端预测 ### 客户端预测
``` ```
python resnet50_v2_tutorial.py python3 resnet50_v2_tutorial.py
``` ```
...@@ -3,16 +3,16 @@ ...@@ -3,16 +3,16 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model senta_bilstm python3 -m paddle_serving_app.package --get_model senta_bilstm
python -m paddle_serving_app.package --get_model lac python3 -m paddle_serving_app.package --get_model lac
tar -xzvf senta_bilstm.tar.gz tar -xzvf senta_bilstm.tar.gz
tar -xzvf lac.tar.gz tar -xzvf lac.tar.gz
``` ```
## Start HTTP Service ## Start HTTP Service
``` ```
python -m paddle_serving_server.serve --model lac_model --port 9300 python3 -m paddle_serving_server.serve --model lac_model --port 9300
python senta_web_service.py python3 senta_web_service.py
``` ```
In the Chinese sentiment classification task, the Chinese word segmentation needs to be done through [LAC task](../lac). In the Chinese sentiment classification task, the Chinese word segmentation needs to be done through [LAC task](../lac).
In this demo, the LAC task is placed in the preprocessing part of the HTTP prediction service of the sentiment classification task. In this demo, the LAC task is placed in the preprocessing part of the HTTP prediction service of the sentiment classification task.
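Once both services are up, the sentiment web service could be queried roughly as below; the port 9292, the service name `senta`, and the fetch name `class_probs` are assumptions, so use whatever `senta_web_service.py` actually configures:

```
curl -H "Content-Type:application/json" -X POST \
    -d '{"feed":[{"words": "天气不错"}], "fetch":["class_probs"]}' \
    http://127.0.0.1:9292/senta/prediction
```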
......
...@@ -3,16 +3,16 @@ ...@@ -3,16 +3,16 @@
## 获取模型文件 ## 获取模型文件
``` ```
python -m paddle_serving_app.package --get_model senta_bilstm python3 -m paddle_serving_app.package --get_model senta_bilstm
python -m paddle_serving_app.package --get_model lac python3 -m paddle_serving_app.package --get_model lac
tar -xzvf lac.tar.gz tar -xzvf lac.tar.gz
tar -xzvf senta_bilstm.tar.gz tar -xzvf senta_bilstm.tar.gz
``` ```
## 启动HTTP服务 ## 启动HTTP服务
``` ```
python -m paddle_serving_server.serve --model lac_model --port 9300 python3 -m paddle_serving_server.serve --model lac_model --port 9300
python senta_web_service.py python3 senta_web_service.py
``` ```
中文情感分类任务中需要先通过[LAC任务](../lac)进行中文分词。 中文情感分类任务中需要先通过[LAC任务](../lac)进行中文分词。
示例中将LAC任务放在情感分类任务的HTTP预测服务的预处理部分。 示例中将LAC任务放在情感分类任务的HTTP预测服务的预处理部分。
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model unet python3 -m paddle_serving_app.package --get_model unet
tar -xzvf unet.tar.gz tar -xzvf unet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf unet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf unet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494 python3 -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
``` ```
### Client Prediction ### Client Prediction
``` ```
python seg_client.py python3 seg_client.py
``` ```
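A hypothetical sketch of what `seg_client.py` might look like; the feed/fetch names, the sample image, and the preprocessing chain are assumptions rather than values taken from this commit:

```
# Sketch only: feed name "image", fetch name "output", and the resize shape are assumptions.
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, SegPostprocess

client = Client()
client.load_client_config("unet_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9494"])

preprocess = Sequential([File2Image(), Resize((512, 512))])
postprocess = SegPostprocess(2)  # 2 classes: foreground / background

filename = "sample.jpg"
im = preprocess(filename)
fetch_map = client.predict(feed={"image": im}, fetch=["output"])
fetch_map["filename"] = filename  # SegPostprocess writes the mask next to the input file
postprocess(fetch_map)
```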
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## 获取模型 ## 获取模型
``` ```
python -m paddle_serving_app.package --get_model unet python3 -m paddle_serving_app.package --get_model unet
tar -xzvf unet.tar.gz tar -xzvf unet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf unet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf unet.tar.gz
### 启动服务端 ### 启动服务端
``` ```
python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494 python3 -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
``` ```
### 客户端预测 ### 客户端预测
``` ```
python seg_client.py python3 seg_client.py
``` ```
...@@ -13,14 +13,14 @@ In order to show the time consuming of each stage more intuitively, a script is ...@@ -13,14 +13,14 @@ In order to show the time consuming of each stage more intuitively, a script is
When using, first save the output of the client to a file, taking `profile` as an example. When using, first save the output of the client to a file, taking `profile` as an example.
``` ```
python show_profile.py profile ${thread_num} python3 show_profile.py profile ${thread_num}
``` ```
Here the `thread_num` parameter is the number of processes when the client is running, and the script will calculate the average time spent in each phase according to this parameter. Here the `thread_num` parameter is the number of processes when the client is running, and the script will calculate the average time spent in each phase according to this parameter.
The script calculates the time spent in each stage, divides by the number of threads to average, and prints to standard output. The script calculates the time spent in each stage, divides by the number of threads to average, and prints to standard output.
``` ```
python timeline_trace.py profile trace python3 timeline_trace.py profile trace
``` ```
The script converts the time-dot information in the log into a json format and saves it to a trace file. The trace file can be visualized through the tracing function of the Chrome browser. The script converts the time-dot information in the log into a json format and saves it to a trace file. The trace file can be visualized through the tracing function of the Chrome browser.
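Putting the pieces together, a typical profiling run might look like the following; the client script name is illustrative, while `FLAGS_profile_client`/`FLAGS_profile_server` are the switches referred to above:

```
# Enable per-stage timing on both client and server
export FLAGS_profile_client=1
export FLAGS_profile_server=1

# Run the client and keep its output (the script name here is an assumption)
python3 test_client.py > profile 2>&1

# Summarize per-stage latency, averaged over 10 client processes
python3 show_profile.py profile 10

# Convert the timing dots into a Chrome-tracing file
python3 timeline_trace.py profile trace
```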
......
...@@ -13,14 +13,14 @@ export FLAGS_profile_server=1 #开启server端各阶段时间打点 ...@@ -13,14 +13,14 @@ export FLAGS_profile_server=1 #开启server端各阶段时间打点
使用时先将client的输出保存到文件,以profile为例。 使用时先将client的输出保存到文件,以profile为例。
``` ```
python show_profile.py profile ${thread_num} python3 show_profile.py profile ${thread_num}
``` ```
这里thread_num参数为client运行时的进程数,脚本将按照这个参数来计算各阶段的平均耗时。 这里thread_num参数为client运行时的进程数,脚本将按照这个参数来计算各阶段的平均耗时。
脚本将计算各阶段的耗时,并除以线程数做平均,打印到标准输出。 脚本将计算各阶段的耗时,并除以线程数做平均,打印到标准输出。
``` ```
python timeline_trace.py profile trace python3 timeline_trace.py profile trace
``` ```
脚本将日志中的时间打点信息转换成json格式保存到trace文件,trace文件可以通过chrome浏览器的tracing功能进行可视化。 脚本将日志中的时间打点信息转换成json格式保存到trace文件,trace文件可以通过chrome浏览器的tracing功能进行可视化。
......
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
def load(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, feed=[], fetch=[]):
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), use_lite=True, use_xpu=True, ir_optim=True)
bert_service.run_rpc_service()
bert_service.run_web_service()
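Once this service is running, it could be exercised roughly as follows; the script filename, the model directory, and the fetch name are assumptions, not values from this commit:

```
# Model directory and port are positional arguments of the script above
python3 bert_web_service.py bert_seq128_model 9292 &
# "pooled_output" is an assumed fetch name; check serving_server_conf.prototxt
curl -H "Content-Type:application/json" -X POST \
    -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' \
    http://127.0.0.1:9292/bert/prediction
```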
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
def load(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, feed=[], fetch=[]):
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), use_lite=True, use_xpu=True, ir_optim=True)
bert_service.run_rpc_service()
bert_service.run_web_service()
...@@ -13,28 +13,13 @@ sh get_data.sh ...@@ -13,28 +13,13 @@ sh get_data.sh
### Start server ### Start server
You can use the following code to start the RPC service You can use the following code to start the RPC service
```shell ```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
``` ```
### Client prediction ### Client prediction
The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`). The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip3 install paddlepaddle`).
``` shell ``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt python3 test_client.py uci_housing_client/serving_client_conf.prototxt
```
## HTTP service
### Start server
Start a web service with default web service hosting modules:
``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
```
### Client prediction
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
``` ```
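A minimal sketch of what `test_client.py` might look like for this model; the feed name `x` and fetch name `price` match the curl example elsewhere in this changeset, while the sample values are the uci_housing ones used throughout:

```
# Sketch only: assumes the uci_housing feed/fetch names used elsewhere in this repo.
import sys
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config(sys.argv[1])  # uci_housing_client/serving_client_conf.prototxt
client.connect(["127.0.0.1:9393"])

x = np.array([[0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
               -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]], dtype="float32")
fetch_map = client.predict(feed={"x": x}, fetch=["price"], batch=True)
print(fetch_map)
```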
...@@ -15,35 +15,19 @@ sh get_data.sh ...@@ -15,35 +15,19 @@ sh get_data.sh
### 开启服务端 ### 开启服务端
``` shell ``` shell
python test_server.py uci_housing_model/ python3 test_server.py uci_housing_model/
``` ```
也可以通过下面的一行代码开启默认RPC服务: 也可以通过下面的一行代码开启默认RPC服务:
```shell ```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
``` ```
### 客户端预测 ### 客户端预测
`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。 `test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip3 install paddlepaddle`)。
``` shell ``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt python3 test_client.py uci_housing_client/serving_client_conf.prototxt
```
## HTTP服务
### 开启服务端
通过下面的一行代码开启默认web服务:
``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim --name uci
```
### 客户端预测
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
``` ```
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz tar -xzvf resnet_v2_50_imagenet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
``` ```
### Client Prediction ### Client Prediction
``` ```
python resnet50_client.py python3 resnet50_client.py
``` ```
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## 获取模型 ## 获取模型
``` ```
python -m paddle_serving_app.package --get_model resnet_v2_50_imagenet python3 -m paddle_serving_app.package --get_model resnet_v2_50_imagenet
tar -xzvf resnet_v2_50_imagenet.tar.gz tar -xzvf resnet_v2_50_imagenet.tar.gz
``` ```
...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,11 +12,11 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### 启动服务端 ### 启动服务端
``` ```
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
``` ```
### 客户端预测 ### 客户端预测
``` ```
python resnet50_client.py python3 resnet50_client.py
``` ```
...@@ -26,5 +26,5 @@ python3 -m paddle_serving_server.serve --model serving_server --port 7702 --use_ ...@@ -26,5 +26,5 @@ python3 -m paddle_serving_server.serve --model serving_server --port 7702 --use_
### Client Prediction ### Client Prediction
``` ```
python vgg19_client.py python3 vgg19_client.py
``` ```
...@@ -5,19 +5,19 @@ ...@@ -5,19 +5,19 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model yolov4 python3 -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz tar -xzvf yolov4.tar.gz
``` ```
## Start RPC Service ## Start RPC Service
``` ```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 python3 -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
``` ```
## Prediction ## Prediction
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
After the prediction is completed, a JSON file with the prediction results and an image annotated with the detected bounding boxes will be generated in the `./output` folder. After the prediction is completed, a JSON file with the prediction results and an image annotated with the detected bounding boxes will be generated in the `./output` folder.
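A hypothetical sketch of `test_client.py` for this demo; the feed/fetch names, the label file, and the resize shape are assumptions and should be checked against the client prototxt shipped with the model:

```
# Sketch only: "image"/"im_size" feed names, the fetch name placeholder, and
# label_list.txt are assumptions.
import sys
import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import (Sequential, File2Image, BGR2RGB, Resize,
                                       Div, Transpose, RCNNPostprocess)

preprocess = Sequential([
    File2Image(), BGR2RGB(), Resize((608, 608)), Div(255.0), Transpose((2, 0, 1))
])
postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])

client = Client()
client.load_client_config("yolov4_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

im = preprocess(sys.argv[1])  # e.g. 000000570688.jpg
fetch_map = client.predict(
    feed={"image": im, "im_size": np.array(list(im.shape[1:]))},
    fetch=["multiclass_nms_0.tmp_0"])  # placeholder fetch name
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)  # writes the json and the annotated image under ./output
```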
...@@ -5,20 +5,20 @@ ...@@ -5,20 +5,20 @@
## 获取模型 ## 获取模型
``` ```
python -m paddle_serving_app.package --get_model yolov4 python3 -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz tar -xzvf yolov4.tar.gz
``` ```
## 启动RPC服务 ## 启动RPC服务
``` ```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 python3 -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
``` ```
## 预测 ## 预测
``` ```
python test_client.py 000000570688.jpg python3 test_client.py 000000570688.jpg
``` ```
预测完成会在`./output`文件夹下生成保存预测结果的json文件以及标出检测结果框的图片。 预测完成会在`./output`文件夹下生成保存预测结果的json文件以及标出检测结果框的图片。
...@@ -22,6 +22,7 @@ import argparse ...@@ -22,6 +22,7 @@ import argparse
from .proto import general_model_config_pb2 as m_config from .proto import general_model_config_pb2 as m_config
import paddle.inference as paddle_infer import paddle.inference as paddle_infer
import logging import logging
import glob
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("LocalPredictor") logger = logging.getLogger("LocalPredictor")
...@@ -51,6 +52,23 @@ class LocalPredictor(object): ...@@ -51,6 +52,23 @@ class LocalPredictor(object):
self.fetch_names_to_idx_ = {} self.fetch_names_to_idx_ = {}
self.fetch_names_to_type_ = {} self.fetch_names_to_type_ = {}
def search_suffix_files(self, model_path, target_suffix):
"""
Find all files with the suffix xxx in the specified directory.
Args:
model_path: model directory, not None.
target_suffix: filenames with target suffix, not None. e.g: *.pdmodel
Returns:
            file_list: None if the inputs are invalid, otherwise a (possibly empty) list of matching file paths.
"""
if model_path is None or target_suffix is None:
return None
file_list = glob.glob(os.path.join(model_path, target_suffix))
return file_list
def load_model_config(self, def load_model_config(self,
model_path, model_path,
use_gpu=False, use_gpu=False,
...@@ -97,11 +115,30 @@ class LocalPredictor(object): ...@@ -97,11 +115,30 @@ class LocalPredictor(object):
f = open(client_config, 'r') f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge( model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf) str(f.read()), model_conf)
# Init paddle_infer config
# Paddle's model files and parameter files have multiple naming rules:
# 1) __model__, __params__
# 2) *.pdmodel, *.pdiparams
# 3) __model__, conv2d_1.w_0, conv2d_2.w_0, fc_1.w_0, conv2d_1.b_0, ...
pdmodel_file_list = self.search_suffix_files(model_path, "*.pdmodel")
pdiparams_file_list = self.search_suffix_files(model_path,
"*.pdiparams")
if os.path.exists(os.path.join(model_path, "__params__")): if os.path.exists(os.path.join(model_path, "__params__")):
# case 1) initializing
config = paddle_infer.Config( config = paddle_infer.Config(
os.path.join(model_path, "__model__"), os.path.join(model_path, "__model__"),
os.path.join(model_path, "__params__")) os.path.join(model_path, "__params__"))
elif pdmodel_file_list and len(
pdmodel_file_list) > 0 and pdiparams_file_list and len(
pdiparams_file_list) > 0:
# case 2) initializing
logger.info("pdmodel_file_list:{}, pdiparams_file_list:{}".format(
pdmodel_file_list, pdiparams_file_list))
config = paddle_infer.Config(pdmodel_file_list[0],
pdiparams_file_list[0])
else: else:
# case 3) initializing.
config = paddle_infer.Config(model_path) config = paddle_infer.Config(model_path)
logger.info( logger.info(
...@@ -201,8 +238,9 @@ class LocalPredictor(object): ...@@ -201,8 +238,9 @@ class LocalPredictor(object):
Run model inference by Paddle Inference API. Run model inference by Paddle Inference API.
Args: Args:
feed: feed var feed: feed var list, None is not allowed.
fetch: fetch var fetch: fetch var list, None allowed. when it is None, all fetch
vars are returned. Otherwise, return fetch specified result.
batch: batch data or not, False default.If batch is False, a new batch: batch data or not, False default.If batch is False, a new
dimension is added to header of the shape[np.newaxis]. dimension is added to header of the shape[np.newaxis].
log_id: for logging log_id: for logging
...@@ -210,16 +248,8 @@ class LocalPredictor(object): ...@@ -210,16 +248,8 @@ class LocalPredictor(object):
Returns: Returns:
fetch_map: dict fetch_map: dict
""" """
if feed is None or fetch is None: if feed is None:
raise ValueError("You should specify feed and fetch for prediction.\ raise ValueError("You should specify feed vars for prediction.\
log_id:{}".format(log_id))
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, list):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string.\
log_id:{}".format(log_id)) log_id:{}".format(log_id))
feed_batch = [] feed_batch = []
...@@ -231,18 +261,20 @@ class LocalPredictor(object): ...@@ -231,18 +261,20 @@ class LocalPredictor(object):
raise ValueError("Feed only accepts dict and list of dict.\ raise ValueError("Feed only accepts dict and list of dict.\
log_id:{}".format(log_id)) log_id:{}".format(log_id))
fetch_names = [] fetch_list = []
if fetch is not None:
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, list):
fetch_list = fetch
# Filter invalid fetch names # Filter invalid fetch names
fetch_names = []
for key in fetch_list: for key in fetch_list:
if key in self.fetch_names_: if key in self.fetch_names_:
fetch_names.append(key) fetch_names.append(key)
if len(fetch_names) == 0: # Assemble the input data of paddle predictor, and filter invalid inputs.
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.\
log_id:{}".format(log_id))
# Assemble the input data of paddle predictor
input_names = self.predictor.get_input_names() input_names = self.predictor.get_input_names()
for name in input_names: for name in input_names:
if isinstance(feed[name], list): if isinstance(feed[name], list):
...@@ -282,11 +314,15 @@ class LocalPredictor(object): ...@@ -282,11 +314,15 @@ class LocalPredictor(object):
input_tensor_handle.copy_from_cpu(feed[name][np.newaxis, :]) input_tensor_handle.copy_from_cpu(feed[name][np.newaxis, :])
else: else:
input_tensor_handle.copy_from_cpu(feed[name]) input_tensor_handle.copy_from_cpu(feed[name])
# set output tensor handlers
output_tensor_handles = [] output_tensor_handles = []
output_name_to_index_dict = {}
output_names = self.predictor.get_output_names() output_names = self.predictor.get_output_names()
for output_name in output_names: for i, output_name in enumerate(output_names):
output_tensor_handle = self.predictor.get_output_handle(output_name) output_tensor_handle = self.predictor.get_output_handle(output_name)
output_tensor_handles.append(output_tensor_handle) output_tensor_handles.append(output_tensor_handle)
output_name_to_index_dict[output_name] = i
# Run inference # Run inference
self.predictor.run() self.predictor.run()
...@@ -296,10 +332,43 @@ class LocalPredictor(object): ...@@ -296,10 +332,43 @@ class LocalPredictor(object):
for output_tensor_handle in output_tensor_handles: for output_tensor_handle in output_tensor_handles:
output = output_tensor_handle.copy_to_cpu() output = output_tensor_handle.copy_to_cpu()
outputs.append(output) outputs.append(output)
outputs_len = len(outputs)
# Copy fetch vars. If fetch is None, it will copy all results from output_tensor_handles.
# Otherwise, it will copy the fields specified from output_tensor_handles.
fetch_map = {} fetch_map = {}
for i, name in enumerate(fetch): if fetch is None:
fetch_map[name] = outputs[i] for i, name in enumerate(output_names):
if len(output_tensor_handles[i].lod()) > 0: fetch_map[name] = outputs[i]
fetch_map[name + ".lod"] = np.array(output_tensor_handles[i] if len(output_tensor_handles[i].lod()) > 0:
.lod()[0]).astype('int32') fetch_map[name + ".lod"] = np.array(output_tensor_handles[
i].lod()[0]).astype('int32')
else:
            # Because the save_inference_model interface may add a scale op to the
            # network, the fetch_var names can differ from those in the prototxt.
            # The code below therefore stays compatible with v0.6.x and earlier
            # model-saving formats and handles the case where the names do not match.
fetch_match_num = 0
for i, name in enumerate(fetch):
output_index = output_name_to_index_dict.get(name)
if output_index is None:
continue
fetch_map[name] = outputs[output_index]
fetch_match_num += 1
if len(output_tensor_handles[output_index].lod()) > 0:
fetch_map[name + ".lod"] = np.array(output_tensor_handles[
output_index].lod()[0]).astype('int32')
# Compatible with v0.6.x and lower versions model saving formats.
if fetch_match_num == 0:
logger.debug("fetch match num is 0. Retrain the model please!")
for i, name in enumerate(fetch):
if i >= outputs_len:
break
fetch_map[name] = outputs[i]
if len(output_tensor_handles[i].lod()) > 0:
fetch_map[name + ".lod"] = np.array(
output_tensor_handles[i].lod()[0]).astype('int32')
return fetch_map return fetch_map
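With this change, `fetch` becomes optional for the local predictor: passing `None` returns every output of the model, keyed by its real output name. A hypothetical usage sketch (the model directory and the feed name `x` are taken from the uci_housing examples in this changeset, but remain assumptions):

```
# Sketch only: model path and feed name "x" are assumptions.
import numpy as np
from paddle_serving_app.local_predict import LocalPredictor

predictor = LocalPredictor()
predictor.load_model_config("uci_housing_model", use_gpu=False)

x = np.array([[0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
               -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]], dtype="float32")

# fetch=None now returns all outputs
fetch_map = predictor.predict(feed={"x": x}, fetch=None, batch=True)
print(fetch_map.keys())
```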
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
from .chinese_bert_reader import ChineseBertReader from .chinese_bert_reader import ChineseBertReader
from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, Base64ToImage from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, Base64ToImage
from .image_reader import DetectionFile2Image, DetectionSequential, DetectionNormalize, DetectionTranspose, DetectionResize, DetectionBGR2RGB, DetectionPadStride
from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, ResizeByFactor from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, ResizeByFactor
from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFacePostprocess from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFacePostprocess
from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
......
...@@ -498,6 +498,42 @@ class Sequential(object): ...@@ -498,6 +498,42 @@ class Sequential(object):
return format_string_ return format_string_
class DetectionSequential(object):
"""
Args:
sequence (sequence of ``Transform`` objects): list of transforms to chain.
        This API references some of the design patterns of torchvision.
        Users can simply use this API in training as well.
        Example:
        >>> image_reader.DetectionSequential([
>>> transforms.CenterCrop(10),
>>> ])
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, im):
im_info = {
'scale_factor': np.array(
[1., 1.], dtype=np.float32),
'im_shape': None,
}
for t in self.transforms:
im, im_info = t(im, im_info)
return im, im_info
def __repr__(self):
format_string_ = self.__class__.__name__ + '('
for t in self.transforms:
format_string_ += '\n'
format_string_ += ' {0}'.format(t)
format_string_ += '\n)'
return format_string_
class RGB2BGR(object): class RGB2BGR(object):
def __init__(self): def __init__(self):
pass pass
...@@ -520,6 +556,17 @@ class BGR2RGB(object): ...@@ -520,6 +556,17 @@ class BGR2RGB(object):
return self.__class__.__name__ + "()" return self.__class__.__name__ + "()"
class DetectionBGR2RGB(object):
def __init__(self):
pass
def __call__(self, img, img_info=None):
return img[:, :, ::-1], img_info
def __repr__(self):
return self.__class__.__name__ + "()"
class String2Image(object): class String2Image(object):
def __init__(self): def __init__(self):
pass pass
...@@ -556,6 +603,33 @@ class File2Image(object): ...@@ -556,6 +603,33 @@ class File2Image(object):
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + "()" return self.__class__.__name__ + "()"
class DetectionFile2Image(object):
def __init__(self):
pass
def __call__(self, img_path, im_info=None):
if py_version == 2:
fin = open(img_path)
else:
fin = open(img_path, "rb")
sample = fin.read()
data = np.fromstring(sample, np.uint8)
img = cv2.imdecode(data, cv2.IMREAD_COLOR)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
'''
img = cv2.imread(img_path, -1)
channels = img.shape[2]
ori_h = img.shape[0]
ori_w = img.shape[1]
'''
if im_info is not None:
im_info['im_shape'] = np.array(img.shape[:2], dtype=np.float32)
im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return img, im_info
def __repr__(self):
return self.__class__.__name__ + "()"
class URL2Image(object): class URL2Image(object):
def __init__(self): def __init__(self):
...@@ -607,6 +681,27 @@ class Div(object): ...@@ -607,6 +681,27 @@ class Div(object):
def __repr__(self): def __repr__(self):
return self.__class__.__name__ + "({})".format(self.value) return self.__class__.__name__ + "({})".format(self.value)
class DetectionDiv(object):
""" divide by some float number """
def __init__(self, value):
self.value = value
def __call__(self, img, img_info=None):
"""
Args:
img (numpy array): (int8 numpy array)
Returns:
img (numpy array): (float32 numpy array)
"""
img = img.astype('float32') / self.value
return img, img_info
def __repr__(self):
return self.__class__.__name__ + "({})".format(self.value)
class Normalize(object): class Normalize(object):
"""Normalize a tensor image with mean and standard deviation. """Normalize a tensor image with mean and standard deviation.
...@@ -643,6 +738,51 @@ class Normalize(object): ...@@ -643,6 +738,51 @@ class Normalize(object):
self.std) self.std)
class DetectionNormalize(object):
"""Normalize a tensor image with mean and standard deviation.
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
will normalize each channel of the input ``torch.*Tensor`` i.e.
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
.. note::
This transform acts out of place, i.e., it does not mutate the input tensor.
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
is_scale (bool): whether need im / 255
"""
def __init__(self, mean, std, is_scale=True):
self.mean = mean
self.std = std
self.is_scale = is_scale
def __call__(self, im, im_info=None):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.astype(np.float32, copy=False)
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
return im, im_info
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean,
self.std)
class Lambda(object): class Lambda(object):
"""Apply a user-defined lambda as a transform. """Apply a user-defined lambda as a transform.
Very shame to just copy from Very shame to just copy from
...@@ -716,6 +856,124 @@ class Resize(object): ...@@ -716,6 +856,124 @@ class Resize(object):
self.size, self.max_size, self.size, self.max_size,
_cv2_interpolation_to_str[self.interpolation]) _cv2_interpolation_to_str[self.interpolation])
class DetectionResize(object):
"""resize image by target_size and max_size
Args:
        target_size (int|list): the target size of the image
        keep_ratio (bool): whether to keep the aspect ratio, default True
        interpolation (int): interpolation method used for resizing
"""
def __init__(self, target_size, keep_ratio=True, interpolation=cv2.INTER_LINEAR):
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
self.keep_ratio = keep_ratio
self.interpolation = interpolation
def __call__(self, im, im_info=None):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
im_channel = im.shape[2]
im_scale_y, im_scale_x = self.generate_scale(im)
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interpolation)
if im_info is not None:
im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
im_info['scale_factor'] = np.array(
[im_scale_y, im_scale_x]).astype('float32')
return im, im_info
def generate_scale(self, im):
"""
Args:
im (np.ndarray): image (np.ndarray)
Returns:
im_scale_x: the resize ratio of X
im_scale_y: the resize ratio of Y
"""
origin_shape = im.shape[:2]
im_c = im.shape[2]
if self.keep_ratio:
im_size_min = np.min(origin_shape)
im_size_max = np.max(origin_shape)
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = float(target_size_min) / float(im_size_min)
if np.round(im_scale * im_size_max) > target_size_max:
im_scale = float(target_size_max) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / float(origin_shape[0])
im_scale_x = resize_w / float(origin_shape[1])
return im_scale_y, im_scale_x
def __repr__(self):
        return self.__class__.__name__ + '(target_size={0}, keep_ratio={1}, interpolation={2})'.format(
            self.target_size, self.keep_ratio,
_cv2_interpolation_to_str[self.interpolation])
class PadStride(object):
def __init__(self, stride):
self.coarsest_stride = stride
def __call__(self, img):
coarsest_stride = self.coarsest_stride
if coarsest_stride == 0:
return img
im_c, im_h, im_w = img.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = img
im_info = {}
im_info['resize_shape'] = padding_im.shape[1:]
return padding_im
class DetectionPadStride(object):
""" padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
Args:
stride (bool): model with FPN need image shape % stride == 0
"""
def __init__(self, stride=0):
self.coarsest_stride = stride
def __call__(self, im, im_info=None):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
coarsest_stride = self.coarsest_stride
if coarsest_stride <= 0:
return im
im_c, im_h, im_w = im.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
return padding_im, im_info
class ResizeByFactor(object): class ResizeByFactor(object):
"""Resize the input numpy array Image to a size multiple of factor which is usually required by a network """Resize the input numpy array Image to a size multiple of factor which is usually required by a network
...@@ -768,24 +1026,6 @@ class ResizeByFactor(object): ...@@ -768,24 +1026,6 @@ class ResizeByFactor(object):
self.factor, self.max_side_len) self.factor, self.max_side_len)
class PadStride(object):
def __init__(self, stride):
self.coarsest_stride = stride
def __call__(self, img):
coarsest_stride = self.coarsest_stride
if coarsest_stride == 0:
return img
im_c, im_h, im_w = img.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = img
im_info = {}
im_info['resize_shape'] = padding_im.shape[1:]
return padding_im
class Transpose(object): class Transpose(object):
def __init__(self, transpose_target): def __init__(self, transpose_target):
self.transpose_target = transpose_target self.transpose_target = transpose_target
...@@ -799,6 +1039,19 @@ class Transpose(object): ...@@ -799,6 +1039,19 @@ class Transpose(object):
"({})".format(self.transpose_target) "({})".format(self.transpose_target)
return format_string return format_string
class DetectionTranspose(object):
def __init__(self, transpose_target):
self.transpose_target = transpose_target
def __call__(self, im, im_info=None):
im = F.transpose(im, self.transpose_target)
return im, im_info
def __repr__(self):
format_string = self.__class__.__name__ + \
"({})".format(self.transpose_target)
return format_string
class SortedBoxes(object): class SortedBoxes(object):
""" """
......
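Taken together, the new `Detection*` ops are meant to be chained like the existing `Sequential` transforms, while threading an `im_info` dict through every step. A hypothetical pipeline sketch (the target size, the normalization constants, and the sample file are illustrative only, not values from this commit):

```
# Illustrative only: target size, mean/std, and the image file are assumptions.
from paddle_serving_app.reader import (DetectionSequential, DetectionFile2Image,
                                       DetectionResize, DetectionNormalize,
                                       DetectionTranspose)

preprocess = DetectionSequential([
    DetectionFile2Image(),
    DetectionResize((608, 608), keep_ratio=False),
    DetectionNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], is_scale=True),
    DetectionTranspose((2, 0, 1)),
])

im, im_info = preprocess("000000570688.jpg")
print(im.shape, im_info["im_shape"], im_info["scale_factor"])
```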
...@@ -31,15 +31,21 @@ sys.path.append( ...@@ -31,15 +31,21 @@ sys.path.append(
#param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
#param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32
#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) #param 'type'(which is in feed_var or fetch_var) = 5 means dataType is float16
#param 'type'(which is in feed_var or fetch_var) = 7 means dataType is uint8
#param 'type'(which is in feed_var or fetch_var) = 8 means dataType is int8
#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto)
int64_type = 0 int64_type = 0
float32_type = 1 float32_type = 1
int32_type = 2 int32_type = 2
bytes_type = 3 float16_type = 5
uint8_type = 7
int8_type = 8
bytes_type = 20
#int_type,float_type,string_type are the set of each subdivision classes. #int_type,float_type,string_type are the set of each subdivision classes.
int_type = set([int64_type, int32_type]) int_type = set([int64_type, int32_type])
float_type = set([float32_type]) float_type = set([float32_type])
string_type = set([bytes_type]) string_type = set([bytes_type, float16_type, uint8_type, int8_type])
class _NOPProfiler(object): class _NOPProfiler(object):
...@@ -289,31 +295,39 @@ class Client(object): ...@@ -289,31 +295,39 @@ class Client(object):
log_id=0): log_id=0):
self.profile_.record('py_prepro_0') self.profile_.record('py_prepro_0')
        if feed is None or fetch is None:         # fetch may be empty; in that case all outputs are returned
raise ValueError("You should specify feed and fetch for prediction") if feed is None:
raise ValueError("You should specify feed for prediction")
fetch_list = [] fetch_list = []
if isinstance(fetch, str): if isinstance(fetch, str):
fetch_list = [fetch] fetch_list = [fetch]
elif isinstance(fetch, list): elif isinstance(fetch, list):
fetch_list = fetch fetch_list = fetch
        # fetch may be empty; in that case all outputs are returned
elif fetch == None:
pass
else: else:
raise ValueError("Fetch only accepts string and list of string") raise ValueError("Fetch only accepts string or list of string")
feed_batch = [] feed_batch = []
if isinstance(feed, dict): if isinstance(feed, dict):
feed_batch.append(feed) feed_batch.append(feed)
elif isinstance(feed, list): elif isinstance(feed, list):
# if input is a list and the number of feed_var is 1. # feed = [dict]
# create a temp_dict { key = feed_var_name, value = list} if len(feed) == 1 and isinstance(feed[0], dict):
# put the temp_dict into the feed_batch. feed_batch = feed
if len(self.feed_names_) != 1: else:
raise ValueError( # if input is a list and the number of feed_var is 1.
"input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list" # create a temp_dict { key = feed_var_name, value = list}
) # put the temp_dict into the feed_batch.
temp_dict = {} if len(self.feed_names_) != 1:
temp_dict[self.feed_names_[0]] = feed raise ValueError(
feed_batch.append(temp_dict) "input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
)
temp_dict = {}
temp_dict[self.feed_names_[0]] = feed
feed_batch.append(temp_dict)
else: else:
raise ValueError("Feed only accepts dict and list of dict") raise ValueError("Feed only accepts dict and list of dict")
...@@ -321,10 +335,15 @@ class Client(object): ...@@ -321,10 +335,15 @@ class Client(object):
if len(feed_batch) != 1: if len(feed_batch) != 1:
raise ValueError("len of feed_batch can only be 1.") raise ValueError("len of feed_batch can only be 1.")
int_slot = [] int32_slot = []
int_feed_names = [] int32_feed_names = []
int_shape = [] int32_shape = []
int_lod_slot_batch = [] int32_lod_slot_batch = []
int64_slot = []
int64_feed_names = []
int64_shape = []
int64_lod_slot_batch = []
float_slot = [] float_slot = []
float_feed_names = [] float_feed_names = []
...@@ -341,10 +360,6 @@ class Client(object): ...@@ -341,10 +360,6 @@ class Client(object):
if key in self.fetch_names_: if key in self.fetch_names_:
fetch_names.append(key) fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.")
feed_dict = feed_batch[0] feed_dict = feed_batch[0]
for key in feed_dict: for key in feed_dict:
if ".lod" not in key and key not in self.feed_names_: if ".lod" not in key and key not in self.feed_names_:
...@@ -354,27 +369,39 @@ class Client(object): ...@@ -354,27 +369,39 @@ class Client(object):
self.shape_check(feed_dict, key) self.shape_check(feed_dict, key)
if self.feed_types_[key] in int_type: if self.feed_types_[key] in int_type:
int_feed_names.append(key)
shape_lst = [] shape_lst = []
if batch == False: if batch == False:
feed_dict[key] = np.expand_dims(feed_dict[key], 0).repeat( feed_dict[key] = np.expand_dims(feed_dict[key], 0).repeat(
1, axis=0) 1, axis=0)
if isinstance(feed_dict[key], np.ndarray): # verify different input int_type
shape_lst.extend(list(feed_dict[key].shape)) if(self.feed_types_[key] == int64_type):
int_shape.append(shape_lst) int64_feed_names.append(key)
else: if isinstance(feed_dict[key], np.ndarray):
int_shape.append(self.feed_shapes_[key]) shape_lst.extend(list(feed_dict[key].shape))
if "{}.lod".format(key) in feed_dict: int64_shape.append(shape_lst)
int_lod_slot_batch.append(feed_dict["{}.lod".format(key)]) self.has_numpy_input = True
else: else:
int_lod_slot_batch.append([]) int64_shape.append(self.feed_shapes_[key])
self.all_numpy_input = False
if isinstance(feed_dict[key], np.ndarray): if "{}.lod".format(key) in feed_dict:
int_slot.append(np.ascontiguousarray(feed_dict[key])) int64_lod_slot_batch.append(feed_dict["{}.lod".format(key)])
self.has_numpy_input = True else:
int64_lod_slot_batch.append([])
int64_slot.append(np.ascontiguousarray(feed_dict[key]))
else: else:
int_slot.append(np.ascontiguousarray(feed_dict[key])) int32_feed_names.append(key)
self.all_numpy_input = False if isinstance(feed_dict[key], np.ndarray):
shape_lst.extend(list(feed_dict[key].shape))
int32_shape.append(shape_lst)
self.has_numpy_input = True
else:
int32_shape.append(self.feed_shapes_[key])
self.all_numpy_input = False
if "{}.lod".format(key) in feed_dict:
int32_lod_slot_batch.append(feed_dict["{}.lod".format(key)])
else:
int32_lod_slot_batch.append([])
int32_slot.append(np.ascontiguousarray(feed_dict[key]))
elif self.feed_types_[key] in float_type: elif self.feed_types_[key] in float_type:
float_feed_names.append(key) float_feed_names.append(key)
...@@ -407,7 +434,10 @@ class Client(object): ...@@ -407,7 +434,10 @@ class Client(object):
key)]) key)])
else: else:
string_lod_slot_batch.append([]) string_lod_slot_batch.append([])
string_slot.append(feed_dict[key]) if type(feed_dict[key]) is np.ndarray:
string_slot.append(feed_dict[key].tostring())
else:
string_slot.append(feed_dict[key])
self.has_numpy_input = True self.has_numpy_input = True
self.profile_.record('py_prepro_1') self.profile_.record('py_prepro_1')
...@@ -417,7 +447,8 @@ class Client(object): ...@@ -417,7 +447,8 @@ class Client(object):
if self.all_numpy_input: if self.all_numpy_input:
res = self.client_handle_.numpy_predict( res = self.client_handle_.numpy_predict(
float_slot, float_feed_names, float_shape, float_lod_slot_batch, float_slot, float_feed_names, float_shape, float_lod_slot_batch,
int_slot, int_feed_names, int_shape, int_lod_slot_batch, int32_slot, int32_feed_names, int32_shape, int32_lod_slot_batch,
int64_slot, int64_feed_names, int64_shape, int64_lod_slot_batch,
string_slot, string_feed_names, string_shape, string_slot, string_feed_names, string_shape,
string_lod_slot_batch, fetch_names, result_batch_handle, string_lod_slot_batch, fetch_names, result_batch_handle,
self.pid, log_id) self.pid, log_id)
...@@ -439,6 +470,9 @@ class Client(object): ...@@ -439,6 +470,9 @@ class Client(object):
model_engine_names = result_batch_handle.get_engine_names() model_engine_names = result_batch_handle.get_engine_names()
for mi, engine_name in enumerate(model_engine_names): for mi, engine_name in enumerate(model_engine_names):
result_map = {} result_map = {}
# fetch 为空,则会取所有的输出结果
if len(fetch_names) == 0:
fetch_names = result_batch_handle.get_tensor_alias_names(mi)
# result map needs to be a numpy array # result map needs to be a numpy array
for i, name in enumerate(fetch_names): for i, name in enumerate(fetch_names):
if self.fetch_names_to_type_[name] == int64_type: if self.fetch_names_to_type_[name] == int64_type:
...@@ -485,6 +519,54 @@ class Client(object): ...@@ -485,6 +519,54 @@ class Client(object):
tmp_lod = result_batch_handle.get_lod(mi, name) tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0: if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == uint8_type:
# result_map[name] will be py::array(numpy array)
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype = np.uint8)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int8_type:
# result_map[name] will be py::array(numpy array)
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype = np.int8)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == float16_type:
# result_map[name] will be py::array(numpy array)
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype = np.float16)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map) multi_result_map.append(result_map)
ret = None ret = None
if len(model_engine_names) == 1: if len(model_engine_names) == 1:
......
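On the RPC client side the same relaxation applies: `fetch` may now be omitted, in which case the result map is keyed by all tensor alias names the server returns, while int32 and int64 feeds travel through separate slots. A hypothetical call, reusing the uci_housing feed name and endpoint from the examples above:

```
# Sketch of the optional-fetch behaviour; feed name "x" and the endpoint come
# from the uci_housing examples in this changeset.
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

x = np.array([[0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
               -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]], dtype="float32")

# fetch=None: the result map contains every output of the model
fetch_map = client.predict(feed={"x": x}, fetch=None, batch=True)
print(fetch_map)
```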
...@@ -23,6 +23,12 @@ from .io import inference_model_to_serving ...@@ -23,6 +23,12 @@ from .io import inference_model_to_serving
def parse_args(): # pylint: disable=doc-string-missing def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("convert") parser = argparse.ArgumentParser("convert")
parser.add_argument(
"--show_proto",
type=bool,
default=False,
        help='If set, print the proto so you can preview it and decide the feed var and fetch var alias names.'
)
parser.add_argument( parser.add_argument(
"--dirname", "--dirname",
type=str, type=str,
...@@ -53,6 +59,18 @@ def parse_args(): # pylint: disable=doc-string-missing ...@@ -53,6 +59,18 @@ def parse_args(): # pylint: disable=doc-string-missing
default=None, default=None,
help='The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.' help='The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.'
) )
parser.add_argument(
"--feed_alias_names",
type=str,
default=None,
        help='Set alias names for feed vars, separated by commas. Run --show_proto first to check the number of feed vars.'
)
parser.add_argument(
"--fetch_alias_names",
type=str,
default=None,
        help='Set alias names for fetch vars, separated by commas. Run --show_proto first to check the number of fetch vars.'
)
return parser.parse_args() return parser.parse_args()
...@@ -63,4 +81,7 @@ if __name__ == "__main__": ...@@ -63,4 +81,7 @@ if __name__ == "__main__":
serving_server=args.serving_server, serving_server=args.serving_server,
serving_client=args.serving_client, serving_client=args.serving_client,
model_filename=args.model_filename, model_filename=args.model_filename,
params_filename=args.params_filename) params_filename=args.params_filename,
show_proto=args.show_proto,
feed_alias_names=args.feed_alias_names,
fetch_alias_names=args.fetch_alias_names)
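Assuming the converter keeps its usual entry point (`python3 -m paddle_serving_client.convert`, as in the released packages), the new flags could be used roughly like this; the paths and alias values are placeholders:

```
# Inspect the feed/fetch vars of an inference model without converting it
python3 -m paddle_serving_client.convert --dirname ./inference_model --show_proto True

# Convert and rename the aliases (comma separated, one per var)
python3 -m paddle_serving_client.convert --dirname ./inference_model \
    --model_filename model.pdmodel --params_filename model.pdiparams \
    --serving_server serving_server --serving_client serving_client \
    --feed_alias_names image --fetch_alias_names score
```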
...@@ -22,6 +22,7 @@ import gzip ...@@ -22,6 +22,7 @@ import gzip
from collections import Iterable from collections import Iterable
import base64 import base64
import sys import sys
import re
import grpc import grpc
from .proto import general_model_service_pb2 from .proto import general_model_service_pb2
...@@ -31,13 +32,18 @@ from .proto import general_model_service_pb2_grpc ...@@ -31,13 +32,18 @@ from .proto import general_model_service_pb2_grpc
#param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
#param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32
#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) #param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto)
int64_type = 0 int64_type = 0
float32_type = 1 float32_type = 1
int32_type = 2 int32_type = 2
bytes_type = 3 bytes_type = 20
# this is corresponding to the proto # this is corresponding to the proto
proto_data_key_list = ["int64_data", "float_data", "int_data", "data"] proto_data_key_list = {
0: "int64_data",
1: "float_data",
2: "int_data",
20: "data"
}
def list_flatten(items, ignore_types=(str, bytes)): def list_flatten(items, ignore_types=(str, bytes)):
...@@ -73,9 +79,9 @@ def data_bytes_number(datalist): ...@@ -73,9 +79,9 @@ def data_bytes_number(datalist):
# You can directly call the http_client_predict/grpc_client_predict you need # You can directly call the http_client_predict/grpc_client_predict you need
# For example, to use gRPC, call set_use_grpc_client(True) # For example, to use gRPC, call set_use_grpc_client(True)
# or call grpc_client_predict() directly # or call grpc_client_predict() directly
class GeneralClient(object): class HttpClient(object):
def __init__(self, def __init__(self,
ip="0.0.0.0", ip="127.0.0.1",
port="9393", port="9393",
service_name="/GeneralModelService/inference"): service_name="/GeneralModelService/inference"):
self.feed_names_ = [] self.feed_names_ = []
...@@ -84,7 +90,7 @@ class GeneralClient(object): ...@@ -84,7 +90,7 @@ class GeneralClient(object):
self.feed_shapes_ = {} self.feed_shapes_ = {}
self.feed_types_ = {} self.feed_types_ = {}
self.feed_names_to_idx_ = {} self.feed_names_to_idx_ = {}
self.timeout_ms = 200000 self.timeout_ms = 20000
self.ip = ip self.ip = ip
self.port = port self.port = port
self.server_port = port self.server_port = port
...@@ -93,9 +99,24 @@ class GeneralClient(object): ...@@ -93,9 +99,24 @@ class GeneralClient(object):
self.try_request_gzip = False self.try_request_gzip = False
self.try_response_gzip = False self.try_response_gzip = False
self.total_data_number = 0 self.total_data_number = 0
self.headers = {}
self.http_proto = True self.http_proto = True
self.headers["Content-Type"] = "application/proto"
self.max_body_size = 512 * 1024 * 1024 self.max_body_size = 512 * 1024 * 1024
self.use_grpc_client = False self.use_grpc_client = False
self.http_s = "http://"
        # Use a requests session (connection pool) so connections are not re-established for every request
self.requests_session = requests.session()
        # Initialize the gRPC stub
options = [('grpc.max_receive_message_length', self.max_body_size),
('grpc.max_send_message_length', self.max_body_size)]
endpoints = [self.ip + ":" + self.server_port]
g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = general_model_service_pb2_grpc.GeneralModelServiceStub(
self.channel_)
def load_client_config(self, model_config_path_list): def load_client_config(self, model_config_path_list):
if isinstance(model_config_path_list, str): if isinstance(model_config_path_list, str):
...@@ -162,14 +183,57 @@ class GeneralClient(object): ...@@ -162,14 +183,57 @@ class GeneralClient(object):
else: else:
self.timeout_ms = timeout_ms self.timeout_ms = timeout_ms
def set_ip(self, ip): def set_max_retries(self, retry_times=3):
self.ip = ip if not isinstance(retry_times, int):
raise ValueError("retry_times must be int type.")
else:
self.requests_session.mount(
self.http_s, HTTPAdapter(max_retries=retry_times))
def set_service_name(self, service_name): def set_service_name(self, service_name):
self.service_name = service_name self.service_name = service_name
def set_port(self, port): def connect(self, url=None, encryption=False):
self.port = port if isinstance(url, (list, tuple)):
if len(url) > 1:
raise ValueError("HttpClient only support 1 endpoint")
else:
url = url[0]
if isinstance(url, str):
if url.startswith("https://"):
url = url[8:]
self.http_s = "https://"
if url.startswith("http://"):
url = url[7:]
self.http_s = "http://"
url_parts = url.split(':')
if len(url_parts) != 2 or self.check_ip(url_parts[0]) == False:
raise ValueError(
"url not right, it should be like 127.0.0.1:9393 or http://127.0.0.1:9393"
)
else:
self.ip = url_parts[0]
self.port = url_parts[1]
self.server_port = url_parts[1]
if encryption:
self.get_serving_port()
if self.use_grpc_client:
self.init_grpc_stub()
def check_ip(self, ipAddr):
compile_ip = re.compile(
'^(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[1-9])\.(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)$'
)
if compile_ip.match(ipAddr):
return True
else:
return False
def add_http_headers(self, headers):
if isinstance(headers, dict):
self.headers.update(headers)
else:
print("headers must be a dict")
def set_request_compress(self, try_request_gzip): def set_request_compress(self, try_request_gzip):
self.try_request_gzip = try_request_gzip self.try_request_gzip = try_request_gzip
...@@ -179,6 +243,10 @@ class GeneralClient(object): ...@@ -179,6 +243,10 @@ class GeneralClient(object):
def set_http_proto(self, http_proto): def set_http_proto(self, http_proto):
self.http_proto = http_proto self.http_proto = http_proto
if self.http_proto:
self.headers["Content-Type"] = "application/proto"
else:
self.headers["Content-Type"] = "application/json"
def set_use_grpc_client(self, use_grpc_client): def set_use_grpc_client(self, use_grpc_client):
self.use_grpc_client = use_grpc_client self.use_grpc_client = use_grpc_client
...@@ -187,21 +255,21 @@ class GeneralClient(object): ...@@ -187,21 +255,21 @@ class GeneralClient(object):
def use_key(self, key_filename): def use_key(self, key_filename):
with open(key_filename, "rb") as f: with open(key_filename, "rb") as f:
self.key = f.read() self.key = f.read()
self.get_serving_port()
def get_serving_port(self): def get_serving_port(self):
encrypt_url = "http://" + str(self.ip) + ":" + str(self.port) encrypt_url = self.http_s + str(self.ip) + ":" + str(self.port)
if self.key is not None: if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()}) req = json.dumps({"key": base64.b64encode(self.key).decode()})
else: else:
req = json.dumps({}) req = json.dumps({})
r = requests.post(encrypt_url, req) with requests.post(
result = r.json() encrypt_url, data=req, timeout=self.timeout_ms / 1000) as r:
if "endpoint_list" not in result: result = r.json()
raise ValueError("server not ready") if "endpoint_list" not in result:
else: raise ValueError("server not ready")
self.server_port = str(result["endpoint_list"][0]) else:
print("rpc port is ", self.server_port) self.server_port = str(result["endpoint_list"][0])
print("rpc port is ", self.server_port)
def get_feed_names(self): def get_feed_names(self):
return self.feed_names_ return self.feed_names_
...@@ -210,35 +278,34 @@ class GeneralClient(object): ...@@ -210,35 +278,34 @@ class GeneralClient(object):
return self.fetch_names_ return self.fetch_names_
def get_legal_fetch(self, fetch): def get_legal_fetch(self, fetch):
if fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = [] fetch_list = []
if isinstance(fetch, str): if isinstance(fetch, str):
fetch_list = [fetch] fetch_list = [fetch]
elif isinstance(fetch, (list, tuple)): elif isinstance(fetch, (list, tuple)):
fetch_list = fetch fetch_list = fetch
elif fetch == None:
pass
else: else:
raise ValueError("Fetch only accepts string and list of string") raise ValueError("Fetch only accepts string/list/tuple of string")
fetch_names = [] fetch_names = []
for key in fetch_list: for key in fetch_list:
if key in self.fetch_names_: if key in self.fetch_names_:
fetch_names.append(key) fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.")
return {}
return fetch_names return fetch_names
def get_feedvar_dict(self, feed): def get_feedvar_dict(self, feed):
if feed is None: if feed is None:
raise ValueError("You should specify feed and fetch for prediction") raise ValueError("You should specify feed for prediction")
feed_dict = {} feed_dict = {}
if isinstance(feed, dict): if isinstance(feed, dict):
feed_dict = feed feed_dict = feed
elif isinstance(feed, (list, str, tuple)): elif isinstance(feed, (list, str, tuple)):
# feed = [dict]
if len(feed) == 1 and isinstance(feed[0], dict):
feed_dict = feed[0]
return feed_dict
# if input is a list or str or tuple, and the number of feed_var is 1. # if input is a list or str or tuple, and the number of feed_var is 1.
# create a feed_dict { key = feed_var_name, value = list} # create a feed_dict { key = feed_var_name, value = list}
if len(self.feed_names_) == 1: if len(self.feed_names_) == 1:
...@@ -376,17 +443,19 @@ class GeneralClient(object): ...@@ -376,17 +443,19 @@ class GeneralClient(object):
# In this case, first normalize the input into a list # In this case, first normalize the input into a list
# Since the input is special, keep the shape unchanged from the original feedvar # Since the input is special, keep the shape unchanged from the original feedvar
data_value = [] data_value = []
data_value.append(feed_dict[key]) if isinstance(feed_dict[key], (str, bytes)):
if isinstance(feed_dict[key], str):
if self.feed_types_[key] != bytes_type: if self.feed_types_[key] != bytes_type:
raise ValueError( raise ValueError(
"feedvar is not string-type,feed can`t be a single string." "feedvar is not string-type,feed can`t be a single string."
) )
if isinstance(feed_dict[key], bytes):
feed_dict[key] = feed_dict[key].decode()
else: else:
if self.feed_types_[key] == bytes_type: if self.feed_types_[key] == bytes_type:
raise ValueError( raise ValueError(
"feedvar is string-type,feed, feed can`t be a single int or others." "feedvar is string-type,feed can`t be a single int or others."
) )
data_value.append(feed_dict[key])
# If compression is not used, there is no need to count the data size. # If compression is not used, there is no need to count the data size.
if self.try_request_gzip: if self.try_request_gzip:
self.total_data_number = self.total_data_number + data_bytes_number( self.total_data_number = self.total_data_number + data_bytes_number(
...@@ -427,36 +496,42 @@ class GeneralClient(object): ...@@ -427,36 +496,42 @@ class GeneralClient(object):
feed_dict = self.get_feedvar_dict(feed) feed_dict = self.get_feedvar_dict(feed)
fetch_list = self.get_legal_fetch(fetch) fetch_list = self.get_legal_fetch(fetch)
headers = {}
postData = '' postData = ''
if self.http_proto == True: if self.http_proto == True:
postData = self.process_proto_data(feed_dict, fetch_list, batch, postData = self.process_proto_data(feed_dict, fetch_list, batch,
log_id).SerializeToString() log_id).SerializeToString()
headers["Content-Type"] = "application/proto"
else: else:
postData = self.process_json_data(feed_dict, fetch_list, batch, postData = self.process_json_data(feed_dict, fetch_list, batch,
log_id) log_id)
headers["Content-Type"] = "application/json"
web_url = "http://" + self.ip + ":" + self.server_port + self.service_name web_url = self.http_s + self.ip + ":" + self.server_port + self.service_name
# Compress only when the data section is larger than 512 bytes. # Compress only when the data section is larger than 512 bytes.
self.headers.pop("Content-Encoding", "nokey")
try: try:
if self.try_request_gzip and self.total_data_number > 512: if self.try_request_gzip and self.total_data_number > 512:
origin_data = postData
postData = gzip.compress(bytes(postData, 'utf-8')) if self.http_proto:
headers["Content-Encoding"] = "gzip" postData = gzip.compress(postData)
else:
postData = gzip.compress(bytes(postData, 'utf-8'))
self.headers["Content-Encoding"] = "gzip"
if self.try_response_gzip: if self.try_response_gzip:
headers["Accept-encoding"] = "gzip" self.headers["Accept-encoding"] = "gzip"
# On compression failure, fall back to the original data # On compression failure, fall back to the original data
except: except:
print("compress error, we will use the no-compress data") print("compress error, we will use the no-compress data")
headers.pop("Content-Encoding", "nokey") self.headers.pop("Content-Encoding", "nokey")
postData = origin_data
# requests automatically detects and decompresses the response # requests automatically detects and decompresses the response
try: try:
result = requests.post(url=web_url, headers=headers, data=postData) result = self.requests_session.post(
url=web_url,
headers=self.headers,
data=postData,
timeout=self.timeout_ms / 1000,
verify=False)
result.raise_for_status()
except: except:
print("http post error") print("http post error")
return None return None
...@@ -484,6 +559,16 @@ class GeneralClient(object): ...@@ -484,6 +559,16 @@ class GeneralClient(object):
postData = self.process_proto_data(feed_dict, fetch_list, batch, log_id) postData = self.process_proto_data(feed_dict, fetch_list, batch, log_id)
try:
resp = self.stub_.inference(
postData, timeout=self.timeout_ms / 1000)
except:
print("Grpc inference error occur")
return None
else:
return resp
def init_grpc_stub(self):
# https://github.com/tensorflow/serving/issues/1382 # https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', self.max_body_size), options = [('grpc.max_receive_message_length', self.max_body_size),
('grpc.max_send_message_length', self.max_body_size)] ('grpc.max_send_message_length', self.max_body_size)]
...@@ -493,10 +578,7 @@ class GeneralClient(object): ...@@ -493,10 +578,7 @@ class GeneralClient(object):
self.channel_ = grpc.insecure_channel(g_endpoint, options=options) self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = general_model_service_pb2_grpc.GeneralModelServiceStub( self.stub_ = general_model_service_pb2_grpc.GeneralModelServiceStub(
self.channel_) self.channel_)
try:
resp = self.stub_.inference(postData, timeout=self.timeout_ms) def __del__(self):
except: self.requests_session.close()
print("Grpc inference error occur") self.channel_.close()
return None
else:
return resp
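A minimal usage sketch of the renamed HttpClient, based only on the methods visible in this diff (load_client_config, connect, set_max_retries, add_http_headers, set_http_proto, set_use_grpc_client). The import path, the http_client_predict entry point, and the uci_housing config path and feed/fetch names are assumptions for illustration, not taken from this commit.

    from paddle_serving_client.httpclient import HttpClient  # import path assumed

    client = HttpClient()
    client.load_client_config("uci_housing_client/serving_client_conf.prototxt")  # path assumed
    client.connect(["127.0.0.1:9393"])       # exactly one endpoint is supported
    client.set_max_retries(3)                # mounts an HTTPAdapter with retries on the session
    client.add_http_headers({"X-Request-Id": "demo-001"})
    client.set_http_proto(True)              # proto body; False switches to application/json
    client.set_use_grpc_client(False)        # True routes requests through the gRPC stub instead
    # assumed entry point; feed/fetch names are placeholders from the client config
    result = client.http_client_predict(feed={"x": [0.0] * 13}, fetch=["price"])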
...@@ -67,7 +67,6 @@ def save_dygraph_model(serving_model_folder, client_config_folder, model): ...@@ -67,7 +67,6 @@ def save_dygraph_model(serving_model_folder, client_config_folder, model):
} }
config = model_conf.GeneralModelConfig() config = model_conf.GeneralModelConfig()
#int64 = 0; float32 = 1; int32 = 2;
for key in feed_var_dict: for key in feed_var_dict:
feed_var = model_conf.FeedVar() feed_var = model_conf.FeedVar()
feed_var.alias_name = key feed_var.alias_name = key
...@@ -127,7 +126,6 @@ def save_dygraph_model(serving_model_folder, client_config_folder, model): ...@@ -127,7 +126,6 @@ def save_dygraph_model(serving_model_folder, client_config_folder, model):
def var_type_conversion(dtype): def var_type_conversion(dtype):
""" """
Variable type conversion Variable type conversion
Args: Args:
dtype: type of core.VarDesc.VarType.xxxxx dtype: type of core.VarDesc.VarType.xxxxx
(https://github.com/PaddlePaddle/Paddle/blob/release/2.1/python/paddle/framework/dtype.py) (https://github.com/PaddlePaddle/Paddle/blob/release/2.1/python/paddle/framework/dtype.py)
...@@ -184,7 +182,12 @@ def save_model(server_model_folder, ...@@ -184,7 +182,12 @@ def save_model(server_model_folder,
main_program=None, main_program=None,
encryption=False, encryption=False,
key_len=128, key_len=128,
encrypt_conf=None): encrypt_conf=None,
model_filename=None,
params_filename=None,
show_proto=False,
feed_alias_names=None,
fetch_alias_names=None):
executor = Executor(place=CPUPlace()) executor = Executor(place=CPUPlace())
feed_var_names = [feed_var_dict[x].name for x in feed_var_dict] feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
...@@ -194,16 +197,30 @@ def save_model(server_model_folder, ...@@ -194,16 +197,30 @@ def save_model(server_model_folder,
target_vars.append(fetch_var_dict[key]) target_vars.append(fetch_var_dict[key])
target_var_names.append(key) target_var_names.append(key)
if not encryption: if not encryption and not show_proto:
save_inference_model( if not os.path.exists(server_model_folder):
server_model_folder, os.makedirs(server_model_folder)
feed_var_names, if not model_filename:
target_vars, model_filename = "model.pdmodel"
executor, if not params_filename:
model_filename="__model__", params_filename = "params.pdiparams"
params_filename="__params__",
main_program=main_program) new_model_path = os.path.join(server_model_folder, model_filename)
else: new_params_path = os.path.join(server_model_folder, params_filename)
with open(new_model_path, "wb") as new_model_file:
new_model_file.write(main_program.desc.serialize_to_string())
paddle.static.save_vars(
executor=executor,
dirname=server_model_folder,
main_program=main_program,
vars=None,
predicate=paddle.static.io.is_persistable,
filename=params_filename)
elif not show_proto:
if not os.path.exists(server_model_folder):
os.makedirs(server_model_folder)
if encrypt_conf == None: if encrypt_conf == None:
aes_cipher = CipherFactory.create_cipher() aes_cipher = CipherFactory.create_cipher()
else: else:
...@@ -221,10 +238,19 @@ def save_model(server_model_folder, ...@@ -221,10 +238,19 @@ def save_model(server_model_folder,
os.chdir("..") os.chdir("..")
config = model_conf.GeneralModelConfig() config = model_conf.GeneralModelConfig()
if feed_alias_names is None:
for key in feed_var_dict: feed_alias = list(feed_var_dict.keys())
else:
feed_alias = feed_alias_names.split(',')
if fetch_alias_names is None:
fetch_alias = target_var_names
else:
fetch_alias = fetch_alias_names.split(',')
if len(feed_alias) != len(feed_var_dict.keys()) or len(fetch_alias) != len(target_var_names):
raise ValueError("please check the input --feed_alias_names and --fetch_alias_names, should be same size with feed_vars and fetch_vars")
for i, key in enumerate(feed_var_dict):
feed_var = model_conf.FeedVar() feed_var = model_conf.FeedVar()
feed_var.alias_name = key feed_var.alias_name = feed_alias[i]
feed_var.name = feed_var_dict[key].name feed_var.name = feed_var_dict[key].name
feed_var.feed_type = var_type_conversion(feed_var_dict[key].dtype) feed_var.feed_type = var_type_conversion(feed_var_dict[key].dtype)
...@@ -239,9 +265,9 @@ def save_model(server_model_folder, ...@@ -239,9 +265,9 @@ def save_model(server_model_folder,
feed_var.shape.extend(tmp_shape) feed_var.shape.extend(tmp_shape)
config.feed_var.extend([feed_var]) config.feed_var.extend([feed_var])
for key in target_var_names: for i, key in enumerate(target_var_names):
fetch_var = model_conf.FetchVar() fetch_var = model_conf.FetchVar()
fetch_var.alias_name = key fetch_var.alias_name = fetch_alias[i]
fetch_var.name = fetch_var_dict[key].name fetch_var.name = fetch_var_dict[key].name
fetch_var.fetch_type = var_type_conversion(fetch_var_dict[key].dtype) fetch_var.fetch_type = var_type_conversion(fetch_var_dict[key].dtype)
...@@ -257,6 +283,9 @@ def save_model(server_model_folder, ...@@ -257,6 +283,9 @@ def save_model(server_model_folder,
fetch_var.shape.extend(tmp_shape) fetch_var.shape.extend(tmp_shape)
config.fetch_var.extend([fetch_var]) config.fetch_var.extend([fetch_var])
if show_proto:
print(str(config))
return
try: try:
save_dirname = os.path.normpath(client_config_folder) save_dirname = os.path.normpath(client_config_folder)
os.makedirs(save_dirname) os.makedirs(save_dirname)
...@@ -284,7 +313,10 @@ def inference_model_to_serving(dirname, ...@@ -284,7 +313,10 @@ def inference_model_to_serving(dirname,
params_filename=None, params_filename=None,
encryption=False, encryption=False,
key_len=128, key_len=128,
encrypt_conf=None): encrypt_conf=None,
show_proto=False,
feed_alias_names=None,
fetch_alias_names=None):
paddle.enable_static() paddle.enable_static()
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -296,7 +328,8 @@ def inference_model_to_serving(dirname, ...@@ -296,7 +328,8 @@ def inference_model_to_serving(dirname,
} }
fetch_dict = {x.name: x for x in fetch_targets} fetch_dict = {x.name: x for x in fetch_targets}
save_model(serving_server, serving_client, feed_dict, fetch_dict, save_model(serving_server, serving_client, feed_dict, fetch_dict,
inference_program, encryption, key_len, encrypt_conf) inference_program, encryption, key_len, encrypt_conf,
model_filename, params_filename, show_proto, feed_alias_names, fetch_alias_names)
feed_names = feed_dict.keys() feed_names = feed_dict.keys()
fetch_names = fetch_dict.keys() fetch_names = fetch_dict.keys()
return feed_names, fetch_names return feed_names, fetch_names
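A hedged sketch of calling the extended inference_model_to_serving shown above with the new model_filename/params_filename/show_proto/feed_alias_names/fetch_alias_names arguments. The module import path, the serving_server/serving_client keyword names, the model directory, and the alias strings are placeholders or assumptions rather than values from this commit.

    import paddle_serving_client.io as serving_io  # import path assumed

    feed_names, fetch_names = serving_io.inference_model_to_serving(
        dirname="./inference_model",           # placeholder inference model directory
        serving_server="serving_server",       # keyword names assumed, not shown in this hunk
        serving_client="serving_client",
        model_filename="model.pdmodel",
        params_filename="params.pdiparams",
        show_proto=False,                      # True makes save_model print the config proto and skip saving
        feed_alias_names="image",              # comma-separated, must match the number of feed vars
        fetch_alias_names="score,label")       # comma-separated, must match the number of fetch vars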
...@@ -96,7 +96,7 @@ if __name__ == "__main__": ...@@ -96,7 +96,7 @@ if __name__ == "__main__":
args = parse_args() args = parse_args()
benchmark_cfg_filename = args.benchmark_cfg benchmark_cfg_filename = args.benchmark_cfg
f = open(benchmark_cfg_filename, 'r') f = open(benchmark_cfg_filename, 'r')
benchmark_config = yaml.load(f) benchmark_config = yaml.load(f, yaml.FullLoader)
f.close() f.close()
benchmark_log_filename = args.benchmark_log benchmark_log_filename = args.benchmark_log
f = open(benchmark_log_filename, 'r') f = open(benchmark_log_filename, 'r')
......
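For context on the yaml.load change above: PyYAML 5.1+ warns when yaml.load is called without an explicit Loader, because the legacy default could construct arbitrary Python objects. A minimal sketch, with a placeholder file name:

    import yaml

    with open("benchmark_config.yaml") as f:   # placeholder file name
        cfg = yaml.load(f, Loader=yaml.FullLoader)
    # a stricter alternative for plain data is: cfg = yaml.safe_load(f)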
...@@ -37,7 +37,7 @@ import socket ...@@ -37,7 +37,7 @@ import socket
def port_is_available(port): def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2) sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port)) result = sock.connect_ex(('127.0.0.1', port))
if result != 0: if result != 0:
return True return True
else: else:
......
...@@ -228,7 +228,8 @@ class Server(object): ...@@ -228,7 +228,8 @@ class Server(object):
engine.batch_infer_size = self.op_max_batch[index % engine.batch_infer_size = self.op_max_batch[index %
len(self.op_max_batch)] len(self.op_max_batch)]
engine.enable_batch_align = 1 engine.enable_overrun = False
engine.allow_split_request = True
engine.model_dir = model_config_path engine.model_dir = model_config_path
engine.enable_memory_optimization = self.memory_optimization engine.enable_memory_optimization = self.memory_optimization
engine.enable_ir_optimization = self.ir_optimization engine.enable_ir_optimization = self.ir_optimization
...@@ -537,7 +538,7 @@ class Server(object): ...@@ -537,7 +538,7 @@ class Server(object):
def port_is_available(self, port): def port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2) sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port)) result = sock.connect_ex(('127.0.0.1', port))
if result != 0: if result != 0:
return True return True
else: else:
...@@ -563,7 +564,7 @@ class Server(object): ...@@ -563,7 +564,7 @@ class Server(object):
"-num_threads {} " \ "-num_threads {} " \
"-port {} " \ "-port {} " \
"-precision {} " \ "-precision {} " \
"-use_calib {} " \ "-use_calib={} " \
"-reload_interval_s {} " \ "-reload_interval_s {} " \
"-resource_path {} " \ "-resource_path {} " \
"-resource_file {} " \ "-resource_file {} " \
......
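The change from "-use_calib {}" to "-use_calib={}" above matters because gflags-style boolean flags are only recognized in the single-token -flag=value form; with a space, the flag is parsed on its own and the value becomes a stray argument. A small sketch of the resulting fragment (variable name is illustrative):

    use_calib = False
    flag = "-use_calib={} ".format(use_calib)   # -> "-use_calib=False "
    # the old "-use_calib {} ".format(use_calib) form would emit "-use_calib False ",
    # which a gflags parser does not treat as assigning False to the flag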
...@@ -33,7 +33,7 @@ from paddle_serving_server.serve import format_gpu_to_strlist ...@@ -33,7 +33,7 @@ from paddle_serving_server.serve import format_gpu_to_strlist
def port_is_available(port): def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2) sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port)) result = sock.connect_ex(('127.0.0.1', port))
if result != 0: if result != 0:
return True return True
else: else:
......
...@@ -274,7 +274,7 @@ class OpAnalyst(object): ...@@ -274,7 +274,7 @@ class OpAnalyst(object):
""" """
import yaml import yaml
with open(op_config_yaml) as f: with open(op_config_yaml) as f:
op_config = yaml.load(f) op_config = yaml.load(f, yaml.FullLoader)
# check that each model is deployed on a different card # check that each model is deployed on a different card
card_set = set() card_set = set()
......
...@@ -28,6 +28,7 @@ import logging ...@@ -28,6 +28,7 @@ import logging
import enum import enum
import os import os
import copy import copy
import time
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
...@@ -45,7 +46,9 @@ class ChannelDataErrcode(enum.Enum): ...@@ -45,7 +46,9 @@ class ChannelDataErrcode(enum.Enum):
CLOSED_ERROR = 6 CLOSED_ERROR = 6
NO_SERVICE = 7 NO_SERVICE = 7
UNKNOW = 8 UNKNOW = 8
PRODUCT_ERROR = 9 INPUT_PARAMS_ERROR = 9
PRODUCT_ERROR = 100
class ProductErrCode(enum.Enum): class ProductErrCode(enum.Enum):
...@@ -124,7 +127,6 @@ class ChannelData(object): ...@@ -124,7 +127,6 @@ class ChannelData(object):
def get_size(self): def get_size(self):
size = 0 size = 0
dict_data = None
if isinstance(self.dictdata, dict): if isinstance(self.dictdata, dict):
for k in self.dictdata: for k in self.dictdata:
size += sys.getsizeof(self.dictdata[k]) + sys.getsizeof(k) size += sys.getsizeof(self.dictdata[k]) + sys.getsizeof(k)
...@@ -259,7 +261,11 @@ class ProcessChannel(object): ...@@ -259,7 +261,11 @@ class ProcessChannel(object):
maintains the data obtained from queue. maintains the data obtained from queue.
""" """
def __init__(self, manager, name=None, maxsize=0): def __init__(self,
manager,
name=None,
maxsize=0,
channel_recv_frist_arrive=False):
# For queue multiprocess: after putting an object on # For queue multiprocess: after putting an object on
# an empty queue there may be an infinitessimal delay # an empty queue there may be an infinitessimal delay
# before the queue's :meth:`~Queue.empty` # before the queue's :meth:`~Queue.empty`
...@@ -283,6 +289,9 @@ class ProcessChannel(object): ...@@ -283,6 +289,9 @@ class ProcessChannel(object):
self._base_cursor = manager.Value('i', 0) self._base_cursor = manager.Value('i', 0)
self._output_buf = manager.list() self._output_buf = manager.list()
self._cur_max_dataid = manager.Value('i', -1)
self._channel_recv_frist_arrive = channel_recv_frist_arrive
def get_maxsize(self): def get_maxsize(self):
return self._maxsize return self._maxsize
...@@ -325,9 +334,10 @@ class ProcessChannel(object): ...@@ -325,9 +334,10 @@ class ProcessChannel(object):
def push(self, channeldata, op_name=None): def push(self, channeldata, op_name=None):
_LOGGER.debug( _LOGGER.debug(
self._log( self._log(
"(data_id={} log_id={}) Op({}) Enter channel::push producers:{}". "(data_id={} log_id={}) Op({}) Enter channel::push producers:{}, time:{}".
format(channeldata.id, channeldata.log_id, op_name, format(channeldata.id, channeldata.log_id, op_name,
len(self._producers)))) len(self._producers), time.time())))
if len(self._producers) == 0: if len(self._producers) == 0:
_LOGGER.critical( _LOGGER.critical(
self._log( self._log(
...@@ -355,16 +365,55 @@ class ProcessChannel(object): ...@@ -355,16 +365,55 @@ class ProcessChannel(object):
self._cv.notify_all() self._cv.notify_all()
notify_all_time = _time() notify_all_time = _time()
_LOGGER.debug( _LOGGER.debug(
"(data_id={}) Op({}) channel push cost! enter_cv:{} ms, push_que:{} ms, notify:{} ms, data_size:{}". "(data_id={}) Op({}) channel push cost! enter_cv:{} ms, push_que:{} ms, notify:{} ms, data_size:{}, time:{}".
format(channeldata.id, op_name, (enter_cv_time - start_time) format(channeldata.id, op_name, (enter_cv_time - start_time)
* 1000, (push_que_time - enter_cv_time) * 1000, ( * 1000, (push_que_time - enter_cv_time) * 1000, (
notify_all_time - push_que_time) * 1000, notify_all_time - push_que_time) * 1000,
channeldata.get_size())) channeldata.get_size(), time.time()))
_LOGGER.debug( _LOGGER.debug(
self._log( self._log(
"(data_id={} log_id={}) Op({}) Pushed data into internal queue.". "(data_id={} log_id={}) Op({}) Pushed data into internal queue.".
format(channeldata.id, channeldata.log_id, op_name))) format(channeldata.id, channeldata.log_id, op_name)))
return True return True
elif self._channel_recv_frist_arrive == True:
start_time = _time()
with self._cv:
_LOGGER.debug(
"(data_id={}) Op({}) Channel({}) enter channel_recv_first_arrive. _cur_max_dataid:{}".
format(channeldata.id, op_name, self.name,
self._cur_max_dataid.value))
if channeldata.id > self._cur_max_dataid.value:
enter_cv_time = _time()
push_que_time = enter_cv_time
while self._stop.value == 0:
try:
self._que.put((channeldata.id, {
op_name: channeldata
}),
timeout=0)
push_que_time = _time()
self._cur_max_dataid.value = channeldata.id
break
except Queue.Full:
self._cv.wait()
if self._stop.value == 1:
raise ChannelStopError()
self._cv.notify_all()
notify_all_time = _time()
_LOGGER.debug(
"(data_id={}) Op({}) channel push cost! enter_cv:{} ms, push_que:{} ms, notify:{} ms, data_size:{}, time:{}".
format(channeldata.id, op_name, (
enter_cv_time - start_time) * 1000, (
push_que_time - enter_cv_time) * 1000, (
notify_all_time - push_que_time) * 1000,
channeldata.get_size(), time.time()))
else:
# log and drop it
_LOGGER.debug(
"(data_id={}) Op({}) send data is dropped! cur_max_dataid:{}".
format(channeldata.id, op_name,
self._cur_max_dataid.value))
return True
elif op_name is None: elif op_name is None:
_LOGGER.critical( _LOGGER.critical(
self._log( self._log(
...@@ -414,8 +463,8 @@ class ProcessChannel(object): ...@@ -414,8 +463,8 @@ class ProcessChannel(object):
_LOGGER.debug( _LOGGER.debug(
self._log( self._log(
"(data_id={} log_id={}) Op({}) Pushed data into internal_queue.". "(data_id={} log_id={}) Op({}) Pushed data into internal_queue. time:{}".
format(data_id, log_id, op_name))) format(data_id, log_id, op_name, time.time())))
self._cv.notify_all() self._cv.notify_all()
return True return True
...@@ -464,9 +513,9 @@ class ProcessChannel(object): ...@@ -464,9 +513,9 @@ class ProcessChannel(object):
key = list(resp.keys())[0] key = list(resp.keys())[0]
data_id = resp[key].id data_id = resp[key].id
_LOGGER.debug( _LOGGER.debug(
"(data_id={}) op({}) front cost enter_cv:{} ms, queue_get:{} ms". "(data_id={}) op({}) front cost enter_cv:{} ms, queue_get:{} ms, time:{}".
format(data_id, op_name, (time_2 - time_1) / 1000.0, ( format(data_id, op_name, (time_2 - time_1) / 1000.0, (
time_3 - time_2) / 1000.0)) time_3 - time_2) / 1000.0, time.time()))
if resp is not None: if resp is not None:
list_values = list(resp.values()) list_values = list(resp.values())
_LOGGER.debug( _LOGGER.debug(
...@@ -501,9 +550,9 @@ class ProcessChannel(object): ...@@ -501,9 +550,9 @@ class ProcessChannel(object):
list_values = list(channeldata.values()) list_values = list(channeldata.values())
_LOGGER.debug( _LOGGER.debug(
self._log( self._log(
"(data_id={} log_id={}) Op({}) Pop ready item into output_buffer". "(data_id={} log_id={}) Op({}) Pop ready item into output_buffer, time:{}".
format(list_values[0].id, list_values[0].log_id, format(list_values[0].id, list_values[0].log_id,
op_name))) op_name, time.time())))
break break
except Queue.Empty: except Queue.Empty:
if timeout is not None: if timeout is not None:
...@@ -561,8 +610,9 @@ class ProcessChannel(object): ...@@ -561,8 +610,9 @@ class ProcessChannel(object):
list_values = list(resp.values()) list_values = list(resp.values())
_LOGGER.debug( _LOGGER.debug(
self._log( self._log(
"(data_id={} log_id={}) Op({}) Got data from output_buffer". "(data_id={} log_id={}) Op({}) Got data from output_buffer, time:{}".
format(list_values[0].id, list_values[0].log_id, op_name))) format(list_values[0].id, list_values[0].log_id, op_name,
time.time())))
return resp return resp
def stop(self): def stop(self):
...@@ -601,7 +651,7 @@ class ThreadChannel(Queue.PriorityQueue): ...@@ -601,7 +651,7 @@ class ThreadChannel(Queue.PriorityQueue):
maintains the data obtained from queue. maintains the data obtained from queue.
""" """
def __init__(self, name=None, maxsize=-1): def __init__(self, name=None, maxsize=-1, channel_recv_frist_arrive=False):
Queue.Queue.__init__(self, maxsize=maxsize) Queue.Queue.__init__(self, maxsize=maxsize)
self._maxsize = maxsize self._maxsize = maxsize
self.name = name self.name = name
...@@ -619,6 +669,9 @@ class ThreadChannel(Queue.PriorityQueue): ...@@ -619,6 +669,9 @@ class ThreadChannel(Queue.PriorityQueue):
self._base_cursor = 0 self._base_cursor = 0
self._output_buf = [] self._output_buf = []
self._channel_recv_frist_arrive = channel_recv_frist_arrive
self._cur_max_dataid = -1
def get_maxsize(self): def get_maxsize(self):
return self._maxsize return self._maxsize
...@@ -662,6 +715,7 @@ class ThreadChannel(Queue.PriorityQueue): ...@@ -662,6 +715,7 @@ class ThreadChannel(Queue.PriorityQueue):
_LOGGER.debug( _LOGGER.debug(
self._log("(data_id={} log_id={}) Op({}) Pushing data".format( self._log("(data_id={} log_id={}) Op({}) Pushing data".format(
channeldata.id, channeldata.log_id, op_name))) channeldata.id, channeldata.log_id, op_name)))
if len(self._producers) == 0: if len(self._producers) == 0:
_LOGGER.critical( _LOGGER.critical(
self._log( self._log(
...@@ -688,6 +742,29 @@ class ThreadChannel(Queue.PriorityQueue): ...@@ -688,6 +742,29 @@ class ThreadChannel(Queue.PriorityQueue):
"(data_id={} log_id={}) Op({}) Pushed data into internal_queue.". "(data_id={} log_id={}) Op({}) Pushed data into internal_queue.".
format(channeldata.id, channeldata.log_id, op_name))) format(channeldata.id, channeldata.log_id, op_name)))
return True return True
elif self._channel_recv_frist_arrive is True:
with self._cv:
if channeldata.id > self._cur_max_dataid:
while self._stop is False:
try:
self.put((channeldata.id, {
op_name: channeldata
}),
timeout=0)
self._cur_max_dataid = channeldata.id
break
except Queue.Full:
self._cv.wait()
if self._stop:
raise ChannelStopError()
self._cv.notify_all()
else:
# log and drop it
_LOGGER.debug(
"(data_id={}) Op({}) send data is dropped! cur_max_dataid:{}".
format(channeldata.id, op_name, self._cur_max_dataid))
return True
elif op_name is None: elif op_name is None:
_LOGGER.critical( _LOGGER.critical(
self._log( self._log(
......
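A rough illustration of the channel_recv_frist_arrive option added to ProcessChannel/ThreadChannel above: when enabled, a push only enqueues data whose id is strictly greater than the largest id seen so far, and anything older is logged and dropped while still reporting success. The class below is a simplified stand-in for the real channels, not code from this commit.

    class FirstArriveOnlyChannel:
        """Toy model of the drop-if-not-newer rule used by the pipeline channels."""

        def __init__(self):
            self.cur_max_dataid = -1
            self.items = []

        def push(self, data_id, payload):
            if data_id > self.cur_max_dataid:
                self.items.append((data_id, payload))
                self.cur_max_dataid = data_id
            # older or duplicate data: log-and-drop, but still report success
            return True

    chan = FirstArriveOnlyChannel()
    chan.push(3, "result from the fastest upstream op")
    chan.push(2, "late result from a slower upstream op")   # dropped: 2 <= 3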
...@@ -63,6 +63,7 @@ class DAGExecutor(object): ...@@ -63,6 +63,7 @@ class DAGExecutor(object):
self._retry = dag_conf["retry"] self._retry = dag_conf["retry"]
self._server_use_profile = dag_conf["use_profile"] self._server_use_profile = dag_conf["use_profile"]
channel_size = dag_conf["channel_size"] channel_size = dag_conf["channel_size"]
channel_recv_frist_arrive = dag_conf["channel_recv_frist_arrive"]
self._is_thread_op = dag_conf["is_thread_op"] self._is_thread_op = dag_conf["is_thread_op"]
tracer_conf = dag_conf["tracer"] tracer_conf = dag_conf["tracer"]
...@@ -79,7 +80,7 @@ class DAGExecutor(object): ...@@ -79,7 +80,7 @@ class DAGExecutor(object):
self._dag = DAG(self.name, response_op, self._server_use_profile, self._dag = DAG(self.name, response_op, self._server_use_profile,
self._is_thread_op, channel_size, build_dag_each_worker, self._is_thread_op, channel_size, build_dag_each_worker,
self._tracer) self._tracer, channel_recv_frist_arrive)
(in_channel, out_channel, pack_rpc_func, (in_channel, out_channel, pack_rpc_func,
unpack_rpc_func) = self._dag.build() unpack_rpc_func) = self._dag.build()
self._dag.start() self._dag.start()
...@@ -480,7 +481,8 @@ class DAG(object): ...@@ -480,7 +481,8 @@ class DAG(object):
""" """
def __init__(self, request_name, response_op, use_profile, is_thread_op, def __init__(self, request_name, response_op, use_profile, is_thread_op,
channel_size, build_dag_each_worker, tracer): channel_size, build_dag_each_worker, tracer,
channel_recv_frist_arrive):
self._request_name = request_name self._request_name = request_name
self._response_op = response_op self._response_op = response_op
self._use_profile = use_profile self._use_profile = use_profile
...@@ -488,6 +490,7 @@ class DAG(object): ...@@ -488,6 +490,7 @@ class DAG(object):
self._channel_size = channel_size self._channel_size = channel_size
self._build_dag_each_worker = build_dag_each_worker self._build_dag_each_worker = build_dag_each_worker
self._tracer = tracer self._tracer = tracer
self._channel_recv_frist_arrive = channel_recv_frist_arrive
if not self._is_thread_op: if not self._is_thread_op:
self._manager = PipelineProcSyncManager() self._manager = PipelineProcSyncManager()
_LOGGER.info("[DAG] Succ init") _LOGGER.info("[DAG] Succ init")
...@@ -543,10 +546,15 @@ class DAG(object): ...@@ -543,10 +546,15 @@ class DAG(object):
channel = None channel = None
if self._is_thread_op: if self._is_thread_op:
channel = ThreadChannel( channel = ThreadChannel(
name=name_gen.next(), maxsize=self._channel_size) name=name_gen.next(),
maxsize=self._channel_size,
channel_recv_frist_arrive=self._channel_recv_frist_arrive)
else: else:
channel = ProcessChannel( channel = ProcessChannel(
self._manager, name=name_gen.next(), maxsize=self._channel_size) self._manager,
name=name_gen.next(),
maxsize=self._channel_size,
channel_recv_frist_arrive=self._channel_recv_frist_arrive)
_LOGGER.debug("[DAG] Generate channel: {}".format(channel.name)) _LOGGER.debug("[DAG] Generate channel: {}".format(channel.name))
return channel return channel
......
...@@ -18,22 +18,117 @@ option go_package = "./;pipeline_serving"; ...@@ -18,22 +18,117 @@ option go_package = "./;pipeline_serving";
import "google/api/annotations.proto"; import "google/api/annotations.proto";
// Tensor structure, consistent with PADDLE variable types.
// Descriptions of input and output data.
message Tensor {
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32, FP16
repeated float float_data = 2;
// VarType: INT32, INT16, INT8
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: BF16, UINT8
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64, 2x represents the real part, 2x+1
// represents the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128, 2x represents the real part, 2x+1
// represents the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string str_data = 9;
// VarType: BYTES, is suitable for big data. No need to save data types and
// dimensions
// pack method: pack by BytesIO, saved by np.save
// unpack method: load by np.load, unpack by BytesIO.
bytes byte_data = 10;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 12 => STRING
// 13 => BYTES
int32 elem_type = 20;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 21;
// Level of data (LoD), supports variable-length data, only for fetch tensors
// currently.
repeated int32 lod = 22;
// Correspond to the variable 'name' in the model description prototxt.
string name = 23;
};
// The structure of the service request. The input data can be repeated string
// pairs or tensors.
message Request {
// The input data are repeated string pairs.
// For example, key is "words" and value is the string of words.
repeated string key = 1;
repeated string value = 2;
// The input data are repeated tensors for complex data structures.
// Because tensors can carry more data information and reduce the amount of data
// transferred.
repeated Tensor tensors = 3;
// The name field in the RESTful API
string name = 4;
// The method field in the RESTful API
string method = 5;
// For tracing requests and logs
int64 logid = 6;
// For tracking sources
string clientip = 7;
};
// The structure of the service response. The output data can be repeated string
// pairs or tensors.
message Response { message Response {
// Error code
int32 err_no = 1; int32 err_no = 1;
// Error messages
string err_msg = 2; string err_msg = 2;
// The results of string pairs
repeated string key = 3; repeated string key = 3;
repeated string value = 4; repeated string value = 4;
};
message Request { // The results of tensors
repeated string key = 1; repeated Tensor tensors = 5;
repeated string value = 2;
string name = 3;
string method = 4;
int64 logid = 5;
string clientip = 6;
}; };
// Python pipeline service
service PipelineService { service PipelineService {
rpc inference(Request) returns (Response) { rpc inference(Request) returns (Response) {
option (google.api.http) = { option (google.api.http) = {
......
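A hedged sketch of filling the Tensor/Request messages defined above from a float32 numpy array, following the elem_type table (1 => FP32). The generated-module import path and the tensor name "image" are assumptions; only the field names come from the proto in this diff.

    import numpy as np
    from paddle_serving_server.pipeline.proto import pipeline_service_pb2  # import path assumed

    value = np.random.rand(1, 3, 224, 224).astype("float32")

    req = pipeline_service_pb2.Request()
    req.name = "demo"                       # placeholder RESTful name field
    tensor = req.tensors.add()
    tensor.name = "image"                   # placeholder; must match the model's feed var name
    tensor.elem_type = 1                    # 1 => FP32 per the comment table above
    tensor.shape.extend(value.shape)        # shape includes the batch dimension
    tensor.float_data.extend(value.flatten().tolist())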
...@@ -26,6 +26,7 @@ import collections ...@@ -26,6 +26,7 @@ import collections
import numpy as np import numpy as np
import json import json
from numpy import * from numpy import *
from io import BytesIO
if sys.version_info.major == 2: if sys.version_info.major == 2:
import Queue import Queue
elif sys.version_info.major == 3: elif sys.version_info.major == 3:
...@@ -40,10 +41,29 @@ from .channel import (ThreadChannel, ProcessChannel, ChannelDataErrcode, ...@@ -40,10 +41,29 @@ from .channel import (ThreadChannel, ProcessChannel, ChannelDataErrcode,
from .util import NameGenerator from .util import NameGenerator
from .profiler import UnsafeTimeProfiler as TimeProfiler from .profiler import UnsafeTimeProfiler as TimeProfiler
from . import local_service_handler from . import local_service_handler
from .pipeline_client import PipelineClient as PPClient
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
_op_name_gen = NameGenerator("Op") _op_name_gen = NameGenerator("Op")
# data type of tensor to numpy_data
_TENSOR_DTYPE_2_NUMPY_DATA_DTYPE = {
0: "int64", # VarType.INT64
1: "float32", # VarType.FP32
2: "int32", # VarType.INT32
3: "float64", # VarType.FP64
4: "int16", # VarType.int16
5: "float16", # VarType.FP32
6: "uint16", # VarType.BF16
7: "uint8", # VarType.UINT8
8: "int8", # VarType.INT8
9: "bool", # VarType.BOOL
10: "complex64", # VarType.COMPLEX64
11: "complex128", # VarType.COMPLEX128
12: "string", # load by numpy
13: "bytes", # load by numpy
}
class Op(object): class Op(object):
def __init__(self, def __init__(self,
...@@ -84,6 +104,9 @@ class Op(object): ...@@ -84,6 +104,9 @@ class Op(object):
self._server_use_profile = False self._server_use_profile = False
self._tracer = None self._tracer = None
# for grpc_pipeline predict mode. False, string key/val; True, tensor format.
self._pack_tensor_format = False
# only for thread op # only for thread op
self._for_init_op_lock = threading.Lock() self._for_init_op_lock = threading.Lock()
self._for_close_op_lock = threading.Lock() self._for_close_op_lock = threading.Lock()
...@@ -330,9 +353,8 @@ class Op(object): ...@@ -330,9 +353,8 @@ class Op(object):
if self.client_type == 'brpc': if self.client_type == 'brpc':
client = Client() client = Client()
client.load_client_config(client_config) client.load_client_config(client_config)
# After testing is complete, replace with brpc-http. elif self.client_type == 'pipeline_grpc':
# elif self.client_type == 'grpc': client = PPClient()
# client = MultiLangClient()
elif self.client_type == 'local_predictor': elif self.client_type == 'local_predictor':
if self.local_predictor is None: if self.local_predictor is None:
raise ValueError("local predictor not yet created") raise ValueError("local predictor not yet created")
...@@ -372,6 +394,9 @@ class Op(object): ...@@ -372,6 +394,9 @@ class Op(object):
os._exit(-1) os._exit(-1)
self._input_ops.append(op) self._input_ops.append(op)
def set_pack_tensor_format(self, is_tensor_format=False):
self._pack_tensor_format = is_tensor_format
def get_jump_to_ops(self): def get_jump_to_ops(self):
return self._jump_to_ops return self._jump_to_ops
...@@ -483,7 +508,7 @@ class Op(object): ...@@ -483,7 +508,7 @@ class Op(object):
os._exit(-1) os._exit(-1)
channel.add_producer(self.name) channel.add_producer(self.name)
self._outputs.append(channel) self._outputs.append(channel)
_LOGGER.info("op:{} add output_channel {}".format(self.name, channel)) _LOGGER.debug("op:{} add output_channel {}".format(self.name, channel))
def clean_output_channels(self): def clean_output_channels(self):
self._outputs = [] self._outputs = []
...@@ -531,32 +556,73 @@ class Op(object): ...@@ -531,32 +556,73 @@ class Op(object):
Returns: Returns:
call_result: predict result call_result: predict result
""" """
err, err_info = ChannelData.check_batch_npdata(feed_batch)
if err != 0: call_result = None
_LOGGER.critical( err_code = ChannelDataErrcode.OK.value
self._log("Failed to run process: {}. Please override " err_info = ""
"preprocess func.".format(err_info)))
os._exit(-1)
if self.client_type == "local_predictor": if self.client_type == "local_predictor":
err, err_info = ChannelData.check_batch_npdata(feed_batch)
if err != 0:
_LOGGER.error(
self._log("Failed to run process: {}. feed_batch must be \
npdata in process for local_predictor mode."
.format(err_info)))
return call_result, ChannelDataErrcode.TYPE_ERROR.value, "feed_batch must be npdata"
call_result = self.client.predict( call_result = self.client.predict(
feed=feed_batch[0], feed=feed_batch[0],
fetch=self._fetch_names, fetch=self._fetch_names,
batch=True, batch=True,
log_id=typical_logid) log_id=typical_logid)
else:
elif self.client_type == "brpc":
err, err_info = ChannelData.check_batch_npdata(feed_batch)
if err != 0:
_LOGGER.error(
self._log("Failed to run process: {}. feed_batch must be \
npdata in process for brpc mode.".format(err_info)))
return call_result, ChannelDataErrcode.TYPE_ERROR.value, "feed_batch must be npdata"
call_result = self.client.predict( call_result = self.client.predict(
feed=feed_batch, feed=feed_batch[0],
fetch=self._fetch_names, fetch=self._fetch_names,
batch=True, batch=True,
log_id=typical_logid) log_id=typical_logid)
# Replace with HttpClient later
''' elif self.client_type == "pipeline_grpc":
if isinstance(self.client, MultiLangClient): err, err_info = ChannelData.check_dictdata(feed_batch)
if call_result is None or call_result["serving_status_code"] != 0: if err != 0:
return None _LOGGER.error(
call_result.pop("serving_status_code") self._log("Failed to run process: {}. feed_batch must be \
''' npdata in process for pipeline_grpc mode."
return call_result .format(err_info)))
return call_result, ChannelDataErrcode.TYPE_ERROR.value, "feed_batch must be dict"
call_result = self.client.predict(
feed_dict=feed_batch[0],
fetch=self._fetch_names,
asyn=False,
pack_tensor_format=self._pack_tensor_format,
profile=False)
if call_result is None:
_LOGGER.error(
self._log("Failed in pipeline_grpc. call_result is None."))
return call_result, ChannelDataErrcode.UNKNOW.value, "pipeline_grpc error"
if call_result.err_no != 0:
_LOGGER.error(
self._log("Failed in pipeline_grpc. err_no:{}, err_info:{}".
format(call_result.err_no, call_result.err_msg)))
return call_result, ChannelDataErrcode(
call_result.err_no).value, call_result.err_msg
new_dict = {}
err_code = ChannelDataErrcode(call_result.err_no).value
err_info = call_result.err_msg
for idx, key in enumerate(call_result.key):
new_dict[key] = [call_result.value[idx]]
call_result = new_dict
return call_result, err_code, err_info
def postprocess(self, input_data, fetch_data, data_id=0, log_id=0): def postprocess(self, input_data, fetch_data, data_id=0, log_id=0):
""" """
...@@ -891,16 +957,20 @@ class Op(object): ...@@ -891,16 +957,20 @@ class Op(object):
midped_batch = None midped_batch = None
error_code = ChannelDataErrcode.OK.value error_code = ChannelDataErrcode.OK.value
error_info = ""
if self._timeout <= 0: if self._timeout <= 0:
# No retry # No retry
try: try:
if batch_input is False: if batch_input is False:
midped_batch = self.process(feed_batch, typical_logid) midped_batch, error_code, error_info = self.process(
feed_batch, typical_logid)
else: else:
midped_batch = [] midped_batch = []
for idx in range(len(feed_batch)): for idx in range(len(feed_batch)):
predict_res = self.process([feed_batch[idx]], predict_res, error_code, error_info = self.process(
typical_logid) [feed_batch[idx]], typical_logid)
if error_code != ChannelDataErrcode.OK.value:
break
midped_batch.append(predict_res) midped_batch.append(predict_res)
except Exception as e: except Exception as e:
error_code = ChannelDataErrcode.UNKNOW.value error_code = ChannelDataErrcode.UNKNOW.value
...@@ -913,14 +983,14 @@ class Op(object): ...@@ -913,14 +983,14 @@ class Op(object):
try: try:
# time out for each process # time out for each process
if batch_input is False: if batch_input is False:
midped_batch = func_timeout.func_timeout( midped_batch, error_code, error_info = func_timeout.func_timeout(
self._timeout, self._timeout,
self.process, self.process,
args=(feed_batch, typical_logid)) args=(feed_batch, typical_logid))
else: else:
midped_batch = [] midped_batch = []
for idx in range(len(feed_batch)): for idx in range(len(feed_batch)):
predict_res = func_timeout.func_timeout( predict_res, error_code, error_info = func_timeout.func_timeout(
self._timeout, self._timeout,
self.process, self.process,
args=([feed_batch[idx]], typical_logid)) args=([feed_batch[idx]], typical_logid))
...@@ -1265,6 +1335,8 @@ class Op(object): ...@@ -1265,6 +1335,8 @@ class Op(object):
break break
end = int(round(_time() * 1000000)) end = int(round(_time() * 1000000))
in_time = end - start in_time = end - start
_LOGGER.debug("op:{} in_time_end:{}".format(op_info_prefix,
time.time()))
# parse channeldata batch # parse channeldata batch
try: try:
...@@ -1278,6 +1350,8 @@ class Op(object): ...@@ -1278,6 +1350,8 @@ class Op(object):
if len(parsed_data_dict) == 0: if len(parsed_data_dict) == 0:
# data in the whole batch is all error data # data in the whole batch is all error data
continue continue
_LOGGER.debug("op:{} parse_end:{}".format(op_info_prefix,
time.time()))
# print # print
front_cost = int(round(_time() * 1000000)) - start front_cost = int(round(_time() * 1000000)) - start
...@@ -1292,6 +1366,8 @@ class Op(object): ...@@ -1292,6 +1366,8 @@ class Op(object):
= self._run_preprocess(parsed_data_dict, op_info_prefix, logid_dict) = self._run_preprocess(parsed_data_dict, op_info_prefix, logid_dict)
end = profiler.record("prep#{}_1".format(op_info_prefix)) end = profiler.record("prep#{}_1".format(op_info_prefix))
prep_time = end - start prep_time = end - start
_LOGGER.debug("op:{} preprocess_end:{}, cost:{}".format(
op_info_prefix, time.time(), prep_time))
try: try:
# put error requests into output channel, skip process and postprocess stage # put error requests into output channel, skip process and postprocess stage
for data_id, err_channeldata in err_channeldata_dict.items(): for data_id, err_channeldata in err_channeldata_dict.items():
...@@ -1313,6 +1389,8 @@ class Op(object): ...@@ -1313,6 +1389,8 @@ class Op(object):
= self._run_process(preped_data_dict, op_info_prefix, skip_process_dict, logid_dict) = self._run_process(preped_data_dict, op_info_prefix, skip_process_dict, logid_dict)
end = profiler.record("midp#{}_1".format(op_info_prefix)) end = profiler.record("midp#{}_1".format(op_info_prefix))
midp_time = end - start midp_time = end - start
_LOGGER.debug("op:{} process_end:{}, cost:{}".format(
op_info_prefix, time.time(), midp_time))
try: try:
for data_id, err_channeldata in err_channeldata_dict.items(): for data_id, err_channeldata in err_channeldata_dict.items():
self._push_to_output_channels( self._push_to_output_channels(
...@@ -1334,6 +1412,8 @@ class Op(object): ...@@ -1334,6 +1412,8 @@ class Op(object):
end = profiler.record("postp#{}_1".format(op_info_prefix)) end = profiler.record("postp#{}_1".format(op_info_prefix))
postp_time = end - start postp_time = end - start
after_postp_time = _time() after_postp_time = _time()
_LOGGER.debug("op:{} postprocess_end:{}, cost:{}".format(
op_info_prefix, time.time(), postp_time))
try: try:
for data_id, err_channeldata in err_channeldata_dict.items(): for data_id, err_channeldata in err_channeldata_dict.items():
self._push_to_output_channels( self._push_to_output_channels(
...@@ -1486,6 +1566,90 @@ class RequestOp(Op): ...@@ -1486,6 +1566,90 @@ class RequestOp(Op):
_LOGGER.critical("Op(Request) Failed to init: {}".format(e)) _LOGGER.critical("Op(Request) Failed to init: {}".format(e))
os._exit(-1) os._exit(-1)
def proto_tensor_2_numpy(self, tensor):
"""
Convert proto tensor to numpy array. The supported types are as follows:
INT64
FP32
INT32
FP64
INT16
FP16
BF16
UINT8
INT8
BOOL
BYTES
Unsupported type:
STRING
COMPLEX64
COMPLEX128
Args:
tensor: one tensor in request.tensors.
Returns:
np.ndarray
"""
if tensor is None or tensor.elem_type is None or tensor.name is None:
_LOGGER.error("input params of tensor is wrong. tensor: {}".format(
tensor))
return None
dims = []
if tensor.shape is None:
dims.append(1)
else:
for one_dim in tensor.shape:
dims.append(one_dim)
np_data = None
_LOGGER.info("proto_to_numpy, name:{}, type:{}, dims:{}".format(
tensor.name, tensor.elem_type, dims))
if tensor.elem_type == 0:
# VarType: INT64
np_data = np.array(tensor.int64_data).astype(int64).reshape(dims)
elif tensor.elem_type == 1:
# VarType: FP32
np_data = np.array(tensor.float_data).astype(float32).reshape(dims)
elif tensor.elem_type == 2:
# VarType: INT32
np_data = np.array(tensor.int_data).astype(int32).reshape(dims)
elif tensor.elem_type == 3:
# VarType: FP64
np_data = np.array(tensor.float64_data).astype(float64).reshape(
dims)
elif tensor.elem_type == 4:
# VarType: INT16
np_data = np.array(tensor.int_data).astype(int16).reshape(dims)
elif tensor.elem_type == 5:
# VarType: FP16
np_data = np.array(tensor.float_data).astype(float16).reshape(dims)
elif tensor.elem_type == 6:
# VarType: BF16
np_data = np.array(tensor.uint32_data).astype(uint16).reshape(dims)
elif tensor.elem_type == 7:
# VarType: UINT8
np_data = np.array(tensor.uint32_data).astype(uint8).reshape(dims)
elif tensor.elem_type == 8:
# VarType: INT8
np_data = np.array(tensor.int_data).astype(int8).reshape(dims)
elif tensor.elem_type == 9:
# VarType: BOOL
np_data = np.array(tensor.bool_data).astype(bool).reshape(dims)
elif tensor.elem_type == 13:
# VarType: BYTES
byte_data = BytesIO(tensor.byte_data)
np_data = np.load(byte_data, allow_pickle=True)
else:
_LOGGER.error("Sorry, the type {} of tensor {} is not supported.".
format(tensor.elem_type, tensor.name))
raise ValueError(
"Sorry, the type {} of tensor {} is not supported.".format(
tensor.elem_type, tensor.name))
return np_data
def unpack_request_package(self, request): def unpack_request_package(self, request):
""" """
Unpack request package by gateway.proto Unpack request package by gateway.proto
...@@ -1506,12 +1670,47 @@ class RequestOp(Op): ...@@ -1506,12 +1670,47 @@ class RequestOp(Op):
_LOGGER.critical("request is None") _LOGGER.critical("request is None")
raise ValueError("request is None") raise ValueError("request is None")
# unpack key/value string list
for idx, key in enumerate(request.key): for idx, key in enumerate(request.key):
dict_data[key] = request.value[idx] dict_data[key] = request.value[idx]
log_id = request.logid log_id = request.logid
_LOGGER.debug("RequestOp unpack one request. log_id:{}, clientip:{} \
name:{}, method:{}".format(log_id, request.clientip, request.name, # unpack proto.tensors data.
request.method)) for one_tensor in request.tensors:
name = one_tensor.name
elem_type = one_tensor.elem_type
if one_tensor.name is None:
_LOGGER.error("Tensor name is None.")
raise ValueError("Tensor name is None.")
numpy_dtype = _TENSOR_DTYPE_2_NUMPY_DATA_DTYPE.get(elem_type)
if numpy_dtype is None:
_LOGGER.error(
"elem_type:{} is dismatch in unpack_request_package.",
format(elem_type))
raise ValueError("elem_type:{} error".format(elem_type))
if numpy_dtype == "string":
new_string = ""
if one_tensor.str_data is None:
_LOGGER.error(
"str_data of tensor:{} is None, elem_type is {}.".
format(name, elem_type))
raise ValueError(
"str_data of tensor:{} is None, elem_type is {}.".
format(name, elem_type))
for one_str in one_tensor.str_data:
new_string += one_str
dict_data[name] = new_string
else:
dict_data[name] = self.proto_tensor_2_numpy(one_tensor)
_LOGGER.info("RequestOp unpack one request. log_id:{}, clientip:{} \
name:{}, method:{}, time:{}"
.format(log_id, request.clientip, request.name,
request.method, time.time()))
return dict_data, log_id, None, "" return dict_data, log_id, None, ""
...@@ -1530,6 +1729,7 @@ class ResponseOp(Op): ...@@ -1530,6 +1729,7 @@ class ResponseOp(Op):
""" """
super(ResponseOp, self).__init__( super(ResponseOp, self).__init__(
name="@DAGExecutor", input_ops=input_ops) name="@DAGExecutor", input_ops=input_ops)
# init op # init op
try: try:
self.init_op() self.init_op()
...@@ -1538,6 +1738,12 @@ class ResponseOp(Op): ...@@ -1538,6 +1738,12 @@ class ResponseOp(Op):
e, exc_info=True)) e, exc_info=True))
os._exit(-1) os._exit(-1)
# init ResponseOp
self.is_pack_tensor = False
def set_pack_format(self, isTensor=False):
self.is_pack_tensor = isTensor
def pack_response_package(self, channeldata): def pack_response_package(self, channeldata):
""" """
Getting channeldata from the last channel, packing the response Getting channeldata from the last channel, packing the response
......
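The elem_type 13 (BYTES) branch of proto_tensor_2_numpy above depends on a numpy save/load round trip through BytesIO, matching the packing comment in the proto. A standalone sketch of that contract, independent of the serving classes:

    import numpy as np
    from io import BytesIO

    # client side: serialize an arbitrary ndarray into the bytes that go into Tensor.byte_data
    value = np.arange(6, dtype=np.float32).reshape(2, 3)
    buf = BytesIO()
    np.save(buf, value, allow_pickle=True)
    byte_data = buf.getvalue()

    # server side: recover the ndarray, as proto_tensor_2_numpy does for elem_type == 13
    restored = np.load(BytesIO(byte_data), allow_pickle=True)
    assert (restored == value).all()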
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
import grpc import grpc
import sys import sys
import time
import numpy as np import numpy as np
from numpy import * from numpy import *
import logging import logging
...@@ -24,6 +25,7 @@ from .channel import ChannelDataErrcode ...@@ -24,6 +25,7 @@ from .channel import ChannelDataErrcode
from .proto import pipeline_service_pb2 from .proto import pipeline_service_pb2
from .proto import pipeline_service_pb2_grpc from .proto import pipeline_service_pb2_grpc
import six import six
from io import BytesIO
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
...@@ -46,7 +48,8 @@ class PipelineClient(object): ...@@ -46,7 +48,8 @@ class PipelineClient(object):
self._stub = pipeline_service_pb2_grpc.PipelineServiceStub( self._stub = pipeline_service_pb2_grpc.PipelineServiceStub(
self._channel) self._channel)
def _pack_request_package(self, feed_dict, profile): def _pack_request_package(self, feed_dict, pack_tensor_format,
use_tensor_bytes, profile):
req = pipeline_service_pb2.Request() req = pipeline_service_pb2.Request()
logid = feed_dict.get("logid") logid = feed_dict.get("logid")
...@@ -69,38 +72,120 @@ class PipelineClient(object): ...@@ -69,38 +72,120 @@ class PipelineClient(object):
feed_dict.pop("clientip") feed_dict.pop("clientip")
np.set_printoptions(threshold=sys.maxsize) np.set_printoptions(threshold=sys.maxsize)
for key, value in feed_dict.items(): if pack_tensor_format is False:
req.key.append(key) # pack string key/val format
for key, value in feed_dict.items():
if (sys.version_info.major == 2 and isinstance(value, req.key.append(key)
(str, unicode)) or
((sys.version_info.major == 3) and isinstance(value, str))): if (sys.version_info.major == 2 and
req.value.append(value) isinstance(value, (str, unicode)) or
continue ((sys.version_info.major == 3) and isinstance(value, str))):
req.value.append(value)
if isinstance(value, np.ndarray): continue
req.value.append(value.__repr__())
elif isinstance(value, list): if isinstance(value, np.ndarray):
req.value.append(np.array(value).__repr__()) req.value.append(value.__repr__())
else: elif isinstance(value, list):
raise TypeError("only str and np.ndarray type is supported: {}". req.value.append(np.array(value).__repr__())
format(type(value))) else:
if profile: raise TypeError(
req.key.append(self._profile_key) "only str and np.ndarray type is supported: {}".format(
req.value.append(self._profile_value) type(value)))
if profile:
req.key.append(self._profile_key)
req.value.append(self._profile_value)
else:
# pack tensor format
for key, value in feed_dict.items():
one_tensor = req.tensors.add()
one_tensor.name = key
if isinstance(value, str):
one_tensor.str_data.append(value)
one_tensor.elem_type = 12 #12 => string in proto
continue
if isinstance(value, np.ndarray):
# copy shape
_LOGGER.debug(
"key:{}, use_tensor_bytes:{}, value.shape:{}, value.dtype:{}".
format(key, use_tensor_bytes, value.shape, value.dtype))
for one_dim in value.shape:
one_tensor.shape.append(one_dim)
# packed into bytes
if use_tensor_bytes is True:
np_bytes = BytesIO()
np.save(np_bytes, value, allow_pickle=True)
one_tensor.byte_data = np_bytes.getvalue()
one_tensor.elem_type = 13 #13 => bytes in proto
continue
flat_value = value.flatten().tolist()
# copy data
if value.dtype == "int64":
one_tensor.int64_data.extend(flat_value)
one_tensor.elem_type = 0
elif value.dtype == "float32":
one_tensor.float_data.extend(flat_value)
one_tensor.elem_type = 1
elif value.dtype == "int32":
one_tensor.int_data.extend(flat_value)
one_tensor.elem_type = 2
elif value.dtype == "float64":
one_tensor.float64_data.extend(flat_value)
one_tensor.elem_type = 3
elif value.dtype == "int16":
one_tensor.int_data.extend(flat_value)
one_tensor.elem_type = 4
elif value.dtype == "float16":
one_tensor.float_data.extend(flat_value)
one_tensor.elem_type = 5
elif value.dtype == "uint16":
one_tensor.uint32_data.extend(flat_value)
one_tensor.elem_type = 6
elif value.dtype == "uint8":
one_tensor.uint32_data.extend(flat_value)
one_tensor.elem_type = 7
elif value.dtype == "int8":
one_tensor.int_data.extend(flat_value)
one_tensor.elem_type = 8
elif value.dtype == "bool":
one_tensor.bool_data.extend(flat_value)
one_tensor.elem_type = 9
else:
_LOGGER.error(
"value type {} of tensor {} is not supported.".
format(value.dtype, key))
else:
raise TypeError(
"only str and np.ndarray type is supported: {}".format(
type(value)))
return req return req
def _unpack_response_package(self, resp, fetch): def _unpack_response_package(self, resp, fetch):
return resp return resp
def predict(self, feed_dict, fetch=None, asyn=False, profile=False): def predict(self,
feed_dict,
fetch=None,
asyn=False,
pack_tensor_format=False,
use_tensor_bytes=False,
profile=False,
log_id=0):
if not isinstance(feed_dict, dict): if not isinstance(feed_dict, dict):
raise TypeError( raise TypeError(
"feed must be dict type with format: {name: value}.") "feed must be dict type with format: {name: value}.")
if fetch is not None and not isinstance(fetch, list): if fetch is not None and not isinstance(fetch, list):
raise TypeError("fetch must be list type with format: [name].") raise TypeError("fetch must be list type with format: [name].")
req = self._pack_request_package(feed_dict, profile) print("PipelineClient::predict pack_data time:{}".format(time.time()))
req = self._pack_request_package(feed_dict, pack_tensor_format,
use_tensor_bytes, profile)
req.logid = log_id
if not asyn: if not asyn:
print("PipelineClient::predict before time:{}".format(time.time()))
resp = self._stub.inference(req) resp = self._stub.inference(req)
return self._unpack_response_package(resp, fetch) return self._unpack_response_package(resp, fetch)
else: else:
......
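For context, the new predict() keywords in the hunk above change how feed data leaves the client: with pack_tensor_format=False (the default) values are still repr()-ed into string key/value pairs, while pack_tensor_format=True copies them into Request.tensors, and use_tensor_bytes=True additionally serializes each ndarray through BytesIO + np.save into byte_data. A minimal, hedged usage sketch; the endpoint 127.0.0.1:18090 and the feed key "image" are made-up examples, not part of this change:

# Sketch only: endpoint, port and feed key are hypothetical.
import numpy as np
from paddle_serving_server.pipeline import PipelineClient

client = PipelineClient()
client.connect(['127.0.0.1:18090'])

img = np.random.rand(1, 3, 224, 224).astype('float32')

# Legacy packing: values are repr()-ed into string key/value pairs.
ret = client.predict(feed_dict={"image": img})

# New packing: values go into Request.tensors; bytes path uses BytesIO + np.save.
ret = client.predict(feed_dict={"image": img},
                     pack_tensor_format=True,
                     use_tensor_bytes=True,
                     log_id=1)
print(ret.err_no, ret.err_msg)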
...@@ -22,6 +22,7 @@ from contextlib import closing ...@@ -22,6 +22,7 @@ from contextlib import closing
import multiprocessing import multiprocessing
import yaml import yaml
import io import io
import time
from .proto import pipeline_service_pb2_grpc, pipeline_service_pb2 from .proto import pipeline_service_pb2_grpc, pipeline_service_pb2
from . import operator from . import operator
...@@ -47,8 +48,9 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer): ...@@ -47,8 +48,9 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
_LOGGER.info("[PipelineServicer] succ init") _LOGGER.info("[PipelineServicer] succ init")
def inference(self, request, context): def inference(self, request, context):
_LOGGER.info("(log_id={}) inference request name:{} self.name:{}". _LOGGER.info(
format(request.logid, request.name, self._name)) "(log_id={}) inference request name:{} self.name:{} time:{}".format(
request.logid, request.name, self._name, time.time()))
if request.name != "" and request.name != self._name: if request.name != "" and request.name != self._name:
_LOGGER.error("(log_id={}) name dismatch error. request.name:{}," _LOGGER.error("(log_id={}) name dismatch error. request.name:{},"
"server.name={}".format(request.logid, request.name, "server.name={}".format(request.logid, request.name,
...@@ -339,7 +341,7 @@ class ServerYamlConfChecker(object): ...@@ -339,7 +341,7 @@ class ServerYamlConfChecker(object):
" or yml_dict can be selected as the parameter.") " or yml_dict can be selected as the parameter.")
if yml_file is not None: if yml_file is not None:
with io.open(yml_file, encoding='utf-8') as f: with io.open(yml_file, encoding='utf-8') as f:
conf = yaml.load(f.read()) conf = yaml.load(f.read(), yaml.FullLoader)
elif yml_dict is not None: elif yml_dict is not None:
conf = yml_dict conf = yml_dict
else: else:
...@@ -469,6 +471,7 @@ class ServerYamlConfChecker(object): ...@@ -469,6 +471,7 @@ class ServerYamlConfChecker(object):
"channel_size": 0, "channel_size": 0,
"is_thread_op": True, "is_thread_op": True,
"tracer": {}, "tracer": {},
"channel_recv_frist_arrive": False,
} }
conf_type = { conf_type = {
...@@ -477,6 +480,7 @@ class ServerYamlConfChecker(object): ...@@ -477,6 +480,7 @@ class ServerYamlConfChecker(object):
"use_profile": bool, "use_profile": bool,
"channel_size": int, "channel_size": int,
"is_thread_op": bool, "is_thread_op": bool,
"channel_recv_frist_arrive": bool,
} }
conf_qualification = { conf_qualification = {
......
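Two behavioural notes from the hunks above: the server config is now parsed with an explicit FullLoader (needed once pyyaml is bumped to >= 5.1 further down), and the dag section gains the channel_recv_frist_arrive switch (key spelled exactly as in the code). A hedged sketch of what the checker now accepts; the yaml fragment is illustrative, not taken from this commit:

# Sketch: parsing a pipeline config the way ServerYamlConfChecker now does.
import yaml

yml_text = """
dag:
    is_thread_op: false
    channel_size: 0
    channel_recv_frist_arrive: true   # new option introduced in this change
"""
conf = yaml.load(yml_text, yaml.FullLoader)       # explicit loader, PyYAML >= 5.1
print(conf["dag"]["channel_recv_frist_arrive"])   # -> True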
...@@ -12,25 +12,120 @@ ...@@ -12,25 +12,120 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
syntax = "proto2"; syntax = "proto3";
package baidu.paddle_serving.pipeline_serving; package baidu.paddle_serving.pipeline_serving;
// Tensor structure, consistent with PADDLE variable types.
// Descriptions of input and output data.
message Tensor {
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32, FP16
repeated float float_data = 2;
// VarType: INT32, INT16, INT8
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: BF16, UINT8
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (No support) VarType: COMPLEX64, 2x represents the real part, 2x+1
// represents the imaginary part
repeated float complex64_data = 7;
// (No support) VarType: COMPLEX128, 2x represents the real part, 2x+1
// represents the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string str_data = 9;
// VarType: BYTES, suitable for big data. No need to save data types and
// dimensions
// pack method: pack by BytesIO, saved by np.save
// unpack method: load by np.load, unpack by BytesIO.
bytes byte_data = 10;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 12 => STRING
// 13 => BYTES
int32 elem_type = 20;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 21;
// Level of data(LOD), support variable length data, only for fetch tensor
// currently.
repeated int32 lod = 22;
// Correspond to the variable 'name' in the model description prototxt.
string name = 23;
};
// The structure of the service request. The input data can be repeated string
// pairs or tensors.
message Request { message Request {
// The input data are repeated string pairs.
// for examples. key is "words", value is the string of words.
repeated string key = 1; repeated string key = 1;
repeated string value = 2; repeated string value = 2;
optional string name = 3;
optional string method = 4; // The input data are repeated tensors for complex data structures.
optional int64 logid = 5; // Because tensors can save more data information and reduce the amount of data
optional string clientip = 6; // transferred.
repeated Tensor tensors = 3;
// The name field in the RESTful API
string name = 4;
// The method field in the RESTful API
string method = 5;
// For tracing requests and logs
int64 logid = 6;
// For tracking sources
string clientip = 7;
}; };
// The structure of the service response. The output data can be repeated string
// pairs or tensors.
message Response { message Response {
optional int32 err_no = 1; // Error code
optional string err_msg = 2; int32 err_no = 1;
// Error messages
string err_msg = 2;
// The results of string pairs
repeated string key = 3; repeated string key = 3;
repeated string value = 4; repeated string value = 4;
// The results of tensors
repeated Tensor tensors = 5;
}; };
// Python pipeline service
service PipelineService { service PipelineService {
rpc inference(Request) returns (Response) {} rpc inference(Request) returns (Response) {}
}; };
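The BYTES variant (elem_type 13) described in the Tensor comments can be exercised in isolation. A minimal sketch of the pack/unpack round trip using only numpy, matching "pack by BytesIO, saved by np.save / load by np.load":

# Round trip for Tensor.byte_data (elem_type 13): ndarray -> bytes -> ndarray.
import numpy as np
from io import BytesIO

arr = np.arange(6, dtype=np.float32).reshape(2, 3)

# Pack: what the client does before filling Tensor.byte_data.
buf = BytesIO()
np.save(buf, arr, allow_pickle=True)
byte_data = buf.getvalue()

# Unpack: what the receiving side is expected to do with Tensor.byte_data.
restored = np.load(BytesIO(byte_data), allow_pickle=True)
assert restored.dtype == arr.dtype and (restored == arr).all()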
...@@ -39,7 +39,7 @@ class AvailablePortGenerator(object): ...@@ -39,7 +39,7 @@ class AvailablePortGenerator(object):
def port_is_available(port): def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2) sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port)) result = sock.connect_ex(('127.0.0.1', port))
if result != 0: if result != 0:
return True return True
else: else:
......
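The one-line change above matters because connect_ex against 0.0.0.0 is not reliably meaningful on every platform; probing the loopback address is the portable way to ask whether something is already listening. A self-contained sketch of the probe as it now reads (the port number is an arbitrary example):

# A port is treated as available when nothing on localhost accepts the connection.
import socket
from contextlib import closing

def port_is_available(port):
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
        sock.settimeout(2)
        return sock.connect_ex(('127.0.0.1', port)) != 0

print(port_is_available(12000))  # True if the (arbitrary) port 12000 is free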
...@@ -7,7 +7,7 @@ protobuf>=3.12.2 ...@@ -7,7 +7,7 @@ protobuf>=3.12.2
grpcio-tools>=1.28.1 grpcio-tools>=1.28.1
grpcio>=1.28.1 grpcio>=1.28.1
func-timeout>=4.3.5 func-timeout>=4.3.5
pyyaml>=1.3.0 pyyaml>=5.1
flask>=1.1.2 flask>=1.1.2
click==7.1.2 click==7.1.2
itsdangerous==1.1.0 itsdangerous==1.1.0
......
...@@ -6,7 +6,7 @@ google>=2.0.3 ...@@ -6,7 +6,7 @@ google>=2.0.3
opencv-python==4.2.0.32 opencv-python==4.2.0.32
protobuf>=3.12.2 protobuf>=3.12.2
func-timeout>=4.3.5 func-timeout>=4.3.5
pyyaml>=1.3.0 pyyaml>=5.1
flask>=1.1.2 flask>=1.1.2
click==7.1.2 click==7.1.2
itsdangerous==1.1.0 itsdangerous==1.1.0
......
...@@ -33,7 +33,7 @@ util.gen_pipeline_code("paddle_serving_server") ...@@ -33,7 +33,7 @@ util.gen_pipeline_code("paddle_serving_server")
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2', 'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2',
'flask >= 1.1.1', 'click==7.1.2', 'itsdangerous==1.1.0', 'Jinja2==2.11.3', 'flask >= 1.1.1,<2.0.0', 'click==7.1.2', 'itsdangerous==1.1.0', 'Jinja2==2.11.3',
'MarkupSafe==1.1.1', 'Werkzeug==1.0.1', 'func_timeout', 'pyyaml' 'MarkupSafe==1.1.1', 'Werkzeug==1.0.1', 'func_timeout', 'pyyaml'
] ]
......
...@@ -40,10 +40,10 @@ go env -w GO111MODULE=auto ...@@ -40,10 +40,10 @@ go env -w GO111MODULE=auto
build_whl_list=(build_cpu_server build_gpu_server build_client build_app) build_whl_list=(build_cpu_server build_gpu_server build_client build_app)
rpc_model_list=(grpc_fit_a_line grpc_yolov4 pipeline_imagenet bert_rpc_gpu bert_rpc_cpu ResNet50_rpc \ rpc_model_list=(grpc_fit_a_line grpc_yolov4 pipeline_imagenet bert_rpc_gpu bert_rpc_cpu ResNet50_rpc \
lac_rpc cnn_rpc bow_rpc lstm_rpc fit_a_line_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \ lac_rpc_asyn cnn_rpc_asyn bow_rpc lstm_rpc fit_a_line_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \
criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu faster_rcnn_hrnetv2p_w18_1x_encrypt \ criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu faster_rcnn_hrnetv2p_w18_1x_encrypt \
faster_rcnn_model_rpc low_precision_resnet50_int8 ocr_c++_service) faster_rcnn_model_rpc low_precision_resnet50_int8 ocr_c++_service ocr_c++_service_asyn)
http_model_list=(fit_a_line_http lac_http cnn_http bow_http lstm_http ResNet50_http bert_http \ http_model_list=(fit_a_line_http lac_http imdb_http_proto imdb_http_json imdb_grpc ResNet50_http bert_http \
pipeline_ocr_cpu_http) pipeline_ocr_cpu_http)
function setproxy() { function setproxy() {
...@@ -492,7 +492,7 @@ function ResNet101_rpc() { ...@@ -492,7 +492,7 @@ function ResNet101_rpc() {
kill_server_process kill_server_process
} }
function cnn_rpc() { function cnn_rpc_asyn() {
dir=${log_dir}rpc_model/cnn_rpc/ dir=${log_dir}rpc_model/cnn_rpc/
check_dir ${dir} check_dir ${dir}
unsetproxy unsetproxy
...@@ -500,8 +500,9 @@ function cnn_rpc() { ...@@ -500,8 +500,9 @@ function cnn_rpc() {
data_dir=${data}imdb/ data_dir=${data}imdb/
link_data ${data_dir} link_data ${data_dir}
sed -i 's/9292/8865/g' test_client.py sed -i 's/9292/8865/g' test_client.py
${py_version} -m paddle_serving_server.serve --model imdb_cnn_model/ --port 8865 > ${dir}server_log.txt 2>&1 & ${py_version} -m paddle_serving_server.serve --model imdb_cnn_model/ --port 8865 --op_num 4 --thread 10 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
check_gpu_memory 0
head test_data/part-0 | ${py_version} test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1 head test_data/part-0 | ${py_version} test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1
check_result client "cnn_CPU_RPC server test completed" check_result client "cnn_CPU_RPC server test completed"
kill_server_process kill_server_process
...@@ -537,7 +538,7 @@ function lstm_rpc() { ...@@ -537,7 +538,7 @@ function lstm_rpc() {
kill_server_process kill_server_process
} }
function lac_rpc() { function lac_rpc_asyn() {
dir=${log_dir}rpc_model/lac_rpc/ dir=${log_dir}rpc_model/lac_rpc/
check_dir ${dir} check_dir ${dir}
unsetproxy unsetproxy
...@@ -545,8 +546,9 @@ function lac_rpc() { ...@@ -545,8 +546,9 @@ function lac_rpc() {
data_dir=${data}lac/ data_dir=${data}lac/
link_data ${data_dir} link_data ${data_dir}
sed -i 's/9292/8868/g' lac_client.py sed -i 's/9292/8868/g' lac_client.py
${py_version} -m paddle_serving_server.serve --model lac_model/ --port 8868 > ${dir}server_log.txt 2>&1 & ${py_version} -m paddle_serving_server.serve --model lac_model/ --port 8868 --gpu_ids 0 --op_num 2 > ${dir}server_log.txt 2>&1 &
check_result server 5 check_result server 8
check_gpu_memory 0
echo "我爱北京天安门" | ${py_version} lac_client.py lac_client/serving_client_conf.prototxt lac_dict/ > ${dir}client_log.txt 2>&1 echo "我爱北京天安门" | ${py_version} lac_client.py lac_client/serving_client_conf.prototxt lac_dict/ > ${dir}client_log.txt 2>&1
check_result client "lac_CPU_RPC server test completed" check_result client "lac_CPU_RPC server test completed"
kill_server_process kill_server_process
...@@ -768,10 +770,9 @@ function fit_a_line_http() { ...@@ -768,10 +770,9 @@ function fit_a_line_http() {
check_dir ${dir} check_dir ${dir}
unsetproxy unsetproxy
cd ${build_path}/python/examples/fit_a_line cd ${build_path}/python/examples/fit_a_line
sed -i "s/9393/8871/g" test_server.py ${py_version} -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 > ${dir}server_log.txt 2>&1 &
${py_version} test_server.py > ${dir}server_log.txt 2>&1 &
check_result server 10 check_result server 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:8871/uci/prediction > ${dir}client_log.txt 2>&1 ${py_version} test_httpclient.py uci_housing_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
check_result client "fit_a_line_CPU_HTTP server test completed" check_result client "fit_a_line_CPU_HTTP server test completed"
kill_server_process kill_server_process
} }
...@@ -781,61 +782,64 @@ function lac_http() { ...@@ -781,61 +782,64 @@ function lac_http() {
check_dir ${dir} check_dir ${dir}
unsetproxy unsetproxy
cd ${build_path}/python/examples/lac cd ${build_path}/python/examples/lac
${py_version} lac_web_service.py lac_model/ lac_workdir 8872 > ${dir}server_log.txt 2>&1 & ${py_version} -m paddle_serving_server.serve --model lac_model/ --port 9292 > ${dir}server_log.txt 2>&1 &
check_result server 10 check_result server 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://127.0.0.1:8872/lac/prediction > ${dir}client_log.txt 2>&1 echo "我爱北京天安门" | ${py_version} lac_http_client.py lac_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
check_result client "lac_CPU_HTTP server test completed" check_result client "lac_CPU_HTTP server test completed"
kill_server_process kill_server_process
} }
function cnn_http() { function imdb_http_proto() {
dir=${log_dir}http_model/cnn_http/ dir=${log_dir}http_model/imdb_http_proto/
check_dir ${dir} check_dir ${dir}
unsetproxy unsetproxy
cd ${build_path}/python/examples/imdb cd ${build_path}/python/examples/imdb
${py_version} text_classify_service.py imdb_cnn_model/ workdir/ 8873 imdb.vocab > ${dir}server_log.txt 2>&1 & ${py_version} -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 > ${dir}server_log.txt 2>&1 &
check_result server 10 check_result server 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:8873/imdb/prediction > ${dir}client_log.txt 2>&1 head test_data/part-0 | ${py_version} test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1
check_result client "cnn_CPU_HTTP server test completed" check_result client "imdb_CPU_HTTP-proto server test completed"
kill_server_process kill_server_process
} }
function bow_http() { function imdb_http_json() {
dir=${log_dir}http_model/bow_http/ dir=${log_dir}http_model/imdb_http_json/
check_dir ${dir} check_dir ${dir}
unsetproxy unsetproxy
cd ${build_path}/python/examples/imdb cd ${build_path}/python/examples/imdb
${py_version} text_classify_service.py imdb_bow_model/ workdir/ 8874 imdb.vocab > ${dir}server_log.txt 2>&1 & ${py_version} -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 > ${dir}server_log.txt 2>&1 &
check_result server 10 check_result server 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:8874/imdb/prediction > ${dir}client_log.txt 2>&1 sed -i "s/#client.set_http_proto(True)/client.set_http_proto(False)/g" test_http_client.py
check_result client "bow_CPU_HTTP server test completed" head test_data/part-0 | ${py_version} test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1
check_result client "imdb_CPU_HTTP-json server test completed"
kill_server_process kill_server_process
} }
function lstm_http() { function imdb_grpc() {
dir=${log_dir}http_model/lstm_http/ dir=${log_dir}http_model/imdb_grpc/
check_dir ${dir} check_dir ${dir}
unsetproxy unsetproxy
cd ${build_path}/python/examples/imdb cd ${build_path}/python/examples/imdb
${py_version} text_classify_service.py imdb_bow_model/ workdir/ 8875 imdb.vocab > ${dir}server_log.txt 2>&1 & ${py_version} -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 --gpu_ids 1 > ${dir}server_log.txt 2>&1 &
check_result server 10 check_result server 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://127.0.0.1:8875/imdb/prediction > ${dir}client_log.txt 2>&1 check_gpu_memory 1
check_result client "lstm_CPU_HTTP server test completed" sed -i "s/client.set_http_proto(False)/#client.set_http_proto(False)/g" test_http_client.py
sed -i "s/#client.set_use_grpc_client(True)/client.set_use_grpc_client(True)/g" test_http_client.py
head test_data/part-0 | ${py_version} test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab > ${dir}client_log.txt 2>&1
check_result client "imdb_GPU_GRPC server test completed"
kill_server_process kill_server_process
} }
function ResNet50_http() { function ResNet50_http() {
echo "pass" dir=${log_dir}http_model/ResNet50_http/
# dir=${log_dir}http_model/ResNet50_http/ check_dir ${dir}
# check_dir ${dir} unsetproxy
# unsetproxy cd ${build_path}/python/examples/imagenet
# cd ${build_path}/python/examples/imagenet ${py_version} -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
# ${py_version} resnet50_web_service.py ResNet50_vd_model gpu 8876 > ${dir}server_log.txt 2>&1 & check_result server 10
# check_result server 10 check_gpu_memory 0
# check_gpu_memory 0 ${py_version} resnet50_http_client.py ResNet50_vd_client_config/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
# curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://127.0.0.1:8876/image/prediction > ${dir}client_log.txt 2>&1 check_result client "ResNet50_GPU_HTTP server test completed"
# check_result client "ResNet50_GPU_HTTP server test completed" kill_server_process
# kill_server_process
} }
function bert_http() { function bert_http() {
...@@ -845,10 +849,11 @@ function bert_http() { ...@@ -845,10 +849,11 @@ function bert_http() {
cd ${build_path}/python/examples/bert cd ${build_path}/python/examples/bert
cp data-c.txt.1 data-c.txt cp data-c.txt.1 data-c.txt
cp vocab.txt.1 vocab.txt cp vocab.txt.1 vocab.txt
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0,1
${py_version} bert_web_service.py bert_seq128_model/ 8878 > ${dir}server_log.txt 2>&1 & ${py_version} -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 > ${dir}server_log.txt 2>&1 &
check_result server 8 check_result server 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:8878/bert/prediction > ${dir}client_log.txt 2>&1 check_gpu_memory 0
head data-c.txt | ${py_version} bert_httpclient.py --model bert_seq128_client/serving_client_conf.prototxt > ${dir}client_log.txt 2>&1
check_result client "bert_GPU_HTTP server test completed" check_result client "bert_GPU_HTTP server test completed"
kill_server_process kill_server_process
} }
...@@ -904,7 +909,7 @@ function ocr_c++_service() { ...@@ -904,7 +909,7 @@ function ocr_c++_service() {
cp -r ocr_det_client/ ./ocr_det_client_cp cp -r ocr_det_client/ ./ocr_det_client_cp
rm -rf ocr_det_client rm -rf ocr_det_client
mv ocr_det_client_cp ocr_det_client mv ocr_det_client_cp ocr_det_client
sed -i "s/feed_type: 1/feed_type: 3/g" ocr_det_client/serving_client_conf.prototxt sed -i "s/feed_type: 1/feed_type: 20/g" ocr_det_client/serving_client_conf.prototxt
sed -i "s/shape: 3/shape: 1/g" ocr_det_client/serving_client_conf.prototxt sed -i "s/shape: 3/shape: 1/g" ocr_det_client/serving_client_conf.prototxt
sed -i '7,8d' ocr_det_client/serving_client_conf.prototxt sed -i '7,8d' ocr_det_client/serving_client_conf.prototxt
echo -e "${GREEN_COLOR}OCR_C++_Service_GPU_RPC server started${RES}" echo -e "${GREEN_COLOR}OCR_C++_Service_GPU_RPC server started${RES}"
...@@ -920,6 +925,23 @@ function ocr_c++_service() { ...@@ -920,6 +925,23 @@ function ocr_c++_service() {
kill_server_process kill_server_process
} }
function ocr_c++_service_asyn() {
dir=${log_dir}rpc_model/ocr_c++_serving/
cd ${build_path}/python/examples/ocr
check_dir ${dir}
echo -e "${GREEN_COLOR}OCR_C++_Service_GPU_RPC asyn_server started${RES}"
$py_version -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --port 9293 --gpu_id 0 --op_num 4 > ${dir}server_log.txt 2>&1 &
check_result server 8
check_gpu_memory 0
echo -e "${GREEN_COLOR}OCR_C++_Service_GPU_RPC client started${RES}"
echo "------------------first:"
$py_version ocr_cpp_client.py ocr_det_client ocr_rec_client
echo "------------------second:"
$py_version ocr_cpp_client.py ocr_det_client ocr_rec_client > ${dir}client_log.txt 2>&1
check_result client "OCR_C++_Service_GPU_RPC server test completed"
kill_server_process
}
function build_all_whl() { function build_all_whl() {
for whl in ${build_whl_list[@]} for whl in ${build_whl_list[@]}
do do
......
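Outside the CI harness, the asynchronous launch exercised by cnn_rpc_asyn/lac_rpc_asyn above comes down to passing --op_num (plus --thread/--gpu_ids where relevant) to paddle_serving_server.serve. A hedged Python sketch that mirrors those shell lines; the model directory, port, and wait time are examples copied from the script, not requirements:

# Sketch: start the server with the async op pool the way the updated CI does,
# wait for it to come up, then shut it down.
import signal
import subprocess
import time

cmd = ["python3", "-m", "paddle_serving_server.serve",
       "--model", "imdb_cnn_model/",
       "--port", "8865",
       "--op_num", "4",          # async worker ops, as in cnn_rpc_asyn
       "--thread", "10",
       "--gpu_ids", "0"]
server = subprocess.Popen(cmd)
time.sleep(8)                    # roughly mirrors `check_result server 8`
# ... run a client against 127.0.0.1:8865 here ...
server.send_signal(signal.SIGINT)
server.wait()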