Unverified · Commit 7f5d653f authored by Dong Daxiang, committed by GitHub

Merge pull request #123 from MRXLT/general-model-config

add C++ code for general server
......@@ -15,12 +15,17 @@
syntax = "proto2";
package baidu.paddle_serving.configure;
message Shape {
repeated int32 shape = 1;
};
message FeedVar {
required string name = 1;
required bool is_lod_tensor = 2;
required int32 feed_type = 3;
repeated int32 shape = 4;
}
message FetchVar {
required string name = 1;
repeated int32 shape = 2;
}
message GeneralModelConfig {
repeated bool is_lod_feed = 1;
repeated int32 feed_type = 2;
repeated Shape feed_shape = 4;
repeated FeedVar feed_var = 1;
repeated FetchVar fetch_var = 2;
};
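The change above replaces the three parallel lists of the old GeneralModelConfig (is_lod_feed, feed_type, feed_shape) with one FeedVar message per input and a FetchVar message per output, so everything describing a variable lives in one place. A minimal sketch of consuming the new message through the generated C++ classes; the two include paths are assumptions, while read_proto_conf is the same helper the serving code further down already uses:

// Sketch only: include paths are assumed, not taken from this PR.
#include <iostream>
#include <string>
#include "core/configure/include/configure_parser.h"  // assumed location of read_proto_conf
#include "core/configure/general_model_config.pb.h"   // assumed generated header

int dump_general_model_config(const std::string &path, const std::string &file) {
  baidu::paddle_serving::configure::GeneralModelConfig conf;
  if (baidu::paddle_serving::configure::read_proto_conf(path, file, &conf) != 0) {
    return -1;  // prototxt missing or not matching the schema above
  }
  for (int i = 0; i < conf.feed_var_size(); ++i) {
    const auto &var = conf.feed_var(i);
    std::cout << "feed " << var.name() << " lod=" << var.is_lod_tensor()
              << " type=" << var.feed_type() << std::endl;
  }
  for (int i = 0; i < conf.fetch_var_size(); ++i) {
    std::cout << "fetch " << conf.fetch_var(i).name() << std::endl;
  }
  return 0;
}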
......@@ -16,27 +16,27 @@ syntax = "proto2";
package baidu.paddle_serving.configure;
message ConnectionConf {
required int32 connect_timeout_ms = 1 [default = 2000];
required int32 rpc_timeout_ms = 2 [default = 20000];
required int32 connect_retry_count = 3 [default = 2];
required int32 max_connection_per_host = 4 [default = 100];
required int32 hedge_request_timeout_ms = 5 [default = -1];
required int32 hedge_fetch_retry_count = 6 [default = 2];
required string connection_type = 7 [default = "pooled"];
required int32 connect_timeout_ms = 1 [ default = 2000 ];
required int32 rpc_timeout_ms = 2 [ default = 20000 ];
required int32 connect_retry_count = 3 [ default = 2 ];
required int32 max_connection_per_host = 4 [ default = 100 ];
required int32 hedge_request_timeout_ms = 5 [ default = -1 ];
required int32 hedge_fetch_retry_count = 6 [ default = 2 ];
required string connection_type = 7 [ default = "pooled" ];
};
message NamingConf {
optional string cluster_filter_strategy = 1 [default="Default"];
optional string load_balance_strategy = 2 [default = "la"];
optional string cluster_filter_strategy = 1 [ default = "Default" ];
optional string load_balance_strategy = 2 [ default = "la" ];
optional string cluster = 3;
};
message RpcParameter {
// 0-NONE, 1-SNAPPY, 2-GZIP, 3-ZLIB, 4-LZ4
required int32 compress_type = 1 [default = 0];
required int32 package_size = 2 [default = 20];
required string protocol = 3 [default = "baidu_std"];
required int32 max_channel_per_request = 4 [default = 3];
required int32 compress_type = 1 [ default = 0 ];
required int32 package_size = 2 [ default = 20 ];
required string protocol = 3 [ default = "baidu_std" ];
required int32 max_channel_per_request = 4 [ default = 3 ];
};
message SplitConf {
......@@ -53,12 +53,17 @@ message VariantConf {
optional string variant_router = 6;
};
message WeightedRandomRenderConf { required string variant_weight_list = 1 [default= "50" ]; };
message WeightedRandomRenderConf {
required string variant_weight_list = 1 [ default = "50" ];
};
message Predictor {
required string name = 1 [default="general_model"];
required string service_name = 2 [default="baidu.paddle_serving.predictor.general_model.GeneralModelService"];
required string endpoint_router = 3 [default="WeightedRandomRender"];
required string name = 1 [ default = "general_model" ];
required string service_name = 2 [
default =
"baidu.paddle_serving.predictor.general_model.GeneralModelService"
];
required string endpoint_router = 3 [ default = "WeightedRandomRender" ];
required WeightedRandomRenderConf weighted_random_render_conf = 4;
repeated VariantConf variants = 5;
};
......
......@@ -53,6 +53,8 @@ message ResourceConf {
required string model_toolkit_path = 1;
required string model_toolkit_file = 2;
optional string cube_config_file = 3;
optional string general_model_path = 4;
optional string general_model_file = 5;
};
// DAG node dependency info
......
......@@ -23,7 +23,6 @@ namespace predictor {
using configure::ResourceConf;
using configure::GeneralModelConfig;
using configure::Shape;
using rec::mcube::CubeAPI;
// __thread bool p_thread_initialized = false;
......@@ -47,15 +46,22 @@ std::shared_ptr<PaddleGeneralModelConfig> Resource::get_general_model_config() {
}
void Resource::print_general_model_config(
const std::shared_ptr<PaddleGeneralModelConfig> & config) {
const std::shared_ptr<PaddleGeneralModelConfig>& config) {
if (config == nullptr) {
LOG(INFO) << "paddle general model config is not set";
return;
}
LOG(INFO) << "Number of Feed Tensor: " << config->_feed_type.size();
LOG(INFO) << "Number of Feed Tensor: " << config->_feed_name.size();
std::ostringstream oss;
LOG(INFO) << "Feed Name Info";
for (auto& feed_name : config->_feed_name) {
oss << feed_name << " ";
}
LOG(INFO) << oss.str();
oss.clear();
oss.str("");
LOG(INFO) << "Feed Type Info";
for (auto & feed_type : config->_feed_type) {
for (auto& feed_type : config->_feed_type) {
oss << feed_type << " ";
}
LOG(INFO) << oss.str();
......@@ -71,7 +77,7 @@ void Resource::print_general_model_config(
oss.clear();
oss.str("");
LOG(INFO) << "Capacity Info";
for (auto & cap : config->_capacity) {
for (auto& cap : config->_capacity) {
oss << cap << " ";
}
LOG(INFO) << oss.str();
......@@ -79,8 +85,8 @@ void Resource::print_general_model_config(
oss.str("");
LOG(INFO) << "Feed Shape Info";
int tensor_idx = 0;
for (auto & shape : config->_feed_shape) {
for (auto & dim : shape) {
for (auto& shape : config->_feed_shape) {
for (auto& dim : shape) {
oss << dim << " ";
}
LOG(INFO) << "Tensor[" << tensor_idx++ << "].shape: " << oss.str();
......@@ -146,38 +152,69 @@ int Resource::initialize(const std::string& path, const std::string& file) {
return 0;
}
int Resource::general_model_initialize(
const std::string& path, const std::string & file) {
// model config
int Resource::general_model_initialize(const std::string& path,
const std::string& file) {
if (!FLAGS_enable_general_model) {
return 0;
}
ResourceConf resource_conf;
if (configure::read_proto_conf(path, file, &resource_conf) != 0) {
LOG(ERROR) << "Failed initialize resource from: " << path << "/" << file;
return -1;
}
int err = 0;
std::string general_model_path = resource_conf.general_model_path();
std::string general_model_file = resource_conf.general_model_file();
if (err != 0) {
LOG(ERROR) << "read general_model_path failed, path[" << path << "], file["
<< file << "]";
return -1;
}
GeneralModelConfig model_config;
if (configure::read_proto_conf(path, file, &model_config) != 0) {
LOG(ERROR) << "Failed initialize resource from: " << path << "/" << file;
if (configure::read_proto_conf(general_model_path.c_str(),
general_model_file.c_str(),
&model_config) != 0) {
LOG(ERROR) << "Failed initialize model config from: " << general_model_path
<< "/" << general_model_file;
return -1;
}
_config.reset(new PaddleGeneralModelConfig());
_config->_feed_type.resize(model_config.feed_type_size());
_config->_is_lod_feed.resize(model_config.is_lod_feed_size());
_config->_capacity.resize(model_config.feed_shape_size());
_config->_feed_shape.resize(model_config.feed_shape_size());
for (int i = 0; i < model_config.is_lod_feed_size(); ++i) {
_config->_feed_type[i] = model_config.feed_type(i);
if (model_config.is_lod_feed(i)) {
int feed_var_num = model_config.feed_var_size();
_config->_feed_name.resize(feed_var_num);
_config->_feed_type.resize(feed_var_num);
_config->_is_lod_feed.resize(feed_var_num);
_config->_capacity.resize(feed_var_num);
_config->_feed_shape.resize(feed_var_num);
for (int i = 0; i < feed_var_num; ++i) {
_config->_feed_name[i] = model_config.feed_var(i).name();
_config->_feed_type[i] = model_config.feed_var(i).feed_type();
if (model_config.feed_var(i).is_lod_tensor()) {
_config->_feed_shape[i] = {-1};
_config->_is_lod_feed[i] = true;
} else {
_config->_capacity[i] = 1;
_config->_is_lod_feed[i] = false;
for (int j = 0; j < model_config.feed_shape(i).shape_size(); ++j) {
int dim = model_config.feed_shape(i).shape(j);
for (int j = 0; j < model_config.feed_var(i).shape_size(); ++j) {
int32_t dim = model_config.feed_var(i).shape(j);
_config->_feed_shape[i].push_back(dim);
_config->_capacity[i] *= dim;
}
}
}
int fetch_var_num = model_config.fetch_var_size();
_config->_fetch_name.resize(fetch_var_num);
_config->_fetch_shape.resize(fetch_var_num);
for (int i = 0; i < fetch_var_num; ++i) {
_config->_fetch_name[i] = model_config.fetch_var(i).name();
for (int j = 0; j < model_config.fetch_var(i).shape_size(); ++j) {
int dim = model_config.fetch_var(i).shape(j);
_config->_fetch_shape[i].push_back(dim);
}
}
return 0;
}
......
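For reference, with the sample general_model.prototxt added later in this PR, the loops in general_model_initialize() above fill the config roughly as follows (illustrative walk-through, not new code):

// feed_var "words": is_lod_tensor=true            -> _feed_shape[0] = {-1}, _is_lod_feed[0] = true
// feed_var "label": is_lod_tensor=false, shape: 1 -> _feed_shape[1] = {1}, _capacity[1] = 1
// fetch_var "cost": shape: 1 2 3                  -> _fetch_shape[0] = {1, 2, 3}
// fetch_var "acc": shape: 1                       -> _fetch_shape[1] = {1}
// fetch_var "prediction": shape: 2                -> _fetch_shape[2] = {2}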
......@@ -33,16 +33,19 @@ class PaddleGeneralModelConfig {
~PaddleGeneralModelConfig() {}
public:
std::vector<int> _feed_type; // 0 int64, 1 float
std::vector<std::string> _feed_name;
std::vector<int> _feed_type; // 0 int64, 1 float
std::vector<bool> _is_lod_feed; // true lod tensor
std::vector<int> _capacity; // capacity for each tensor
/*
feed_shape_ for feeded variable
feed_shape_[i][j] represents the jth dim for ith input Tensor
if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
*/
std::vector<int> _capacity; // capacity for each tensor
/*
feed_shape_ for feeded variable
feed_shape_[i][j] represents the jth dim for ith input Tensor
if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
*/
std::vector<std::vector<int>> _feed_shape;
std::vector<std::string> _fetch_name;
std::vector<std::vector<int>> _fetch_shape;
};
class BaseRdDict;
......@@ -76,8 +79,8 @@ class Resource {
int initialize(const std::string& path, const std::string& file);
int cube_initialize(const std::string& path, const std::string& file);
int general_model_initialize(
const std::string& path, const std::string & file);
int general_model_initialize(const std::string& path,
const std::string& file);
int thread_initialize();
......@@ -90,7 +93,7 @@ class Resource {
std::shared_ptr<PaddleGeneralModelConfig> get_general_model_config();
void print_general_model_config(
const std::shared_ptr<PaddleGeneralModelConfig> & config);
const std::shared_ptr<PaddleGeneralModelConfig>& config);
std::shared_ptr<RocksDBWrapper> getDB();
......
......@@ -45,8 +45,6 @@ using baidu::paddle_serving::predictor::FLAGS_logger_path;
using baidu::paddle_serving::predictor::FLAGS_logger_file;
using baidu::paddle_serving::predictor::FLAGS_resource_path;
using baidu::paddle_serving::predictor::FLAGS_resource_file;
using baidu::paddle_serving::predictor::FLAGS_general_model_path;
using baidu::paddle_serving::predictor::FLAGS_general_model_file;
using baidu::paddle_serving::predictor::FLAGS_reload_interval_s;
using baidu::paddle_serving::predictor::FLAGS_port;
......@@ -219,11 +217,10 @@ int main(int argc, char** argv) {
#ifndef BCLOUD
if (Resource::instance().general_model_initialize(
FLAGS_general_model_path, FLAGS_general_model_file) != 0) {
if (Resource::instance().general_model_initialize(FLAGS_resource_path,
FLAGS_resource_file) != 0) {
LOG(ERROR) << "Failed to initialize general model conf: "
<< FLAGS_general_model_path << "/"
<< FLAGS_general_model_file;
<< FLAGS_resource_path << "/" << FLAGS_resource_file;
return -1;
}
......
is_lod_feed: true
is_lod_feed: false
is_lod_feed: true
feed_type: 1
feed_type: 0
feed_type: 1
feed_shape {
feed_var {
name: "words"
is_lod_tensor: true
feed_type: 0
shape: -1
}
feed_shape {
feed_var {
name: "label"
is_lod_tensor: false
feed_type: 0
shape: 1
}
fetch_var {
name: "cost"
shape: 1
shape: 2
shape: 3
}
feed_shape {
shape: -1
fetch_var {
name: "acc"
shape: 1
}
fetch_var {
name: "prediction"
shape: 2
}
--enable_model_toolkit
--enable_cube=false
--enable_general_model=true
--general_model_path=./conf
--general_model_file=general_model.prototxt
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt"
cube_config_file: "./conf/cube.conf"
general_model_path: "./conf/"
general_model_file: "general_model.prototxt"
......@@ -40,6 +40,6 @@ services {
workflows: "workflow9"
}
services {
name: "LoadGeneralModelService"
workflows: "workflow10"
}
\ No newline at end of file
name: "GeneralModelService"
workflows: "workflow11"
}
......@@ -95,7 +95,24 @@ workflows {
name: "workflow10"
workflow_type: "Sequence"
nodes {
name: "load_general_model_conf_op"
type: "LoadGeneralModelConfOp"
name: "general_model_op"
type: "GeneralModelOp"
}
}
workflows {
name: "workflow11"
workflow_type: "Sequence"
nodes {
name: "general_reader_op"
type: "GeneralReaderOp"
}
nodes {
name: "general_infer_op"
type: "GeneralInferOp"
dependencies {
name: "general_reader_op"
mode: "RO"
}
}
}
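workflow11 wires the two new ops into a sequence DAG: general_infer_op declares a read-only (RO) dependency on general_reader_op, and that dependency name is exactly what GeneralInferOp uses at runtime to pull the reader's output:

// from general_infer_op.cpp in this PR
const GeneralReaderOutput *reader_out =
    get_depend_argument<GeneralReaderOutput>("general_reader_op");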
[{
"dict_name": "test_dict",
"shard": 2,
"dup": 1,
"timeout": 200,
"retry": 3,
"backup_request": 100,
"type": "ipport_list",
"load_balancer": "rr",
"nodes": [{
"ipport_list": "list://xxx.xxx.xxx.xxx:8000"
},{
"ipport_list": "list://xxx.xxx.xxx.xxx:8000"
}]
}]
is_lod_feed: true
is_lod_feed: false
is_lod_feed: true
feed_type: 1
feed_type: 0
feed_type: 1
feed_shape {
shape: -1
}
feed_shape {
shape: 1
shape: 2
shape: 3
}
feed_shape {
shape: -1
}
--enable_model_toolkit
--enable_cube=false
--enable_general_model=true
--general_model_path=./conf
--general_model_file=general_model.prototxt
engines {
name: "general_model"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/text_classification"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
}
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt"
port: 9292
services {
name: "GeneralModelService"
workflows: "workflow1"
}
\ No newline at end of file
workflows {
name: "workflow1"
workflow_type: "Sequence"
nodes {
name: "general_model_op"
type: "GeneralModelOp"
}
}
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "examples/demo-serving/op/general_infer_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "examples/demo-serving/op/general_reader_op.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
int GeneralInferOp::inference() {
const GeneralReaderOutput *reader_out =
get_depend_argument<GeneralReaderOutput>("general_reader_op");
if (!reader_out) {
LOG(ERROR) << "Failed mutable depended argument, op:"
<< "general_reader_op";
return -1;
}
int reader_status = reader_out->reader_status;
if (reader_status != 0) {
LOG(ERROR) << "Read request wrong.";
return -1;
}
const TensorVector *in = &reader_out->tensor_vector;
TensorVector *out = butil::get_object<TensorVector>();
int batch_size = (*in)[0].shape[0];
// infer
if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
return -1;
}
Response *res = mutable_data<Response>();
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = res->add_insts();
for (int j = 0; j < out->size(); ++j) {
Tensor *tensor = fetch_inst->add_tensor_array();
tensor->set_elem_type(1);
if (out->at(j).lod.size() == 1) {
tensor->add_shape(-1);
} else {
for (int k = 1; k < out->at(j).shape.size(); ++k) {
tensor->add_shape(out->at(j).shape[k]);
}
}
}
}
for (int i = 0; i < out->size(); ++i) {
float *data_ptr = static_cast<float *>(out->at(i).data.data());
int cap = 1;
for (int j = 1; j < out->at(i).shape.size(); ++j) {
cap *= out->at(i).shape[j];
}
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
for (int k = out->at(i).lod[0][j]; k < out->at(i).lod[0][j + 1]; k++) {
res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
}
}
}
}
/*
for (size_t i = 0; i < in->size(); ++i) {
(*in)[i].shape.clear();
}
in->clear();
butil::return_object<TensorVector>(in);
for (size_t i = 0; i < out->size(); ++i) {
(*out)[i].shape.clear();
}
out->clear();
butil::return_object<TensorVector>(out);
}
*/
return 0;
}
DEFINE_OP(GeneralInferOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
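A worked example of the response packing above, assuming a single LoD fetch var with batch_size = 2 and out->at(0).lod[0] = {0, 2, 5}:

// the tensor reports shape -1 to the client (variable length, lod.size() == 1)
// instance 0 receives data_ptr[0..1]   (k in [lod[0][0], lod[0][1]))
// instance 1 receives data_ptr[2..4]   (k in [lod[0][1], lod[0][2]))
// each value is appended as sizeof(float) raw bytes via add_data()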
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle_inference_api.h" // NOLINT
#endif
#include "examples/demo-serving/general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
static const char* GENERAL_MODEL_NAME = "general_model";
class GeneralInferOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralInferOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
......@@ -14,10 +14,12 @@
#include "examples/demo-serving/op/general_model_op.h"
#include <algorithm>
#include <sstream>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
namespace baidu {
namespace paddle_serving {
......@@ -29,10 +31,12 @@ using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
static std::once_flag g_proto_init_flag;
int GeneralModelOp::inference() {
// request
const Request *req = dynamic_cast<const Request *>(get_request_message());
TensorVector *in = butil::get_object<TensorVector>();
......@@ -44,6 +48,7 @@ int GeneralModelOp::inference() {
std::vector<int> elem_size;
std::vector<int> capacity;
// infer
if (batch_size > 0) {
int var_num = req->insts(0).tensor_array_size();
VLOG(3) << "var num: " << var_num;
......@@ -69,9 +74,7 @@ int GeneralModelOp::inference() {
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0;
k < req->insts(0).tensor_array(i).shape_size();
++k) {
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(3) << "shape for var[" << i << "]: " << dim;
capacity[i] *= dim;
......@@ -90,10 +93,9 @@ int GeneralModelOp::inference() {
for (int i = 0; i < var_num; ++i) {
if (in->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor & tensor = req->insts(j).tensor_array(i);
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.data_size();
VLOG(3) << "tensor size for var[" << i << "]: "
<< tensor.data_size();
VLOG(3) << "tensor size for var[" << i << "]: " << tensor.data_size();
int cur_len = in->at(i).lod[0].back();
VLOG(3) << "current len: " << cur_len;
in->at(i).lod[0].push_back(cur_len + data_len);
......@@ -101,18 +103,18 @@ int GeneralModelOp::inference() {
}
in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
in->at(i).shape = {in->at(i).lod[0].back(), 1};
VLOG(3) << "var[" << i << "] is lod_tensor and len="
<< in->at(i).lod[0].back();
VLOG(3) << "var[" << i
<< "] is lod_tensor and len=" << in->at(i).lod[0].back();
} else {
in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(3) << "var[" << i << "] is tensor and capacity="
<< batch_size * capacity[i];
VLOG(3) << "var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t * dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
......@@ -126,7 +128,7 @@ int GeneralModelOp::inference() {
}
}
} else {
float * dst_ptr = static_cast<float *>(in->at(i).data.data());
float *dst_ptr = static_cast<float *>(in->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
......@@ -149,19 +151,30 @@ int GeneralModelOp::inference() {
return -1;
}
// print request
std::ostringstream oss;
int64_t *example = reinterpret_cast<int64_t *>((*in)[0].data.data());
for (uint32_t i = 0; i < 10; i++) {
oss << *(example + i) << " ";
}
VLOG(3) << "msg: " << oss.str();
// infer
if (predictor::InferManager::instance().infer(
GENERAL_MODEL_NAME, in, out, batch_size)) {
LOG(ERROR) << "Failed do infer in fluid model: "
<< GENERAL_MODEL_NAME;
LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
return -1;
}
// print response
float *example_1 = reinterpret_cast<float *>((*out)[0].data.data());
VLOG(3) << "result: " << *example_1;
Response *res = mutable_data<Response>();
Response * res = mutable_data<Response>();
for (int i = 0; i < batch_size; ++i) {
FetchInst * fetch_inst = res->add_insts();
FetchInst *fetch_inst = res->add_insts();
for (int j = 0; j < out->size(); ++j) {
Tensor * tensor = fetch_inst->add_tensor_array();
Tensor *tensor = fetch_inst->add_tensor_array();
tensor->set_elem_type(1);
if (out->at(j).lod.size() == 1) {
tensor->add_shape(-1);
......@@ -174,30 +187,29 @@ int GeneralModelOp::inference() {
}
for (int i = 0; i < out->size(); ++i) {
float * data_ptr = static_cast<float *>(out->at(i).data.data());
float *data_ptr = static_cast<float *>(out->at(i).data.data());
int cap = 1;
for (int j = 0; j < out->at(i).shape.size(); ++j) {
for (int j = 1; j < out->at(i).shape.size(); ++j) {
cap *= out->at(i).shape[j];
}
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
for (int k = out->at(i).lod[0][j];
k < out->at(i).lod[0][j + 1];
for (int k = out->at(i).lod[0][j]; k < out->at(i).lod[0][j + 1];
k++) {
res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
(char *)(&(data_ptr[k])), sizeof(float));
reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
(char *)(&(data_ptr[k])), sizeof(float));
reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
}
}
}
}
for (size_t i = 0; i < in->size(); ++i) {
(*in)[i].shape.clear();
}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "examples/demo-serving/op/general_reader_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int conf_check(const Request *req,
const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
int var_num = req->insts(0).tensor_array_size();
if (var_num != model_config->_feed_type.size()) {
LOG(ERROR) << "feed var number not match.";
return -1;
}
for (int i = 0; i < var_num; ++i) {
if (model_config->_feed_type[i] !=
req->insts(0).tensor_array(i).elem_type()) {
LOG(ERROR) << "feed type not match.";
return -1;
}
if (model_config->_feed_shape[i].size() ==
req->insts(0).tensor_array(i).shape_size()) {
for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) {
req->insts(0).tensor_array(i).shape(j);
if (model_config->_feed_shape[i][j] !=
req->insts(0).tensor_array(i).shape(j)) {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
} else {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
return 0;
}
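// Illustrative note (not part of the original change): with the sample
// general_model.prototxt in this PR, conf_check() above accepts a request only
// when the first instance carries exactly two tensors ("words", "label"),
// tensor 0 has elem_type 0 (int64) with a single shape dim of -1, and tensor 1
// has elem_type 0 with a single shape dim of 1; on any mismatch the caller
// below sets reader_status = -1, which GeneralInferOp treats as a failed read.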
int GeneralReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> capacity;
GeneralReaderOutput *res = mutable_data<GeneralReaderOutput>();
TensorVector *in = &res->tensor_vector;
if (!res) {
LOG(ERROR) << "Failed get op tls reader object output";
}
if (batch_size <= 0) {
res->reader_status = -1;
return 0;
}
int var_num = req->insts(0).tensor_array_size();
VLOG(3) << "var num: " << var_num;
// read config
LOG(INFO) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
LOG(INFO) << "get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
LOG(INFO) << "print general model config done.";
// check
res->reader_status = conf_check(req, model_config);
if (res->reader_status != 0) {
LOG(INFO) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
// package tensor
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
paddle::PaddleTensor lod_tensor;
for (int i = 0; i < var_num; ++i) {
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(3) << "var[" << i << "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(3) << "var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(3) << "shape for var[" << i << "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(3) << "var[" << i << "] is tensor, capacity: " << capacity[i];
}
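// NOTE: tensor names are hard-coded for the text-classification demo here;
// they could instead be taken from model_config->_feed_name loaded above.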
if (i == 0) {
lod_tensor.name = "words";
} else {
lod_tensor.name = "label";
}
in->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if (in->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.data_size();
VLOG(3) << "tensor size for var[" << i << "]: " << tensor.data_size();
int cur_len = in->at(i).lod[0].back();
VLOG(3) << "current len: " << cur_len;
in->at(i).lod[0].push_back(cur_len + data_len);
VLOG(3) << "new len: " << cur_len + data_len;
}
in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
in->at(i).shape = {in->at(i).lod[0].back(), 1};
VLOG(3) << "var[" << i
<< "] is lod_tensor and len=" << in->at(i).lod[0].back();
} else {
in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(3) << "var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
dst_ptr[offset + k] =
*(const int64_t *)req->insts(j).tensor_array(i).data(k).c_str();
}
if (in->at(i).lod.size() == 1) {
offset = in->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
} else {
float *dst_ptr = static_cast<float *>(in->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
dst_ptr[offset + k] =
*(const float *)req->insts(j).tensor_array(i).data(k).c_str();
}
if (in->at(i).lod.size() == 1) {
offset = in->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
}
}
VLOG(3) << "read data from client success";
// print request
std::ostringstream oss;
int64_t *example = reinterpret_cast<int64_t *>((*in)[0].data.data());
for (int i = 0; i < 10; i++) {
oss << *(example + i) << " ";
}
VLOG(3) << "head element of first feed var : " << oss.str();
//
return 0;
}
DEFINE_OP(GeneralReaderOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
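A worked example of the copy loop above for a LoD feed var, assuming batch_size = 2 with per-instance lengths 3 and 4 (so lod[0] becomes {0, 3, 7}):

// j = 0: offset starts at 0, copies 3 values into dst_ptr[0..2], then offset = lod[0][1] = 3
// j = 1: copies 4 values into dst_ptr[3..6], then offset = lod[0][2] = 7
// for a dense (non-LoD) var, offset simply advances by capacity[i] per instance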
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle_inference_api.h" // NOLINT
#endif
#include <string>
#include "core/predictor/framework/resource.h"
#include "examples/demo-serving/general_model_service.pb.h"
#include "examples/demo-serving/load_general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
struct GeneralReaderOutput {
std::vector<paddle::PaddleTensor> tensor_vector;
int reader_status = 0;
void Clear() {
size_t tensor_count = tensor_vector.size();
for (size_t ti = 0; ti < tensor_count; ++ti) {
tensor_vector[ti].shape.clear();
}
tensor_vector.clear();
}
std::string ShortDebugString() const { return "Not implemented!"; }
};
class GeneralReaderOp : public baidu::paddle_serving::predictor::OpWithChannel<
GeneralReaderOutput> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralReaderOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
......@@ -24,13 +24,13 @@ def batch_predict(batch_size=4):
client.load_client_config(conf_file)
client.connect(["127.0.0.1:8010"])
start = time.time()
fetch = ["acc", "cost", "prediction"]
feed_batch = []
for line in sys.stdin:
group = line.strip().split()
words = [int(x) for x in group[1:int(group[0])]]
label = [int(group[-1])]
feed = {"words": words, "label": label}
fetch = ["acc", "cost", "prediction"]
feed_batch.append(feed)
if len(feed_batch) == batch_size:
fetch_batch = client.batch_predict(
......@@ -39,6 +39,11 @@ def batch_predict(batch_size=4):
print("{} {}".format(fetch_batch[i]["prediction"][1],
feed_batch[i]["label"][0]))
feed_batch = []
if len(feed_batch) > 0:
fetch_batch = client.batch_predict(feed_batch=feed_batch, fetch=fetch)
for i in range(len(feed_batch)):
print("{} {}".format(fetch_batch[i]["prediction"][1], feed_batch[i][
"label"][0]))
cost = time.time() - start
print("total cost : {}".format(cost))
......