Commit 85c12220 authored by Dong Daxiang, committed by GitHub

Merge pull request #123 from MRXLT/general-model-config

add C++ code for general server
@@ -15,12 +15,17 @@
 syntax = "proto2";
 package baidu.paddle_serving.configure;
 
-message Shape {
-  repeated int32 shape = 1;
-};
-
+message FeedVar {
+  required string name = 1;
+  required bool is_lod_tensor = 2;
+  required int32 feed_type = 3;
+  repeated int32 shape = 4;
+}
+message FetchVar {
+  required string name = 1;
+  repeated int32 shape = 2;
+}
 message GeneralModelConfig {
-  repeated bool is_lod_feed = 1;
-  repeated int32 feed_type = 2;
-  repeated Shape feed_shape = 4;
+  repeated FeedVar feed_var = 1;
+  repeated FetchVar fetch_var = 2;
 };
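Under the new schema a general model config is a list of feed_var / fetch_var blocks instead of parallel is_lod_feed / feed_type / feed_shape arrays. A minimal illustrative instance (names and values here are only an example; the full demo config appears further down in this diff):

feed_var {
  name: "words"
  is_lod_tensor: true
  feed_type: 0
  shape: -1
}
fetch_var {
  name: "prediction"
  shape: 2
}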
@@ -16,27 +16,27 @@ syntax = "proto2";
 package baidu.paddle_serving.configure;
 
 message ConnectionConf {
-  required int32 connect_timeout_ms = 1 [default = 2000];
-  required int32 rpc_timeout_ms = 2 [default = 20000];
-  required int32 connect_retry_count = 3 [default = 2];
-  required int32 max_connection_per_host = 4 [default = 100];
-  required int32 hedge_request_timeout_ms = 5 [default = -1];
-  required int32 hedge_fetch_retry_count = 6 [default = 2];
-  required string connection_type = 7 [default = "pooled"];
+  required int32 connect_timeout_ms = 1 [ default = 2000 ];
+  required int32 rpc_timeout_ms = 2 [ default = 20000 ];
+  required int32 connect_retry_count = 3 [ default = 2 ];
+  required int32 max_connection_per_host = 4 [ default = 100 ];
+  required int32 hedge_request_timeout_ms = 5 [ default = -1 ];
+  required int32 hedge_fetch_retry_count = 6 [ default = 2 ];
+  required string connection_type = 7 [ default = "pooled" ];
 };
 
 message NamingConf {
-  optional string cluster_filter_strategy = 1 [default="Default"];
-  optional string load_balance_strategy = 2 [default = "la"];
+  optional string cluster_filter_strategy = 1 [ default = "Default" ];
+  optional string load_balance_strategy = 2 [ default = "la" ];
   optional string cluster = 3;
 };
 
 message RpcParameter {
   // 0-NONE, 1-SNAPPY, 2-GZIP, 3-ZLIB, 4-LZ4
-  required int32 compress_type = 1 [default = 0];
-  required int32 package_size = 2 [default = 20];
-  required string protocol = 3 [default = "baidu_std"];
-  required int32 max_channel_per_request = 4 [default = 3];
+  required int32 compress_type = 1 [ default = 0 ];
+  required int32 package_size = 2 [ default = 20 ];
+  required string protocol = 3 [ default = "baidu_std" ];
+  required int32 max_channel_per_request = 4 [ default = 3 ];
 };
 
 message SplitConf {
@@ -53,12 +53,17 @@ message VariantConf {
   optional string variant_router = 6;
 };
 
-message WeightedRandomRenderConf { required string variant_weight_list = 1 [default= "50" ]; };
+message WeightedRandomRenderConf {
+  required string variant_weight_list = 1 [ default = "50" ];
+};
 
 message Predictor {
-  required string name = 1 [default="general_model"];
-  required string service_name = 2 [default="baidu.paddle_serving.predictor.general_model.GeneralModelService"];
-  required string endpoint_router = 3 [default="WeightedRandomRender"];
+  required string name = 1 [ default = "general_model" ];
+  required string service_name = 2 [
+    default =
+        "baidu.paddle_serving.predictor.general_model.GeneralModelService"
+  ];
+  required string endpoint_router = 3 [ default = "WeightedRandomRender" ];
   required WeightedRandomRenderConf weighted_random_render_conf = 4;
   repeated VariantConf variants = 5;
 };
......
@@ -53,6 +53,8 @@ message ResourceConf {
   required string model_toolkit_path = 1;
   required string model_toolkit_file = 2;
   optional string cube_config_file = 3;
+  optional string general_model_path = 4;
+  optional string general_model_file = 5;
 };
 
 // DAG node depency info
......
@@ -23,7 +23,6 @@ namespace predictor {
 
 using configure::ResourceConf;
 using configure::GeneralModelConfig;
-using configure::Shape;
 using rec::mcube::CubeAPI;
 
 // __thread bool p_thread_initialized = false;
@@ -47,15 +46,22 @@ std::shared_ptr<PaddleGeneralModelConfig> Resource::get_general_model_config() {
 }
 
 void Resource::print_general_model_config(
-    const std::shared_ptr<PaddleGeneralModelConfig> & config) {
+    const std::shared_ptr<PaddleGeneralModelConfig>& config) {
   if (config == nullptr) {
     LOG(INFO) << "paddle general model config is not set";
     return;
   }
-  LOG(INFO) << "Number of Feed Tensor: " << config->_feed_type.size();
+  LOG(INFO) << "Number of Feed Tensor: " << config->_feed_name.size();
   std::ostringstream oss;
+  LOG(INFO) << "Feed Name Info";
+  for (auto& feed_name : config->_feed_name) {
+    oss << feed_name << " ";
+  }
+  LOG(INFO) << oss.str();
+  oss.clear();
+  oss.str("");
   LOG(INFO) << "Feed Type Info";
-  for (auto & feed_type : config->_feed_type) {
+  for (auto& feed_type : config->_feed_type) {
    oss << feed_type << " ";
   }
   LOG(INFO) << oss.str();
@@ -71,7 +77,7 @@ void Resource::print_general_model_config(
   oss.clear();
   oss.str("");
   LOG(INFO) << "Capacity Info";
-  for (auto & cap : config->_capacity) {
+  for (auto& cap : config->_capacity) {
     oss << cap << " ";
   }
   LOG(INFO) << oss.str();
@@ -79,8 +85,8 @@ void Resource::print_general_model_config(
   oss.str("");
   LOG(INFO) << "Feed Shape Info";
   int tensor_idx = 0;
-  for (auto & shape : config->_feed_shape) {
-    for (auto & dim : shape) {
+  for (auto& shape : config->_feed_shape) {
+    for (auto& dim : shape) {
       oss << dim << " ";
     }
     LOG(INFO) << "Tensor[" << tensor_idx++ << "].shape: " << oss.str();
@@ -146,38 +152,69 @@ int Resource::initialize(const std::string& path, const std::string& file) {
   return 0;
 }
 
-int Resource::general_model_initialize(
-    const std::string& path, const std::string & file) {
+// model config
+int Resource::general_model_initialize(const std::string& path,
+                                        const std::string& file) {
   if (!FLAGS_enable_general_model) {
     return 0;
   }
+  ResourceConf resource_conf;
+  if (configure::read_proto_conf(path, file, &resource_conf) != 0) {
+    LOG(ERROR) << "Failed initialize resource from: " << path << "/" << file;
+    return -1;
+  }
+
+  int err = 0;
+  std::string general_model_path = resource_conf.general_model_path();
+  std::string general_model_file = resource_conf.general_model_file();
+  if (err != 0) {
+    LOG(ERROR) << "read general_model_path failed, path[" << path << "], file["
+               << file << "]";
+    return -1;
+  }
 
   GeneralModelConfig model_config;
-  if (configure::read_proto_conf(path, file, &model_config) != 0) {
-    LOG(ERROR) << "Failed initialize resource from: " << path << "/" << file;
+  if (configure::read_proto_conf(general_model_path.c_str(),
+                                 general_model_file.c_str(),
+                                 &model_config) != 0) {
+    LOG(ERROR) << "Failed initialize model config from: " << general_model_path
+               << "/" << general_model_file;
     return -1;
   }
 
   _config.reset(new PaddleGeneralModelConfig());
-  _config->_feed_type.resize(model_config.feed_type_size());
-  _config->_is_lod_feed.resize(model_config.is_lod_feed_size());
-  _config->_capacity.resize(model_config.feed_shape_size());
-  _config->_feed_shape.resize(model_config.feed_shape_size());
-  for (int i = 0; i < model_config.is_lod_feed_size(); ++i) {
-    _config->_feed_type[i] = model_config.feed_type(i);
-    if (model_config.is_lod_feed(i)) {
+  int feed_var_num = model_config.feed_var_size();
+  _config->_feed_name.resize(feed_var_num);
+  _config->_feed_type.resize(feed_var_num);
+  _config->_is_lod_feed.resize(feed_var_num);
+  _config->_capacity.resize(feed_var_num);
+  _config->_feed_shape.resize(feed_var_num);
+  for (int i = 0; i < feed_var_num; ++i) {
+    _config->_feed_name[i] = model_config.feed_var(i).name();
+    _config->_feed_type[i] = model_config.feed_var(i).feed_type();
+    if (model_config.feed_var(i).is_lod_tensor()) {
       _config->_feed_shape[i] = {-1};
       _config->_is_lod_feed[i] = true;
     } else {
       _config->_capacity[i] = 1;
       _config->_is_lod_feed[i] = false;
-      for (int j = 0; j < model_config.feed_shape(i).shape_size(); ++j) {
-        int dim = model_config.feed_shape(i).shape(j);
+      for (int j = 0; j < model_config.feed_var(i).shape_size(); ++j) {
+        int32_t dim = model_config.feed_var(i).shape(j);
        _config->_feed_shape[i].push_back(dim);
        _config->_capacity[i] *= dim;
      }
    }
  }
+
+  int fetch_var_num = model_config.fetch_var_size();
+  _config->_fetch_name.resize(fetch_var_num);
+  _config->_fetch_shape.resize(fetch_var_num);
+  for (int i = 0; i < fetch_var_num; ++i) {
+    _config->_fetch_name[i] = model_config.fetch_var(i).name();
+    for (int j = 0; j < model_config.fetch_var(i).shape_size(); ++j) {
+      int dim = model_config.fetch_var(i).shape(j);
+      _config->_fetch_shape[i].push_back(dim);
+    }
+  }
   return 0;
 }
......
@@ -33,16 +33,19 @@ class PaddleGeneralModelConfig {
   ~PaddleGeneralModelConfig() {}
 
  public:
+  std::vector<std::string> _feed_name;
   std::vector<int> _feed_type;     // 0 int64, 1 float
   std::vector<bool> _is_lod_feed;  // true lod tensor
   std::vector<int> _capacity;      // capacity for each tensor
   /*
     feed_shape_ for feeded variable
     feed_shape_[i][j] represents the jth dim for ith input Tensor
     if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
   */
   std::vector<std::vector<int>> _feed_shape;
+  std::vector<std::string> _fetch_name;
+  std::vector<std::vector<int>> _fetch_shape;
 };
 
 class BaseRdDict;
@@ -76,8 +79,8 @@ class Resource {
   int initialize(const std::string& path, const std::string& file);
   int cube_initialize(const std::string& path, const std::string& file);
 
-  int general_model_initialize(
-      const std::string& path, const std::string & file);
+  int general_model_initialize(const std::string& path,
+                               const std::string& file);
 
   int thread_initialize();
@@ -90,7 +93,7 @@ class Resource {
   std::shared_ptr<PaddleGeneralModelConfig> get_general_model_config();
 
   void print_general_model_config(
-      const std::shared_ptr<PaddleGeneralModelConfig> & config);
+      const std::shared_ptr<PaddleGeneralModelConfig>& config);
 
   std::shared_ptr<RocksDBWrapper> getDB();
......
@@ -45,8 +45,6 @@ using baidu::paddle_serving::predictor::FLAGS_logger_path;
 using baidu::paddle_serving::predictor::FLAGS_logger_file;
 using baidu::paddle_serving::predictor::FLAGS_resource_path;
 using baidu::paddle_serving::predictor::FLAGS_resource_file;
-using baidu::paddle_serving::predictor::FLAGS_general_model_path;
-using baidu::paddle_serving::predictor::FLAGS_general_model_file;
 using baidu::paddle_serving::predictor::FLAGS_reload_interval_s;
 using baidu::paddle_serving::predictor::FLAGS_port;
@@ -219,11 +217,10 @@ int main(int argc, char** argv) {
 #ifndef BCLOUD
-  if (Resource::instance().general_model_initialize(
-          FLAGS_general_model_path, FLAGS_general_model_file) != 0) {
+  if (Resource::instance().general_model_initialize(FLAGS_resource_path,
+                                                    FLAGS_resource_file) != 0) {
     LOG(ERROR) << "Failed to initialize general model conf: "
-               << FLAGS_general_model_path << "/"
-               << FLAGS_general_model_file;
+               << FLAGS_resource_path << "/" << FLAGS_resource_file;
     return -1;
   }
......
-is_lod_feed: true
-is_lod_feed: false
-is_lod_feed: true
-feed_type: 1
-feed_type: 0
-feed_type: 1
-feed_shape {
-  shape: -1
-}
-feed_shape {
-  shape: 1
-  shape: 2
-  shape: 3
-}
-feed_shape {
-  shape: -1
-}
+feed_var {
+  name: "words"
+  is_lod_tensor: true
+  feed_type: 0
+  shape: -1
+}
+feed_var {
+  name: "label"
+  is_lod_tensor: false
+  feed_type: 0
+  shape: 1
+}
+fetch_var {
+  name: "cost"
+  shape: 1
+}
+fetch_var {
+  name: "acc"
+  shape: 1
+}
+fetch_var {
+  name: "prediction"
+  shape: 2
+}
 --enable_model_toolkit
 --enable_cube=false
 --enable_general_model=true
---general_model_path=./conf
---general_model_file=general_model.prototxt
 model_toolkit_path: "./conf/"
 model_toolkit_file: "model_toolkit.prototxt"
 cube_config_file: "./conf/cube.conf"
+general_model_path: "./conf/"
+general_model_file: "general_model.prototxt"
@@ -40,6 +40,6 @@ services {
   workflows: "workflow9"
 }
 services {
-  name: "LoadGeneralModelService"
-  workflows: "workflow10"
+  name: "GeneralModelService"
+  workflows: "workflow11"
 }
\ No newline at end of file
@@ -95,7 +95,24 @@ workflows {
   name: "workflow10"
   workflow_type: "Sequence"
   nodes {
-    name: "load_general_model_conf_op"
-    type: "LoadGeneralModelConfOp"
+    name: "general_model_op"
+    type: "GeneralModelOp"
+  }
+}
+workflows {
+  name: "workflow11"
+  workflow_type: "Sequence"
+  nodes {
+    name: "general_reader_op"
+    type: "GeneralReaderOp"
+  }
+  nodes {
+    name: "general_infer_op"
+    type: "GeneralInferOp"
+    dependencies {
+      name: "general_reader_op"
+      mode: "RO"
+    }
   }
 }
[{
"dict_name": "test_dict",
"shard": 2,
"dup": 1,
"timeout": 200,
"retry": 3,
"backup_request": 100,
"type": "ipport_list",
"load_balancer": "rr",
"nodes": [{
"ipport_list": "list://xxx.xxx.xxx.xxx:8000"
},{
"ipport_list": "list://xxx.xxx.xxx.xxx:8000"
}]
}]
is_lod_feed: true
is_lod_feed: false
is_lod_feed: true
feed_type: 1
feed_type: 0
feed_type: 1
feed_shape {
shape: -1
}
feed_shape {
shape: 1
shape: 2
shape: 3
}
feed_shape {
shape: -1
}
--enable_model_toolkit
--enable_cube=false
--enable_general_model=true
--general_model_path=./conf
--general_model_file=general_model.prototxt
engines {
name: "general_model"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/text_classification"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
}
model_toolkit_path: "./conf/"
model_toolkit_file: "model_toolkit.prototxt"
port: 9292
services {
name: "GeneralModelService"
workflows: "workflow1"
}
\ No newline at end of file
workflows {
name: "workflow1"
workflow_type: "Sequence"
nodes {
name: "general_model_op"
type: "GeneralModelOp"
}
}
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "examples/demo-serving/op/general_infer_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "examples/demo-serving/op/general_reader_op.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
int GeneralInferOp::inference() {
const GeneralReaderOutput *reader_out =
get_depend_argument<GeneralReaderOutput>("general_reader_op");
if (!reader_out) {
LOG(ERROR) << "Failed mutable depended argument, op:"
<< "general_reader_op";
return -1;
}
int reader_status = reader_out->reader_status;
if (reader_status != 0) {
LOG(ERROR) << "Read request wrong.";
return -1;
}
const TensorVector *in = &reader_out->tensor_vector;
TensorVector *out = butil::get_object<TensorVector>();
int batch_size = (*in)[0].shape[0];
// infer
if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
return -1;
}
Response *res = mutable_data<Response>();
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = res->add_insts();
for (int j = 0; j < out->size(); ++j) {
Tensor *tensor = fetch_inst->add_tensor_array();
tensor->set_elem_type(1);
if (out->at(j).lod.size() == 1) {
tensor->add_shape(-1);
} else {
for (int k = 1; k < out->at(j).shape.size(); ++k) {
tensor->add_shape(out->at(j).shape[k]);
}
}
}
}
for (int i = 0; i < out->size(); ++i) {
float *data_ptr = static_cast<float *>(out->at(i).data.data());
int cap = 1;
for (int j = 1; j < out->at(i).shape.size(); ++j) {
cap *= out->at(i).shape[j];
}
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
for (int k = out->at(i).lod[0][j]; k < out->at(i).lod[0][j + 1]; k++) {
res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
}
}
}
}
/*
for (size_t i = 0; i < in->size(); ++i) {
(*in)[i].shape.clear();
}
in->clear();
butil::return_object<TensorVector>(in);
for (size_t i = 0; i < out->size(); ++i) {
(*out)[i].shape.clear();
}
out->clear();
butil::return_object<TensorVector>(out);
}
*/
return 0;
}
DEFINE_OP(GeneralInferOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle_inference_api.h" // NOLINT
#endif
#include "examples/demo-serving/general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
static const char* GENERAL_MODEL_NAME = "general_model";
class GeneralInferOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralInferOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
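Note: the serving ops in this diff (GeneralInferOp above, GeneralModelOp and GeneralReaderOp below) exchange data through the Request/Response messages generated from general_model_service.proto, which is not included in this change. A rough sketch of the message layout the accessors assume (field names follow the calls used in the ops; field numbers and exact definitions are illustrative, not copied from the repository):

syntax = "proto2";
package baidu.paddle_serving.predictor.general_model;

message Tensor {
  repeated bytes data = 1;       // one element per entry, raw int64 or float bytes
  optional int32 elem_type = 2;  // 0: int64, 1: float
  repeated int32 shape = 3;      // a first dim of -1 marks a LoD tensor
}

message FeedInst { repeated Tensor tensor_array = 1; }
message FetchInst { repeated Tensor tensor_array = 1; }

message Request { repeated FeedInst insts = 1; }
message Response { repeated FetchInst insts = 1; }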
@@ -14,10 +14,12 @@
 #include "examples/demo-serving/op/general_model_op.h"
 #include <algorithm>
-#include <sstream>
 #include <iostream>
+#include <memory>
+#include <sstream>
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
 
 namespace baidu {
 namespace paddle_serving {
@@ -29,10 +31,12 @@ using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::FeedInst;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 static std::once_flag g_proto_init_flag;
 
 int GeneralModelOp::inference() {
+  // request
   const Request *req = dynamic_cast<const Request *>(get_request_message());
 
   TensorVector *in = butil::get_object<TensorVector>();
@@ -44,6 +48,7 @@ int GeneralModelOp::inference() {
   std::vector<int> elem_size;
   std::vector<int> capacity;
 
+  // infer
   if (batch_size > 0) {
     int var_num = req->insts(0).tensor_array_size();
     VLOG(3) << "var num: " << var_num;
@@ -69,9 +74,7 @@ int GeneralModelOp::inference() {
       } else {
         lod_tensor.shape.push_back(batch_size);
         capacity[i] = 1;
-        for (int k = 0;
-             k < req->insts(0).tensor_array(i).shape_size();
-             ++k) {
+        for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
           int dim = req->insts(0).tensor_array(i).shape(k);
           VLOG(3) << "shape for var[" << i << "]: " << dim;
           capacity[i] *= dim;
@@ -90,10 +93,9 @@ int GeneralModelOp::inference() {
     for (int i = 0; i < var_num; ++i) {
       if (in->at(i).lod.size() == 1) {
         for (int j = 0; j < batch_size; ++j) {
-          const Tensor & tensor = req->insts(j).tensor_array(i);
+          const Tensor &tensor = req->insts(j).tensor_array(i);
           int data_len = tensor.data_size();
-          VLOG(3) << "tensor size for var[" << i << "]: "
-                  << tensor.data_size();
+          VLOG(3) << "tensor size for var[" << i << "]: " << tensor.data_size();
           int cur_len = in->at(i).lod[0].back();
           VLOG(3) << "current len: " << cur_len;
           in->at(i).lod[0].push_back(cur_len + data_len);
@@ -101,18 +103,18 @@ int GeneralModelOp::inference() {
         }
         in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
         in->at(i).shape = {in->at(i).lod[0].back(), 1};
-        VLOG(3) << "var[" << i << "] is lod_tensor and len="
-                << in->at(i).lod[0].back();
+        VLOG(3) << "var[" << i
+                << "] is lod_tensor and len=" << in->at(i).lod[0].back();
       } else {
         in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
-        VLOG(3) << "var[" << i << "] is tensor and capacity="
-                << batch_size * capacity[i];
+        VLOG(3) << "var[" << i
+                << "] is tensor and capacity=" << batch_size * capacity[i];
       }
     }
 
     for (int i = 0; i < var_num; ++i) {
       if (elem_type[i] == 0) {
-        int64_t * dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
+        int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
         int offset = 0;
         for (int j = 0; j < batch_size; ++j) {
           for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
@@ -126,7 +128,7 @@ int GeneralModelOp::inference() {
           }
         }
       } else {
-        float * dst_ptr = static_cast<float *>(in->at(i).data.data());
+        float *dst_ptr = static_cast<float *>(in->at(i).data.data());
         int offset = 0;
         for (int j = 0; j < batch_size; ++j) {
           for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
@@ -149,19 +151,30 @@ int GeneralModelOp::inference() {
     return -1;
   }
 
+  // print request
+  std::ostringstream oss;
+  int64_t *example = reinterpret_cast<int64_t *>((*in)[0].data.data());
+  for (uint32_t i = 0; i < 10; i++) {
+    oss << *(example + i) << " ";
+  }
+  VLOG(3) << "msg: " << oss.str();
+  // infer
   if (predictor::InferManager::instance().infer(
           GENERAL_MODEL_NAME, in, out, batch_size)) {
-    LOG(ERROR) << "Failed do infer in fluid model: "
-               << GENERAL_MODEL_NAME;
+    LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
     return -1;
   }
 
+  // print response
+  float *example_1 = reinterpret_cast<float *>((*out)[0].data.data());
+  VLOG(3) << "result: " << *example_1;
+
-  Response * res = mutable_data<Response>();
+  Response *res = mutable_data<Response>();
 
   for (int i = 0; i < batch_size; ++i) {
-    FetchInst * fetch_inst = res->add_insts();
+    FetchInst *fetch_inst = res->add_insts();
     for (int j = 0; j < out->size(); ++j) {
-      Tensor * tensor = fetch_inst->add_tensor_array();
+      Tensor *tensor = fetch_inst->add_tensor_array();
       tensor->set_elem_type(1);
       if (out->at(j).lod.size() == 1) {
         tensor->add_shape(-1);
@@ -174,30 +187,29 @@ int GeneralModelOp::inference() {
   }
 
   for (int i = 0; i < out->size(); ++i) {
-    float * data_ptr = static_cast<float *>(out->at(i).data.data());
+    float *data_ptr = static_cast<float *>(out->at(i).data.data());
     int cap = 1;
-    for (int j = 0; j < out->at(i).shape.size(); ++j) {
+    for (int j = 1; j < out->at(i).shape.size(); ++j) {
       cap *= out->at(i).shape[j];
     }
     if (out->at(i).lod.size() == 1) {
       for (int j = 0; j < batch_size; ++j) {
-        for (int k = out->at(i).lod[0][j];
-             k < out->at(i).lod[0][j + 1];
+        for (int k = out->at(i).lod[0][j]; k < out->at(i).lod[0][j + 1];
              k++) {
           res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
-              (char *)(&(data_ptr[k])), sizeof(float));
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
         }
       }
     } else {
       for (int j = 0; j < batch_size; ++j) {
         for (int k = j * cap; k < (j + 1) * cap; ++k) {
           res->mutable_insts(j)->mutable_tensor_array(i)->add_data(
-              (char *)(&(data_ptr[k])), sizeof(float));
+              reinterpret_cast<char *>(&(data_ptr[k])), sizeof(float));
         }
       }
     }
   }
 
   for (size_t i = 0; i < in->size(); ++i) {
     (*in)[i].shape.clear();
   }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "examples/demo-serving/op/general_reader_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int conf_check(const Request *req,
const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
int var_num = req->insts(0).tensor_array_size();
if (var_num != model_config->_feed_type.size()) {
LOG(ERROR) << "feed var number not match.";
return -1;
}
for (int i = 0; i < var_num; ++i) {
if (model_config->_feed_type[i] !=
req->insts(0).tensor_array(i).elem_type()) {
LOG(ERROR) << "feed type not match.";
return -1;
}
if (model_config->_feed_shape[i].size() ==
req->insts(0).tensor_array(i).shape_size()) {
for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) {
req->insts(0).tensor_array(i).shape(j);
if (model_config->_feed_shape[i][j] !=
req->insts(0).tensor_array(i).shape(j)) {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
} else {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
return 0;
}
int GeneralReaderOp::inference() {
// reade request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> capacity;
GeneralReaderOutput *res = mutable_data<GeneralReaderOutput>();
TensorVector *in = &res->tensor_vector;
if (!res) {
LOG(ERROR) << "Failed get op tls reader object output";
}
if (batch_size <= 0) {
res->reader_status = -1;
return 0;
}
int var_num = req->insts(0).tensor_array_size();
VLOG(3) << "var num: " << var_num;
// read config
LOG(INFO) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
LOG(INFO) << "get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
LOG(INFO) << "print general model config done.";
// check
res->reader_status = conf_check(req, model_config);
if (res->reader_status != 0) {
LOG(INFO) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
// package tensor
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
paddle::PaddleTensor lod_tensor;
for (int i = 0; i < var_num; ++i) {
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(3) << "var[" << i << "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(3) << "var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(3) << "shape for var[" << i << "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(3) << "var[" << i << "] is tensor, capacity: " << capacity[i];
}
if (i == 0) {
lod_tensor.name = "words";
} else {
lod_tensor.name = "label";
}
in->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if (in->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.data_size();
VLOG(3) << "tensor size for var[" << i << "]: " << tensor.data_size();
int cur_len = in->at(i).lod[0].back();
VLOG(3) << "current len: " << cur_len;
in->at(i).lod[0].push_back(cur_len + data_len);
VLOG(3) << "new len: " << cur_len + data_len;
}
in->at(i).data.Resize(in->at(i).lod[0].back() * elem_size[i]);
in->at(i).shape = {in->at(i).lod[0].back(), 1};
VLOG(3) << "var[" << i
<< "] is lod_tensor and len=" << in->at(i).lod[0].back();
} else {
in->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(3) << "var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(in->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
dst_ptr[offset + k] =
*(const int64_t *)req->insts(j).tensor_array(i).data(k).c_str();
}
if (in->at(i).lod.size() == 1) {
offset = in->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
} else {
float *dst_ptr = static_cast<float *>(in->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).data_size(); ++k) {
dst_ptr[offset + k] =
*(const float *)req->insts(j).tensor_array(i).data(k).c_str();
}
if (in->at(i).lod.size() == 1) {
offset = in->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
}
}
VLOG(3) << "read data from client success";
// print request
std::ostringstream oss;
int64_t *example = reinterpret_cast<int64_t *>((*in)[0].data.data());
for (int i = 0; i < 10; i++) {
oss << *(example + i) << " ";
}
VLOG(3) << "head element of first feed var : " << oss.str();
//
return 0;
}
DEFINE_OP(GeneralReaderOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle_inference_api.h" // NOLINT
#endif
#include <string>
#include "core/predictor/framework/resource.h"
#include "examples/demo-serving/general_model_service.pb.h"
#include "examples/demo-serving/load_general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
struct GeneralReaderOutput {
std::vector<paddle::PaddleTensor> tensor_vector;
int reader_status = 0;
void Clear() {
size_t tensor_count = tensor_vector.size();
for (size_t ti = 0; ti < tensor_count; ++ti) {
tensor_vector[ti].shape.clear();
}
tensor_vector.clear();
}
std::string ShortDebugString() const { return "Not implemented!"; }
};
class GeneralReaderOp : public baidu::paddle_serving::predictor::OpWithChannel<
GeneralReaderOutput> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralReaderOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
@@ -24,13 +24,13 @@ def batch_predict(batch_size=4):
     client.load_client_config(conf_file)
     client.connect(["127.0.0.1:8010"])
     start = time.time()
+    fetch = ["acc", "cost", "prediction"]
     feed_batch = []
     for line in sys.stdin:
         group = line.strip().split()
         words = [int(x) for x in group[1:int(group[0])]]
         label = [int(group[-1])]
         feed = {"words": words, "label": label}
-        fetch = ["acc", "cost", "prediction"]
         feed_batch.append(feed)
         if len(feed_batch) == batch_size:
             fetch_batch = client.batch_predict(
@@ -39,6 +39,11 @@ def batch_predict(batch_size=4):
                 print("{} {}".format(fetch_batch[i]["prediction"][1],
                                      feed_batch[i]["label"][0]))
             feed_batch = []
+    if len(feed_batch) > 0:
+        fetch_batch = client.batch_predict(feed_batch=feed_batch, fetch=fetch)
+        for i in range(len(feed_batch)):
+            print("{} {}".format(fetch_batch[i]["prediction"][1], feed_batch[i][
+                "label"][0]))
     cost = time.time() - start
     print("total cost : {}".format(cost))
......