提交 8dbf2c07 编写于 作者: B barrierye

fix general_text_response_op && fix timeline

上级 ab645d8c
......@@ -21,15 +21,13 @@
#include <fstream>
#include <map>
#include <string>
#include <utility> // move
#include <vector>
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
#define BLOG(fmt, ...) \
printf( \
"[%s:%s]:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)
using baidu::paddle_serving::sdk_cpp::Predictor;
using baidu::paddle_serving::sdk_cpp::PredictorApi;
......
......@@ -248,11 +248,10 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
output.insts(0).tensor_array(idx).float_data(i);
}
}
// TODO
postprocess_end = timeline.TimeStampUS();
}
predict_res.add_model_res(std::move(model));
}
postprocess_end = timeline.TimeStampUS();
}
if (FLAGS_profile_client) {
......@@ -263,7 +262,7 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
// TODO: multi-model
// TODO(barriery): multi-model profile time
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
......@@ -431,8 +430,8 @@ int PredictorClient::batch_predict(
}
}
predict_res_batch.add_model_res(std::move(model));
postprocess_end = timeline.TimeStampUS();
}
postprocess_end = timeline.TimeStampUS();
}
if (FLAGS_profile_client) {
......@@ -443,7 +442,7 @@ int PredictorClient::batch_predict(
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
// TODO: multi-models
// TODO(barriery): multi-model profile time
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
......
......@@ -176,12 +176,22 @@ int GeneralResponseOp::inference() {
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
for (uint32_t i = 0; i < pre_node_names.size(); ++i) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "p size for input blob: " << input_blob->p_size;
ModelOutput *output = res->mutable_outputs(i);
for (int i = 0; i < input_blob->p_size; ++i) {
output->add_profile_time(input_blob->time_stamp[i]);
ModelOutput *output = res->mutable_outputs(pi);
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
}
}
// TODO(guru4elephant): find more elegant way to do this
......
......@@ -32,34 +32,18 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextResponseOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
return -1;
}
VLOG(2) << "pre node names size: " << pre_node_names.size();
// TODO: multi-predecessor
/*
const TensorVector *in = &input_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "infer batch size: " << batch_size;
const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
int64_t start = timeline.TimeStampUS();
......@@ -79,65 +63,97 @@ int GeneralTextResponseOp::inference() {
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
// response inst with only fetch_var_names
Response *res = mutable_data<Response>();
const GeneralBlob *input_blob;
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
<< pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
return -1;
}
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = res->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
VLOG(2) << "out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
const TensorVector *in = &input_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "input batch size: " << batch_size;
ModelOutput *output = res->add_outputs();
output->set_engine_name(
pre_name); // To get the order of model return values
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
VLOG(2) << "out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
}
}
}
int var_idx = 0;
for (auto &idx : fetch_index) {
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
int cap = 1;
for (int j = 1; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
if (model_config->_is_lod_fetch[idx]) {
for (int j = 0; j < batch_size; ++j) {
for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
k++) {
res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
data_ptr[k]);
}
int var_idx = 0;
for (auto &idx : fetch_index) {
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
int cap = 1;
for (int j = 1; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
data_ptr[k]);
if (model_config->_is_lod_fetch[idx]) {
for (int j = 0; j < batch_size; ++j) {
for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
k++) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
}
var_idx++;
}
var_idx++;
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
for (int i = 0; i < input_blob->p_size; ++i) {
res->add_profile_time(input_blob->time_stamp[i]);
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "p size for input blob: " << input_blob->p_size;
ModelOutput *output = res->mutable_outputs(pi);
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
}
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
*/
return 0;
}
DEFINE_OP(GeneralTextResponseOp);
......
......@@ -45,8 +45,7 @@ message Response {
message ModelOutput {
repeated FetchInst insts = 1;
repeated int64 profile_time = 2;
optional string engine_name = 3;
optional string engine_name = 2;
}
service GeneralModelService {
......
......@@ -14,6 +14,7 @@
#include "core/predictor/framework/dag.h"
#include <string>
#include <utility> // make_pair
#include <vector>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/predictor_metric.h" // PredictorMetric
......@@ -210,11 +211,11 @@ int Dag::topo_sort() {
uint32_t pnid = Dag::node_by_name(it->first)->id -
1; // 0 is reserved for begginer-op
in_egde[pnid].push_back(nid);
LOG(INFO) << "inegde[" << pnid << "]: " << nid;
}
}
for (int i = 0; i < in_degree.size(); ++i) {
LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i << "]: " << in_degree[i];
LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i
<< "]: " << in_degree[i];
}
int sorted_num = 0;
DagStage* stage = new (std::nothrow) DagStage();
......@@ -228,7 +229,6 @@ int Dag::topo_sort() {
stage->full_name = full_name() + NAME_DELIMITER + stage->name;
for (uint32_t nid = 0; nid < nodes_size; ++nid) {
if (in_degree[nid] == 0) {
LOG(INFO) << "nid:" << nid;
++sorted_num;
stage->nodes.push_back(_index_nodes[nid]);
// assign stage number after stage created
......@@ -254,13 +254,10 @@ int Dag::topo_sort() {
stage->full_name = full_name() + NAME_DELIMITER + stage->name;
for (uint32_t pi = 0; pi < pre_nodes.size(); ++pi) {
uint32_t pnid = pre_nodes[pi]->id - 1;
LOG(INFO) << "pnid: " << pnid;
for (uint32_t ei = 0; ei < in_egde[pnid].size(); ++ei) {
uint32_t nid = in_egde[pnid][ei];
--in_degree[nid];
LOG(INFO) << "nid: " << nid << ", indeg: " << in_degree[nid];
if (in_degree[nid] == 0) {
LOG(INFO) << "nid: " << nid;
++sorted_num;
stage->nodes.push_back(_index_nodes[nid]);
// assign stage number after stage created
......@@ -277,26 +274,7 @@ int Dag::topo_sort() {
}
_stages.push_back(stage);
}
/*std::stringstream ss;*/
// for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) {
// DagStage* stage = new (std::nothrow) DagStage();
// if (stage == NULL) {
// LOG(ERROR) << "Invalid stage!";
// return ERR_MEM_ALLOC_FAILURE;
//}
// stage->nodes.push_back(_index_nodes[nid]);
// ss.str("");
// ss << _stages.size();
// stage->name = ss.str();
// stage->full_name = full_name() + NAME_DELIMITER + stage->name;
//_stages.push_back(stage);
//// assign stage number after stage created
//_index_nodes[nid]->stage = nid;
//// assign dag node full name after stage created
//_index_nodes[nid]->full_name =
// stage->full_name + NAME_DELIMITER + _index_nodes[nid]->name;
/*}*/
return ERR_OK;
}
......
......@@ -45,8 +45,7 @@ message Response {
message ModelOutput {
repeated FetchInst insts = 1;
repeated int64 profile_time = 2;
optional string engine_name = 3;
optional string engine_name = 2;
}
service GeneralModelService {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册