Commit fa4117d9 authored by B barrierye

fix general_text_response_op && fix timeline

Parent cb2e00d9
@@ -21,15 +21,13 @@
 #include <fstream>
 #include <map>
 #include <string>
+#include <utility>  // move
 #include <vector>
 #include "core/sdk-cpp/builtin_format.pb.h"
 #include "core/sdk-cpp/general_model_service.pb.h"
 #include "core/sdk-cpp/include/common.h"
 #include "core/sdk-cpp/include/predictor_sdk.h"

-#define BLOG(fmt, ...) \
-  printf( \
-      "[%s:%s]:%d " fmt "\n", __FILE__, __FUNCTION__, __LINE__, ##__VA_ARGS__)

 using baidu::paddle_serving::sdk_cpp::Predictor;
 using baidu::paddle_serving::sdk_cpp::PredictorApi;
...
@@ -248,11 +248,10 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
                 output.insts(0).tensor_array(idx).float_data(i);
           }
         }
-        // TODO
-        postprocess_end = timeline.TimeStampUS();
       }
       predict_res.add_model_res(std::move(model));
     }
+    postprocess_end = timeline.TimeStampUS();
   }

   if (FLAGS_profile_client) {
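
Note: `postprocess_end` was previously stamped inside the per-model loop, so it was overwritten on every iteration; hoisting it after the loop makes the postprocess window unambiguous (one timestamp covering all model outputs) and drops the redundant per-iteration calls. The same fix is applied to `batch_predict` below. A self-contained sketch of the corrected measurement, with a hypothetical `TimeStampUS()` standing in for the `Timer` used in the diff:

```cpp
// Minimal sketch (hypothetical names) of the timeline fix: the timestamp
// is taken once, after the loop, instead of once per model iteration.
#include <chrono>
#include <cstdint>
#include <cstdio>

static int64_t TimeStampUS() {
  using namespace std::chrono;
  return duration_cast<microseconds>(
             steady_clock::now().time_since_epoch()).count();
}

int main() {
  const int model_num = 3;
  int64_t postprocess_start = TimeStampUS();
  for (int m_idx = 0; m_idx < model_num; ++m_idx) {
    // ... postprocess one model's fetch tensors here ...
  }
  // Taken once, after the loop: spans postprocessing of all models.
  int64_t postprocess_end = TimeStampUS();
  printf("postprocess: %lld us\n",
         static_cast<long long>(postprocess_end - postprocess_start));
  return 0;
}
```
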
@@ -263,7 +262,7 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
             << "prepro_1:" << preprocess_end << " "
             << "client_infer_0:" << client_infer_start << " "
             << "client_infer_1:" << client_infer_end << " ";
-    // TODO: multi-model
+    // TODO(barriery): multi-model profile time
     if (FLAGS_profile_server) {
       int op_num = res.profile_time_size() / 2;
       for (int i = 0; i < op_num; ++i) {
@@ -431,8 +430,8 @@ int PredictorClient::batch_predict(
         }
       }
       predict_res_batch.add_model_res(std::move(model));
-      postprocess_end = timeline.TimeStampUS();
     }
+    postprocess_end = timeline.TimeStampUS();
   }

   if (FLAGS_profile_client) {
@@ -443,7 +442,7 @@ int PredictorClient::batch_predict(
             << "prepro_1:" << preprocess_end << " "
             << "client_infer_0:" << client_infer_start << " "
             << "client_infer_1:" << client_infer_end << " ";
-    // TODO: multi-models
+    // TODO(barriery): multi-model profile time
    if (FLAGS_profile_server) {
       int op_num = res.profile_time_size() / 2;
       for (int i = 0; i < op_num; ++i) {
...
@@ -176,12 +176,22 @@ int GeneralResponseOp::inference() {
   if (req->profile_server()) {
     int64_t end = timeline.TimeStampUS();

-    for (uint32_t i = 0; i < pre_node_names.size(); ++i) {
-      input_blob = get_depend_argument<GeneralBlob>(pre_node_names[i]);
+    // TODO(barriery): multi-model profile_time.
+    // At present, only the response_op is multi-input, so here we get
+    // the profile_time by hard coding. It needs to be replaced with
+    // a more elegant way.
+    for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
+      input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
       VLOG(2) << "p size for input blob: " << input_blob->p_size;
-      ModelOutput *output = res->mutable_outputs(i);
-      for (int i = 0; i < input_blob->p_size; ++i) {
-        output->add_profile_time(input_blob->time_stamp[i]);
+      ModelOutput *output = res->mutable_outputs(pi);
+      int profile_time_idx = -1;
+      if (pi == 0) {
+        profile_time_idx = 0;
+      } else {
+        profile_time_idx = input_blob->p_size - 2;
+      }
+      for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
+        res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
       }
     }
     // TODO(guru4elephant): find more elegant way to do this
...
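
The hard-coded indexing above deserves a note. Judging by the TODO comment, it assumes each op appends a (start, end) timestamp pair to the blob's `time_stamp` array and that every predecessor of the response op carries the same shared upstream prefix; so predecessor 0 contributes all `p_size` entries, while each later predecessor contributes only its own final pair starting at `p_size - 2`, avoiding duplicated upstream timestamps. A standalone sketch of that index arithmetic with hypothetical timestamp data (the real op reads `GeneralBlob::time_stamp` / `p_size`):

```cpp
// Standalone sketch of the profile_time merge rule above, with made-up data.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Two predecessor chains sharing the same upstream ops: entries 0..3
  // are common; the last two entries are each chain's own op.
  std::vector<std::vector<int64_t>> time_stamp = {
      {100, 110, 200, 210, 300, 310},   // predecessor 0
      {100, 110, 200, 210, 400, 410}};  // predecessor 1
  std::vector<int64_t> profile_time;
  for (size_t pi = 0; pi < time_stamp.size(); ++pi) {
    int p_size = static_cast<int>(time_stamp[pi].size());
    // pi == 0: take everything; later predecessors: only the last pair,
    // so the shared upstream timestamps are not duplicated.
    int profile_time_idx = (pi == 0) ? 0 : p_size - 2;
    for (; profile_time_idx < p_size; ++profile_time_idx) {
      profile_time.push_back(time_stamp[pi][profile_time_idx]);
    }
  }
  for (int64_t t : profile_time) printf("%lld ", static_cast<long long>(t));
  printf("\n");  // prints: 100 110 200 210 300 310 400 410
  return 0;
}
```
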
@@ -32,34 +32,18 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::FetchInst;
+using baidu::paddle_serving::predictor::general_model::ModelOutput;
 using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;

 int GeneralTextResponseOp::inference() {
   VLOG(2) << "Going to run inference";
   const std::vector<std::string> pre_node_names = pre_names();
-  if (pre_node_names.size() != 1) {
-    LOG(ERROR) << "This op(" << op_name()
-               << ") can only have one predecessor op, but received "
-               << pre_node_names.size();
-    return -1;
-  }
-  const std::string pre_name = pre_node_names[0];
-
-  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-  if (!input_blob) {
-    LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
-    return -1;
-  }
-
-  // TODO: multi-predecessor
-  /*
-  const TensorVector *in = &input_blob->tensor_vector;
-  int batch_size = input_blob->GetBatchSize();
-
-  VLOG(2) << "infer batch size: " << batch_size;
+  VLOG(2) << "pre node names size: " << pre_node_names.size();

   const Request *req = dynamic_cast<const Request *>(get_request_message());
+  // response inst with only fetch_var_names
+  Response *res = mutable_data<Response>();

   Timer timeline;
   int64_t start = timeline.TimeStampUS();
@@ -79,65 +63,97 @@ int GeneralTextResponseOp::inference() {
         model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
   }

-  // response inst with only fetch_var_names
-  Response *res = mutable_data<Response>();
-
-  for (int i = 0; i < batch_size; ++i) {
-    FetchInst *fetch_inst = res->add_insts();
-    for (auto &idx : fetch_index) {
-      Tensor *tensor = fetch_inst->add_tensor_array();
-      // currently only response float tensor or lod_tensor
-      tensor->set_elem_type(1);
-      if (model_config->_is_lod_fetch[idx]) {
-        VLOG(2) << "out[" << idx << " is lod_tensor";
-        tensor->add_shape(-1);
-      } else {
-        VLOG(2) << "out[" << idx << "] is tensor";
-        for (int k = 1; k < in->at(idx).shape.size(); ++k) {
-          VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
-          tensor->add_shape(in->at(idx).shape[k]);
+  const GeneralBlob *input_blob;
+  for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
+    const std::string &pre_name = pre_node_names[pi];
+    VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
+            << pre_node_names.size() << ")";
+    input_blob = get_depend_argument<GeneralBlob>(pre_name);
+    if (!input_blob) {
+      LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+      return -1;
+    }
+
+    const TensorVector *in = &input_blob->tensor_vector;
+    int batch_size = input_blob->GetBatchSize();
+    VLOG(2) << "input batch size: " << batch_size;
+
+    ModelOutput *output = res->add_outputs();
+    output->set_engine_name(
+        pre_name);  // To get the order of model return values
+    for (int i = 0; i < batch_size; ++i) {
+      FetchInst *fetch_inst = output->add_insts();
+      for (auto &idx : fetch_index) {
+        Tensor *tensor = fetch_inst->add_tensor_array();
+        // currently only response float tensor or lod_tensor
+        tensor->set_elem_type(1);
+        if (model_config->_is_lod_fetch[idx]) {
+          VLOG(2) << "out[" << idx << " is lod_tensor";
+          tensor->add_shape(-1);
+        } else {
+          VLOG(2) << "out[" << idx << "] is tensor";
+          for (int k = 1; k < in->at(idx).shape.size(); ++k) {
+            VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
+            tensor->add_shape(in->at(idx).shape[k]);
+          }
         }
       }
     }
-  }

-  int var_idx = 0;
-  for (auto &idx : fetch_index) {
-    float *data_ptr = static_cast<float *>(in->at(idx).data.data());
-    int cap = 1;
-    for (int j = 1; j < in->at(idx).shape.size(); ++j) {
-      cap *= in->at(idx).shape[j];
-    }
-    if (model_config->_is_lod_fetch[idx]) {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
-             k++) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
-              data_ptr[k]);
+    int var_idx = 0;
+    for (auto &idx : fetch_index) {
+      float *data_ptr = static_cast<float *>(in->at(idx).data.data());
+      int cap = 1;
+      for (int j = 1; j < in->at(idx).shape.size(); ++j) {
+        cap *= in->at(idx).shape[j];
+      }
+      if (model_config->_is_lod_fetch[idx]) {
+        for (int j = 0; j < batch_size; ++j) {
+          for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
+               k++) {
+            output->mutable_insts(j)
+                ->mutable_tensor_array(var_idx)
+                ->add_float_data(data_ptr[k]);
+          }
         }
-      }
-    } else {
-      for (int j = 0; j < batch_size; ++j) {
-        for (int k = j * cap; k < (j + 1) * cap; ++k) {
-          res->mutable_insts(j)->mutable_tensor_array(var_idx)->add_float_data(
-              data_ptr[k]);
+      } else {
+        for (int j = 0; j < batch_size; ++j) {
+          for (int k = j * cap; k < (j + 1) * cap; ++k) {
+            output->mutable_insts(j)
+                ->mutable_tensor_array(var_idx)
+                ->add_float_data(data_ptr[k]);
+          }
         }
       }
+      var_idx++;
     }
-    var_idx++;
   }

   if (req->profile_server()) {
     int64_t end = timeline.TimeStampUS();
-
-    for (int i = 0; i < input_blob->p_size; ++i) {
-      res->add_profile_time(input_blob->time_stamp[i]);
+    // TODO(barriery): multi-model profile_time.
+    // At present, only the response_op is multi-input, so here we get
+    // the profile_time by hard coding. It needs to be replaced with
+    // a more elegant way.
+    for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
+      input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
+      VLOG(2) << "p size for input blob: " << input_blob->p_size;
+      ModelOutput *output = res->mutable_outputs(pi);
+      int profile_time_idx = -1;
+      if (pi == 0) {
+        profile_time_idx = 0;
+      } else {
+        profile_time_idx = input_blob->p_size - 2;
+      }
+      for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
+        res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
+      }
     }
     // TODO(guru4elephant): find more elegant way to do this
     res->add_profile_time(start);
     res->add_profile_time(end);
   }
-  */
   return 0;
 }
 DEFINE_OP(GeneralTextResponseOp);
...
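
With this rewrite the op no longer requires exactly one predecessor: each predecessor blob becomes its own `ModelOutput`, tagged via `set_engine_name(pre_name)` so the caller can tell which model produced which output, since the order of `outputs` follows the server-side `pre_node_names`. A hedged sketch of how a reader of the `Response` might match outputs by name (plain structs stand in for the protobuf messages; the op names are hypothetical):

```cpp
// Hypothetical reader-side sketch: structs mimic the Response/ModelOutput
// messages from the proto change below.
#include <cstdio>
#include <string>
#include <vector>

struct ModelOutputLike {
  std::string engine_name;  // stamped by set_engine_name(pre_name) in the op
  // ... insts / tensor data elided ...
};

struct ResponseLike {
  std::vector<ModelOutputLike> outputs;
};

// Find one model's output by the engine name the server stamped on it,
// instead of assuming a fixed position in outputs.
const ModelOutputLike* find_output(const ResponseLike& res,
                                   const std::string& name) {
  for (const auto& out : res.outputs) {
    if (out.engine_name == name) return &out;
  }
  return nullptr;
}

int main() {
  ResponseLike res{{{"general_infer_0"}, {"general_infer_1"}}};
  const ModelOutputLike* out = find_output(res, "general_infer_1");
  printf("found: %s\n", out ? out->engine_name.c_str() : "none");
  return 0;
}
```
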
@@ -45,8 +45,7 @@ message Response {

 message ModelOutput {
   repeated FetchInst insts = 1;
-  repeated int64 profile_time = 2;
-  optional string engine_name = 3;
+  optional string engine_name = 2;
 }

 service GeneralModelService {
...
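
This proto change matches the op changes above: profile timestamps now accumulate on the top-level `Response` via `res->add_profile_time(...)` rather than per `ModelOutput`, so the repeated `profile_time` field is dropped here and `engine_name` moves to tag 2. Note that renumbering a field tag is only safe because client and server protos ship together from this repo; an old binary that still expects `profile_time = 2` would misparse the renumbered field.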
@@ -14,6 +14,7 @@

 #include "core/predictor/framework/dag.h"
 #include <string>
+#include <utility>  // make_pair
 #include <vector>
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/framework/predictor_metric.h"  // PredictorMetric
@@ -210,11 +211,11 @@ int Dag::topo_sort() {
       uint32_t pnid = Dag::node_by_name(it->first)->id -
                       1;  // 0 is reserved for begginer-op
       in_egde[pnid].push_back(nid);
-      LOG(INFO) << "inegde[" << pnid << "]: " << nid;
     }
   }
   for (int i = 0; i < in_degree.size(); ++i) {
-    LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i << "]: " << in_degree[i];
+    LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i
+              << "]: " << in_degree[i];
   }
   int sorted_num = 0;
   DagStage* stage = new (std::nothrow) DagStage();
@@ -228,7 +229,6 @@ int Dag::topo_sort() {
     stage->full_name = full_name() + NAME_DELIMITER + stage->name;
     for (uint32_t nid = 0; nid < nodes_size; ++nid) {
       if (in_degree[nid] == 0) {
-        LOG(INFO) << "nid:" << nid;
         ++sorted_num;
         stage->nodes.push_back(_index_nodes[nid]);
         // assign stage number after stage created
@@ -254,13 +254,10 @@ int Dag::topo_sort() {
     stage->full_name = full_name() + NAME_DELIMITER + stage->name;
     for (uint32_t pi = 0; pi < pre_nodes.size(); ++pi) {
       uint32_t pnid = pre_nodes[pi]->id - 1;
-      LOG(INFO) << "pnid: " << pnid;
       for (uint32_t ei = 0; ei < in_egde[pnid].size(); ++ei) {
         uint32_t nid = in_egde[pnid][ei];
         --in_degree[nid];
-        LOG(INFO) << "nid: " << nid << ", indeg: " << in_degree[nid];
         if (in_degree[nid] == 0) {
-          LOG(INFO) << "nid: " << nid;
           ++sorted_num;
           stage->nodes.push_back(_index_nodes[nid]);
           // assign stage number after stage created
@@ -277,26 +274,7 @@ int Dag::topo_sort() {
     }
     _stages.push_back(stage);
   }
-  /*std::stringstream ss;*/
-  // for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) {
-  // DagStage* stage = new (std::nothrow) DagStage();
-  // if (stage == NULL) {
-  // LOG(ERROR) << "Invalid stage!";
-  // return ERR_MEM_ALLOC_FAILURE;
-  //}
-  // stage->nodes.push_back(_index_nodes[nid]);
-  // ss.str("");
-  // ss << _stages.size();
-  // stage->name = ss.str();
-  // stage->full_name = full_name() + NAME_DELIMITER + stage->name;
-  //_stages.push_back(stage);
-  //// assign stage number after stage created
-  //_index_nodes[nid]->stage = nid;
-  //// assign dag node full name after stage created
-  //_index_nodes[nid]->full_name =
-  // stage->full_name + NAME_DELIMITER + _index_nodes[nid]->name;
-  /*}*/

   return ERR_OK;
 }
...
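
The logs removed above were debug scaffolding around `topo_sort()`, which is a stage-building variant of Kahn's algorithm: nodes with zero in-degree form a stage, their outgoing edges are deleted, and the next stage is built from the nodes that become free. A self-contained sketch of that idea (generic toy code, not the `Dag` class itself; its `in_degree`/`in_egde` arrays are simplified here):

```cpp
// Standalone Kahn-style staged topological sort, sketching what
// Dag::topo_sort() computes over its node graph.
#include <cstdio>
#include <vector>

int main() {
  // adjacency: edges[u] lists the nodes that depend on u
  std::vector<std::vector<int>> edges = {{2}, {2}, {3}, {}};
  std::vector<int> in_degree(edges.size(), 0);
  for (const auto& outs : edges)
    for (int v : outs) ++in_degree[v];

  // Stage 0 is every node with no unmet dependencies.
  std::vector<std::vector<int>> stages;
  std::vector<int> current;
  for (int n = 0; n < static_cast<int>(edges.size()); ++n)
    if (in_degree[n] == 0) current.push_back(n);

  while (!current.empty()) {
    std::vector<int> next;
    for (int u : current)
      for (int v : edges[u])
        if (--in_degree[v] == 0) next.push_back(v);  // freed for next stage
    stages.push_back(current);
    current = next;
  }

  for (size_t s = 0; s < stages.size(); ++s) {
    printf("stage %zu:", s);
    for (int n : stages[s]) printf(" %d", n);
    printf("\n");
  }
  // prints: stage 0: 0 1 / stage 1: 2 / stage 2: 3
  return 0;
}
```
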
@@ -45,8 +45,7 @@ message Response {

 message ModelOutput {
   repeated FetchInst insts = 1;
-  repeated int64 profile_time = 2;
-  optional string engine_name = 3;
+  optional string engine_name = 2;
 }

 service GeneralModelService {
...