Commit 93800a5e authored by X xulongteng

fix conflict

@@ -22,6 +22,7 @@
 #include "sdk-cpp/bert_service.pb.h"
 #include "sdk-cpp/include/common.h"
 #include "sdk-cpp/include/predictor_sdk.h"
+#include "data_pre.h"

 using baidu::paddle_serving::sdk_cpp::Predictor;
 using baidu::paddle_serving::sdk_cpp::PredictorApi;
@@ -31,31 +32,17 @@ using baidu::paddle_serving::predictor::bert_service::BertResInstance;
 using baidu::paddle_serving::predictor::bert_service::BertReqInstance;
 using baidu::paddle_serving::predictor::bert_service::Embedding_values;

-int batch_size = 49;
-int max_seq_len = 82;
-int layer_num = 12;
-int emb_size = 768;
-int thread_num = 1;
+extern int batch_size = 1;
+extern int max_seq_len = 128;
+extern int layer_num = 12;
+extern int emb_size = 768;
+extern int thread_num = 1;
 std::atomic<int> g_concurrency(0);
 std::vector<std::vector<int>> response_time;
 char* data_filename = "./data/bert/demo_wiki_train";
-std::vector<std::string> split(const std::string& str,
-                               const std::string& pattern) {
-  std::vector<std::string> res;
-  if (str == "") return res;
-  std::string strs = str + pattern;
-  size_t pos = strs.find(pattern);
-  while (pos != strs.npos) {
-    std::string temp = strs.substr(0, pos);
-    res.push_back(temp);
-    strs = strs.substr(pos + 1, strs.size());
-    pos = strs.find(pattern);
-  }
-  return res;
-}
-
-/*
+#if 1
 int create_req(Request* req,
                const std::vector<std::string>& data_list,
                int data_index,
@@ -90,59 +77,13 @@ int create_req(Request* req,
         ins->add_input_masks(0.0);
       }
     }
+    ins->set_max_seq_len(max_seq_len);
   }
   return 0;
 }
-*/
-
-int create_req(Request* req,
-               const std::vector<std::string>& data_list,
-               int data_index,
-               int batch_size) {
-  // add data
-  // avoid out of boundary
-  int cur_index = data_index;
-  if (cur_index >= data_list.size()) {
-    cur_index = cur_index % data_list.size();
-  }
-  std::vector<std::string> feature_list = split(data_list[cur_index], ";");
-  std::vector<std::string> src_field = split(feature_list[0], ":");
-  std::vector<std::string> src_ids = split(src_field[1], " ");
-  std::vector<std::string> pos_field = split(feature_list[1], ":");
-  std::vector<std::string> pos_ids = split(pos_field[1], " ");
-  std::vector<std::string> sent_field = split(feature_list[2], ":");
-  std::vector<std::string> sent_ids = split(sent_field[1], " ");
-  std::vector<std::string> mask_field = split(feature_list[3], ":");
-  std::vector<std::string> input_mask = split(mask_field[1], " ");
-  std::vector<int> shape;
-  std::vector<std::string> shapes = split(src_field[0], " ");
-  for (auto x: shapes) {
-    shape.push_back(std::stoi(x));
-  }
-  for (int i = 0; i < batch_size && i < shape[0]; ++i) {
-    BertReqInstance* ins = req->add_instances();
-    if (!ins) {
-      LOG(ERROR) << "Failed create req instance";
-      return -1;
-    }
-    for (int fi = 0; fi < max_seq_len; fi++) {
-      ins->add_token_ids(std::stoi(src_ids[i * max_seq_len + fi]));
-      ins->add_position_ids(std::stoi(pos_ids[i * max_seq_len + fi]));
-      ins->add_sentence_type_ids(std::stoi(sent_ids[i * max_seq_len + fi]));
-      ins->add_input_masks(std::stof(input_mask[i * max_seq_len + fi]));
-    }
-  }
-  return 0;
-}
-
-#if 0
+#else
 int create_req(Request* req,
                const std::vector<std::string>& data_list,
                int data_index,
@@ -167,11 +108,11 @@ int create_req(Request* req,
     std::vector<std::string> seg_list = split(feature_list[3], " ");
     std::vector<std::string> mask_list = split(feature_list[4], " ");
     for (int fi = 0; fi < max_seq_len; fi++) {
-      if (fi < std::stoi(shape_list[1])) {
-        ins->add_token_ids(std::stoi(token_list[fi + (i * max_seq_len)]));
-        ins->add_sentence_type_ids(std::stoll(seg_list[fi + (i * max_seq_len)]));
-        ins->add_position_ids(std::stoll(pos_list[fi + (i * max_seq_len)]));
-        ins->add_input_masks(std::stof(mask_list[fi + (i * max_seq_len)]));
+      if (fi < token_list.size()) {
+        ins->add_token_ids(std::stoi(token_list[fi]));
+        ins->add_sentence_type_ids(std::stoll(seg_list[fi]));
+        ins->add_position_ids(std::stoll(pos_list[fi]));
+        ins->add_input_masks(std::stof(mask_list[fi]));
       } else {
         ins->add_token_ids(0);
         ins->add_sentence_type_ids(0);
@@ -182,6 +123,7 @@ int create_req(Request* req,
     }
   }
   return 0;
 }
 #endif

 void print_res(const Request& req,
@@ -232,16 +174,10 @@ void thread_worker(PredictorApi* api,
     }
     g_concurrency++;
     LOG(INFO) << "Current concurrency " << g_concurrency.load();
-#if 0
     int data_index = turns * batch_size;
     if (create_req(&req, data_list, data_index, batch_size) != 0) {
       return;
     }
-#else
-    if (create_req(&req, data_list, turns, batch_size) != 0) {
-      return;
-    }
-#endif
     if (predictor->inference(&req, &res) != 0) {
       LOG(ERROR) << "failed call predictor with req:" << req.ShortDebugString();
       return;
@@ -310,6 +246,11 @@ int main(int argc, char** argv) {
   PredictorApi api;
   response_time.resize(thread_num);
   int server_concurrency = thread_num;
+  if (argc > 1) {
+    thread_num = std::stoi(argv[1]);
+    batch_size = std::stoi(argv[2]);
+    max_seq_len = std::stoi(argv[3]);
+  }
   // log set
 #ifdef BCLOUD
   logging::LoggingSettings settings;
......
@@ -17,9 +17,6 @@
 #include <string>
 #include "predictor/framework/infer.h"
 #include "predictor/framework/memory.h"
-#if 1
-#include <sstream>
-#endif

 namespace baidu {
 namespace paddle_serving {
 namespace serving {
@@ -31,7 +28,7 @@ using baidu::paddle_serving::predictor::bert_service::BertReqInstance;
 using baidu::paddle_serving::predictor::bert_service::Request;
 using baidu::paddle_serving::predictor::bert_service::Embedding_values;

-const uint32_t MAX_SEQ_LEN = 82;
+extern int64_t MAX_SEQ_LEN = 128;
 const bool POOLING = true;
 const int LAYER_NUM = 12;
 const int EMB_SIZE = 768;
@@ -48,6 +45,8 @@ int BertServiceOp::inference() {
     return 0;
   }

+  MAX_SEQ_LEN = req->instances(0).max_seq_len();
+
   paddle::PaddleTensor src_ids;
   paddle::PaddleTensor pos_ids;
   paddle::PaddleTensor seg_ids;
@@ -96,6 +95,7 @@ int BertServiceOp::inference() {
     memcpy(src_data,
            req_instance.token_ids().data(),
            sizeof(int64_t) * MAX_SEQ_LEN);
+#if 1
     memcpy(pos_data,
            req_instance.position_ids().data(),
            sizeof(int64_t) * MAX_SEQ_LEN);
@@ -105,54 +105,27 @@ int BertServiceOp::inference() {
     memcpy(input_masks_data,
            req_instance.input_masks().data(),
            sizeof(float) * MAX_SEQ_LEN);
+#endif
     index += MAX_SEQ_LEN;
   }
-#if 0
-  int64_t *src_data = static_cast<int64_t *>(src_ids.data.data());
-  std::ostringstream oss;
-  oss << "src_ids: ";
-  for (int i = 0; i < MAX_SEQ_LEN * batch_size; ++i) {
-    oss << src_data[i] << " ";
-  }
-  LOG(INFO) << oss.str();
-#endif
   in->push_back(src_ids);
   in->push_back(pos_ids);
   in->push_back(seg_ids);
   in->push_back(input_masks);

   TensorVector *out = butil::get_object<TensorVector>();
-  // TensorVector out;
-  /*
   if (!out) {
     LOG(ERROR) << "Failed get tls output object";
     return -1;
   }
+  /*
+  float* example = (float*)(*in)[3].data.data();
+  for(uint32_t i = 0; i < MAX_SEQ_LEN; i++){
+    LOG(INFO) << *(example + i);
   */
-  LOG(INFO) << "batch_size : " << batch_size;
-  for (int j = 0; j < 3; j ++) {
-    LOG(INFO) << "name : " << (*in)[j].name << " shape : " << (*in)[j].shape[0]
-              << " " << (*in)[j].shape[1] << " " << (*in)[j].shape[2];
-    int64_t* example = (int64_t*)(*in)[j].data.data();
-    std::ostringstream oss;
-    for(uint32_t i = MAX_SEQ_LEN * (batch_size - 1); i < MAX_SEQ_LEN * batch_size; i++){
-      oss << *(example + i);
-    }
-    LOG(INFO) << "data : " << oss.str();
-  }
-  for (int j =3; j < 4; j++) {
-    LOG(INFO) << "name : " << (*in)[j].name << " shape : " << (*in)[j].shape[0]
-              << " " << (*in)[j].shape[1] << " " << (*in)[j].shape[2];
-    float* example = (float*)(*in)[j].data.data();
-    std::ostringstream oss;
-    for(uint32_t i = MAX_SEQ_LEN * (batch_size - 1); i < MAX_SEQ_LEN * batch_size; i++){
-      oss << *(example + i);
-    }
-    LOG(INFO) << "data : " << oss.str();
-  }

   if (predictor::InferManager::instance().infer(
           BERT_MODEL_NAME, in, out, batch_size)) {
@@ -160,20 +133,12 @@ int BertServiceOp::inference() {
     return -1;
   }
-  /*
-  paddle::NativeConfig config;
-  config.model_dir = "./data/model/paddle/fluid/bert";
-  auto predictor = CreatePaddlePredictor(config);
-  predictor->Run(*in, &out);
-  */
 #if 0
-  // float *out_data = static_cast<float *>(out->at(0).data.data());
-  LOG(INFO) << "check point";
-  /*
   LOG(INFO) << "batch_size : " << out->at(0).shape[0]
             << " seq_len : " << out->at(0).shape[1]
             << " emb_size : " << out->at(0).shape[2];
+  float *out_data = (float*) out->at(0).data.data();
   for (uint32_t bi = 0; bi < batch_size; bi++) {
     BertResInstance *res_instance = res->add_instances();
     for (uint32_t si = 0; si < MAX_SEQ_LEN; si++) {
@@ -184,43 +149,22 @@ int BertServiceOp::inference() {
       }
     }
   }
-  for (size_t i = 0; i < in->size(); ++i) {
-    (*in)[i].shape.clear();
-  }
-  in->clear();
-  butil::return_object<TensorVector>(in);
-  for (size_t i = 0; i < out->size(); ++i) {
-    (*out)[i].shape.clear();
-  }
-  out->clear();
-  butil::return_object<TensorVector>(out);
-  */
 #else
-  float *out_data = static_cast<float *>(out->at(0).data.data());
-  std::ostringstream oss;
-  oss << "Shape: [";
-  for (auto x: out->at(0).shape) {
-    oss << x << " ";
-  }
-  oss << "]";
-  LOG(INFO) << oss.str();
-  // Output shape is [batch_size x 3]
+  LOG(INFO) << "batch_size : " << out->at(0).shape[0]
+            << " emb_size : " << out->at(0).shape[1];
+  float *out_data = (float*) out->at(0).data.data();
   for (uint32_t bi = 0; bi < batch_size; bi++) {
     BertResInstance *res_instance = res->add_instances();
-    std::ostringstream oss;
-    oss << "Sample " << bi << " [";
-    oss << out_data[bi * 3 + 0] << " "
-        << out_data[bi * 3 + 1] << " "
-        << out_data[bi * 3 + 2] << "]";
-    LOG(INFO) << oss.str();
+    for (uint32_t si = 0; si < 1; si++) {
+      Embedding_values *emb_instance = res_instance->add_instances();
+      for (uint32_t ei = 0; ei < EMB_SIZE; ei++) {
+        uint32_t index = bi * MAX_SEQ_LEN * EMB_SIZE + si * EMB_SIZE + ei;
+        emb_instance->add_values(out_data[index]);
+      }
+    }
   }
+#endif
   for (size_t i = 0; i < in->size(); ++i) {
     (*in)[i].shape.clear();
   }
@@ -232,7 +176,6 @@ int BertServiceOp::inference() {
   }
   out->clear();
   butil::return_object<TensorVector>(out);
-#endif
   return 0;
 }
......
@@ -25,6 +25,7 @@ message BertReqInstance {
   repeated int64 sentence_type_ids = 2;
   repeated int64 position_ids = 3;
   repeated float input_masks = 4;
+  required int64 max_seq_len = 5;
 };

 message Request { repeated BertReqInstance instances = 1; };
......
@@ -200,6 +200,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
     analysis_config.EnableUseGpu(100, FLAGS_gpuid);
     analysis_config.SwitchSpecifyInputNames(true);
     analysis_config.SetCpuMathLibraryNumThreads(1);
+    analysis_config.SwitchIrOptim(true);

     if (params.enable_memory_optimization()) {
       analysis_config.EnableMemoryOptim(params.static_optimization(),
......
@@ -25,6 +25,7 @@ message BertReqInstance {
   repeated int64 sentence_type_ids = 2;
   repeated int64 position_ids = 3;
   repeated float input_masks = 4;
+  required int64 max_seq_len = 5;
 };

 message Request { repeated BertReqInstance instances = 1; };
......