Commit afcd0199 authored by xulongteng

Merge remote-tracking branch 'refs/remotes/origin/bert' into bert

@@ -36,9 +36,11 @@ extern int max_seq_len = 128;
 extern int layer_num = 12;
 extern int emb_size = 768;
 extern int thread_num = 1;
+extern int max_turn = 1000;
 std::atomic<int> g_concurrency(0);
 std::vector<std::vector<int>> response_time;
+std::vector<std::vector<int>> infer_time;
 char* data_filename = "./data/bert/demo_wiki_train";
 #if 1
@@ -64,7 +66,7 @@ int create_req(Request* req,
   std::vector<std::string> seg_list = split(feature_list[1], " ");
   std::vector<std::string> pos_list = split(feature_list[2], " ");
   for (int fi = 0; fi < max_seq_len; fi++) {
-    if (fi < token_list.size()) {
+    if (std::stoi(token_list[fi]) != 0) {
       ins->add_token_ids(std::stoi(token_list[fi]));
       ins->add_sentence_type_ids(std::stoi(seg_list[fi]));
       ins->add_position_ids(std::stoi(pos_list[fi]));
@@ -157,7 +159,7 @@ void thread_worker(PredictorApi* api,
   api->thrd_initialize();
   std::string line;
   int turns = 0;
-  while (turns < 1000) {
+  while (turns < max_turn) {
     timeval start;
     gettimeofday(&start, NULL);
     api->thrd_clear();
...
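For reference, a minimal, self-contained sketch of the gettimeofday-based latency bookkeeping the client changes above build on: `max_turn` and `response_time` come from the diff, while `now_ms`, `worker_sketch`, and `send_request` are hypothetical names introduced only for illustration, standing in for the real `api->inference()` call inside `thread_worker`.

#include <sys/time.h>
#include <stdint.h>
#include <vector>

// Hypothetical sketch, not part of the commit: each worker thread runs
// max_turn requests and records per-request latency in milliseconds.
static uint64_t now_ms() {
  timeval tv;
  gettimeofday(&tv, NULL);
  return static_cast<uint64_t>(tv.tv_sec) * 1000 + tv.tv_usec / 1000;
}

static void send_request() { /* placeholder for the real RPC call */ }

void worker_sketch(int thread_id, int max_turn,
                   std::vector<std::vector<int>>* response_time) {
  for (int turns = 0; turns < max_turn; ++turns) {
    uint64_t start = now_ms();
    send_request();
    uint64_t end = now_ms();
    (*response_time)[thread_id].push_back(static_cast<int>(end - start));
  }
}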
@@ -34,6 +34,9 @@ const int LAYER_NUM = 12;
 const int EMB_SIZE = 768;
 int BertServiceOp::inference() {
+  timeval op_start;
+  gettimeofday(&op_start, NULL);
   const Request *req = dynamic_cast<const Request *>(get_request_message());
   TensorVector *in = butil::get_object<TensorVector>();
@@ -120,18 +123,33 @@ int BertServiceOp::inference() {
     return -1;
   }
-  /*
-  float* example = (float*)(*in)[3].data.data();
-  for(uint32_t i = 0; i < MAX_SEQ_LEN; i++){
-    LOG(INFO) << *(example + i);
-  */
+#if 0  // print request
+  std::ostringstream oss;
+  for (int j = 0; j < 3; j++) {
+    int64_t* example = reinterpret_cast<int64_t*>((*in)[j].data.data());
+    for (uint32_t i = 0; i < MAX_SEQ_LEN; i++) {
+      oss << *(example + i) << " ";
+    }
+    oss << ";";
+  }
+  float* example = reinterpret_cast<float*>((*in)[3].data.data());
+  for (int i = 0; i < MAX_SEQ_LEN; i++) {
+    oss << *(example + i) << " ";
+  }
+  LOG(INFO) << "msg: " << oss.str();
+#endif
+  timeval infer_start;
+  gettimeofday(&infer_start, NULL);
   if (predictor::InferManager::instance().infer(
           BERT_MODEL_NAME, in, out, batch_size)) {
     LOG(ERROR) << "Failed do infer in fluid model: " << BERT_MODEL_NAME;
     return -1;
   }
+  timeval infer_end;
+  gettimeofday(&infer_end, NULL);
+  uint64_t infer_time =
+      (infer_end.tv_sec * 1000 + infer_end.tv_usec / 1000 -
+       (infer_start.tv_sec * 1000 + infer_start.tv_usec / 1000));
 #if 0
   LOG(INFO) << "batch_size : " << out->at(0).shape[0]
             << " seq_len : " << out->at(0).shape[1]
@@ -163,6 +181,13 @@ int BertServiceOp::inference() {
     }
   }
+  timeval op_end;
+  gettimeofday(&op_end, NULL);
+  uint64_t op_time = (op_end.tv_sec * 1000 + op_end.tv_usec / 1000 -
+                      (op_start.tv_sec * 1000 + op_start.tv_usec / 1000));
+  res->set_op_time(op_time);
+  res->set_infer_time(infer_time);
 #endif
   for (size_t i = 0; i < in->size(); ++i) {
     (*in)[i].shape.clear();
@@ -175,6 +200,7 @@ int BertServiceOp::inference() {
   }
   out->clear();
   butil::return_object<TensorVector>(out);
   return 0;
 }
...
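A side note on the timing arithmetic: the same millisecond computation appears twice in the op above (for `infer_time` and `op_time`). A possible helper, not part of the commit, that factors it out and widens `tv_sec` before multiplying, so the `tv_sec * 1000` term cannot overflow on platforms where time_t is 32-bit:

#include <sys/time.h>
#include <stdint.h>

// Hypothetical helper: converts two timeval stamps to milliseconds and
// returns the difference, widening tv_sec before the multiplication.
static uint64_t elapsed_ms(const timeval& start, const timeval& end) {
  uint64_t start_ms =
      static_cast<uint64_t>(start.tv_sec) * 1000 + start.tv_usec / 1000;
  uint64_t end_ms =
      static_cast<uint64_t>(end.tv_sec) * 1000 + end.tv_usec / 1000;
  return end_ms - start_ms;
}

// Usage matching the diff:
//   uint64_t infer_time = elapsed_ms(infer_start, infer_end);
//   uint64_t op_time = elapsed_ms(op_start, op_end);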