diff --git a/demo-client/src/bert_service.cpp b/demo-client/src/bert_service.cpp
index 688d1ea6b51ccb17a42957965525f06d026239e6..7a2ac5708901e462c6016145e9e853d0cb8bc6d5 100644
--- a/demo-client/src/bert_service.cpp
+++ b/demo-client/src/bert_service.cpp
@@ -36,9 +36,11 @@ extern int max_seq_len = 128;
 extern int layer_num = 12;
 extern int emb_size = 768;
 extern int thread_num = 1;
+extern int max_turn = 1000;
 
 std::atomic<int> g_concurrency(0);
 std::vector<std::vector<uint64_t>> response_time;
+std::vector<std::vector<uint64_t>> infer_time;
 char* data_filename = "./data/bert/demo_wiki_train";
 
 #if 1
@@ -64,7 +66,7 @@ int create_req(Request* req,
   std::vector<std::string> seg_list = split(feature_list[1], " ");
   std::vector<std::string> pos_list = split(feature_list[2], " ");
   for (int fi = 0; fi < max_seq_len; fi++) {
-    if (fi < token_list.size()) {
+    if (std::stoi(token_list[fi]) != 0) {
       ins->add_token_ids(std::stoi(token_list[fi]));
       ins->add_sentence_type_ids(std::stoi(seg_list[fi]));
       ins->add_position_ids(std::stoi(pos_list[fi]));
@@ -157,7 +159,7 @@ void thread_worker(PredictorApi* api,
   api->thrd_initialize();
   std::string line;
   int turns = 0;
-  while (turns < 1000) {
+  while (turns < max_turn) {
     timeval start;
     gettimeofday(&start, NULL);
     api->thrd_clear();
diff --git a/demo-serving/op/bert_service_op.cpp b/demo-serving/op/bert_service_op.cpp
index d393e04ab0215dce70b6b190a76b97ca09806be6..e4618dc7de989536397af4135f9c2e5f6e5164a2 100644
--- a/demo-serving/op/bert_service_op.cpp
+++ b/demo-serving/op/bert_service_op.cpp
@@ -34,6 +34,9 @@ const int LAYER_NUM = 12;
 const int EMB_SIZE = 768;
 
 int BertServiceOp::inference() {
+  timeval op_start;
+  gettimeofday(&op_start, NULL);
+
   const Request *req = dynamic_cast<const Request *>(get_request_message());
 
   TensorVector *in = butil::get_object<TensorVector>();
@@ -120,18 +123,33 @@ int BertServiceOp::inference() {
     return -1;
   }
 
-  /*
-  float* example = (float*)(*in)[3].data.data();
-  for(uint32_t i = 0; i < MAX_SEQ_LEN; i++){
-    LOG(INFO) << *(example + i);
-  */
-
+#if 0  // print request
+  std::ostringstream oss;
+  for (int j = 0; j < 3; j++) {
+    int64_t* example = reinterpret_cast<int64_t*>((*in)[j].data.data());
+    for (uint32_t i = 0; i < MAX_SEQ_LEN; i++) {
+      oss << *(example + i) << " ";
+    }
+    oss << ";";
+  }
+  float* example = reinterpret_cast<float*>((*in)[3].data.data());
+  for (int i = 0; i < MAX_SEQ_LEN; i++) {
+    oss << *(example + i) << " ";
+  }
+  LOG(INFO) << "msg: " << oss.str();
+#endif
+  timeval infer_start;
+  gettimeofday(&infer_start, NULL);
   if (predictor::InferManager::instance().infer(
           BERT_MODEL_NAME, in, out, batch_size)) {
     LOG(ERROR) << "Failed do infer in fluid model: " << BERT_MODEL_NAME;
     return -1;
   }
-
+  timeval infer_end;
+  gettimeofday(&infer_end, NULL);
+  uint64_t infer_time =
+      (infer_end.tv_sec * 1000 + infer_end.tv_usec / 1000 -
+       (infer_start.tv_sec * 1000 + infer_start.tv_usec / 1000));
 #if 0
   LOG(INFO) << "batch_size : " << out->at(0).shape[0]
             << " seq_len : " << out->at(0).shape[1]
@@ -163,6 +181,13 @@ int BertServiceOp::inference() {
     }
   }
 
+  timeval op_end;
+  gettimeofday(&op_end, NULL);
+  uint64_t op_time = (op_end.tv_sec * 1000 + op_end.tv_usec / 1000 -
+                      (op_start.tv_sec * 1000 + op_start.tv_usec / 1000));
+
+  res->set_op_time(op_time);
+  res->set_infer_time(infer_time);
 #endif
   for (size_t i = 0; i < in->size(); ++i) {
     (*in)[i].shape.clear();
@@ -175,6 +200,7 @@ int BertServiceOp::inference() {
   }
 
   out->clear();
   butil::return_object<TensorVector>(out);
 
+  return 0;
 }
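
Note on the timing idiom this patch uses in both files: latency is measured by calling gettimeofday() before and after the timed region and converting each timeval to whole milliseconds as tv_sec * 1000 + tv_usec / 1000, then subtracting. Below is a minimal, self-contained sketch of that pattern for reference; it assumes a POSIX environment, and the helper names to_ms and elapsed_ms are illustrative only and do not appear in the patch.

#include <stdint.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

// Convert a timeval to whole milliseconds (the patch inlines this arithmetic).
static uint64_t to_ms(const timeval &tv) {
  return tv.tv_sec * 1000ULL + tv.tv_usec / 1000;
}

// Elapsed wall-clock time between two timevals, in milliseconds.
static uint64_t elapsed_ms(const timeval &start, const timeval &end) {
  return to_ms(end) - to_ms(start);
}

int main() {
  timeval start, end;
  gettimeofday(&start, NULL);
  usleep(50 * 1000);  // stand-in for the timed work (the infer call / op body)
  gettimeofday(&end, NULL);
  printf("elapsed: %llu ms\n", (unsigned long long)elapsed_ms(start, end));
  return 0;
}

Since tv_usec / 1000 truncates, durations have millisecond granularity and anything under 1 ms reads as 0, which is adequate for the per-request latencies collected in response_time and infer_time.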