diff --git a/demo-client/src/bert_service.cpp b/demo-client/src/bert_service.cpp index c76b979c3b71a9a3e659991f61185ab9823e8c64..688d1ea6b51ccb17a42957965525f06d026239e6 100644 --- a/demo-client/src/bert_service.cpp +++ b/demo-client/src/bert_service.cpp @@ -19,11 +19,10 @@ #include #include #include //NOLINT +#include "./data_pre.h" #include "sdk-cpp/bert_service.pb.h" #include "sdk-cpp/include/common.h" #include "sdk-cpp/include/predictor_sdk.h" -#include "data_pre.h" - using baidu::paddle_serving::sdk_cpp::Predictor; using baidu::paddle_serving::sdk_cpp::PredictorApi; using baidu::paddle_serving::predictor::bert_service::Request; @@ -83,7 +82,6 @@ int create_req(Request* req, } #else - int create_req(Request* req, const std::vector& data_list, int data_index, @@ -102,7 +100,7 @@ int create_req(Request* req, } std::vector feature_list = split(data_list[cur_index], ":"); - std::vector shape_list = split(feature_list[0]," "); + std::vector shape_list = split(feature_list[0], " "); std::vector token_list = split(feature_list[1], " "); std::vector pos_list = split(feature_list[2], " "); std::vector seg_list = split(feature_list[3], " "); @@ -203,6 +201,7 @@ void calc_time(int server_concurrency, int batch_size) { LOG(INFO) << "Total request : " << (time_list.size()); LOG(INFO) << "Batch size : " << batch_size; LOG(INFO) << "Max concurrency : " << server_concurrency; + LOG(INFO) << "Max Seq Len : " << max_seq_len; float total_time = 0; float max_time = 0; float min_time = 1000000; @@ -240,17 +239,22 @@ void calc_time(int server_concurrency, int batch_size) { LOG(INFO) << "99.9 percent ms: " << time_list[percent_pos_999]; } else { LOG(INFO) << "N/A"; + LOG(INFO) << "N/A"; + LOG(INFO) << "N/A"; + LOG(INFO) << "N/A"; + LOG(INFO) << "N/A"; + LOG(INFO) << "N/A"; } } int main(int argc, char** argv) { PredictorApi api; - response_time.resize(thread_num); - int server_concurrency = thread_num; if (argc > 1) { thread_num = std::stoi(argv[1]); batch_size = std::stoi(argv[2]); max_seq_len = std::stoi(argv[3]); } + response_time.resize(thread_num); + int server_concurrency = thread_num; // log set #ifdef BCLOUD logging::LoggingSettings settings; diff --git a/demo-client/src/data_pre.h b/demo-client/src/data_pre.h new file mode 100644 index 0000000000000000000000000000000000000000..ecd1168dae85275923dccf263b9b25e30d21295a --- /dev/null +++ b/demo-client/src/data_pre.h @@ -0,0 +1,72 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef SERVING_DEMO_CLIENT_SRC_DATA_PRE_H_ +#define SERVING_DEMO_CLIENT_SRC_DATA_PRE_H_ +#include +#include +#include +#include + +std::vector split(const std::string& str, + const std::string& pattern) { + std::vector res; + if (str == "") return res; + std::string strs = str + pattern; + size_t pos = strs.find(pattern); + while (pos != strs.npos) { + std::string temp = strs.substr(0, pos); + res.push_back(temp); + strs = strs.substr(pos + 1, strs.size()); + pos = strs.find(pattern); + } + return res; +} + +std::map dict; +char* dict_filename = "./data/bert/vocab.txt"; + +int make_dict() { + std::ifstream dict_file(dict_filename); + if (!dict_file) { + std::cout << "read dict failed" << std::endl; + return -1; + } + std::string line; + uint64_t index = 0; + while (getline(dict_file, line)) { + dict[line] = 0; + index += 1; + } + return 0; +} + +class BertData { + public: + int gen_data(std::string line) { + std::vector data_list; + data_list = split(line, " "); + tokenization(data_list); + return 0; + } + + int tokenization(std::vector data_list) {} + + private: + std::vector token_list; + std::vector seg_list; + std::vector pos_list; + std::vector input_masks; +}; + +#endif // SERVING_DEMO_CLIENT_SRC_DATA_PRE_H_ diff --git a/demo-serving/conf/model_toolkit.prototxt b/demo-serving/conf/model_toolkit.prototxt deleted file mode 100644 index 7e1e8542383b7f7a30c5f998a84a8c866fec79d8..0000000000000000000000000000000000000000 --- a/demo-serving/conf/model_toolkit.prototxt +++ /dev/null @@ -1,49 +0,0 @@ -engines { - name: "image_classification_resnet" - type: "FLUID_CPU_ANALYSIS_DIR" - reloadable_meta: "./data/model/paddle/fluid_time_file" - reloadable_type: "timestamp_ne" - model_data_path: "./data/model/paddle/fluid/SE_ResNeXt50_32x4d" - runtime_thread_num: 0 - batch_infer_size: 0 - enable_batch_align: 0 - enable_memory_optimization: true - static_optimization: false - force_update_static_cache: false -} - -engines { - name: "text_classification_bow" - type: "FLUID_CPU_ANALYSIS_DIR" - reloadable_meta: "./data/model/paddle/fluid_time_file" - reloadable_type: "timestamp_ne" - model_data_path: "./data/model/paddle/fluid/text_classification_lstm" - runtime_thread_num: 0 - batch_infer_size: 0 - enable_batch_align: 0 -} - -engines { - name: "ctr_prediction" - type: "FLUID_CPU_ANALYSIS_DIR" - reloadable_meta: "./data/model/paddle/fluid_time_file" - reloadable_type: "timestamp_ne" - model_data_path: "./data/model/paddle/fluid/ctr_prediction" - runtime_thread_num: 0 - batch_infer_size: 0 - enable_batch_align: 0 - sparse_param_service_type: REMOTE - sparse_param_service_table_name: "dict" -} - - -engines { - name: "bert" - type: "FLUID_CPU_ANALYSIS_DIR" - reloadable_meta: "./data/model/paddle/fluid_time_file" - reloadable_type: "timestamp_ne" - model_data_path: "./data/model/paddle/fluid/bert" - runtime_thread_num: 0 - batch_infer_size: 0 - enable_batch_align: 0 -} diff --git a/demo-serving/conf/service.prototxt b/demo-serving/conf/service.prototxt deleted file mode 100644 index 64a73006a05b46ea2ce464818a44c5d223b1ad88..0000000000000000000000000000000000000000 --- a/demo-serving/conf/service.prototxt +++ /dev/null @@ -1,43 +0,0 @@ -services { - name: "BuiltinDenseFormatService" - workflows: "workflow1" -} -services { - name: "BuiltinSparseFormatService" - workflows: "workflow2" -} - -services { - name: "BuiltinTestEchoService" - workflows: "workflow3" -} - -services { - name: "ImageClassifyService" - workflows: "workflow4" -} - -services { - name: "BuiltinFluidService" - workflows: "workflow5" -} - -services { - name: "TextClassificationService" - workflows: "workflow6" -} - -services { - name: "EchoKVDBService" - workflows: "workflow7" -} - -services { - name: "CTRPredictionService" - workflows: "workflow8" -} - -services { - name: "BertService" - workflows: "workflow9" -} diff --git a/demo-serving/op/bert_service_op.cpp b/demo-serving/op/bert_service_op.cpp index c8a3cda5a4ead6a4f1be775ca0e742d357e12199..58ba0470f1627622f8591f7d62229b58b56b8609 100644 --- a/demo-serving/op/bert_service_op.cpp +++ b/demo-serving/op/bert_service_op.cpp @@ -77,10 +77,10 @@ int BertServiceOp::inference() { for (uint32_t i = 0; i < batch_size; i++) { lod_set[0].push_back(i * MAX_SEQ_LEN); } - //src_ids.lod = lod_set; - //pos_ids.lod = lod_set; - //seg_ids.lod = lod_set; - //input_masks.lod = lod_set; + // src_ids.lod = lod_set; + // pos_ids.lod = lod_set; + // seg_ids.lod = lod_set; + // input_masks.lod = lod_set; uint32_t index = 0; for (uint32_t i = 0; i < batch_size; i++) { @@ -120,12 +120,11 @@ int BertServiceOp::inference() { return -1; } -/* - float* example = (float*)(*in)[3].data.data(); - for(uint32_t i = 0; i < MAX_SEQ_LEN; i++){ - LOG(INFO) << *(example + i); -*/ - + /* + float* example = (float*)(*in)[3].data.data(); + for(uint32_t i = 0; i < MAX_SEQ_LEN; i++){ + LOG(INFO) << *(example + i); + */ if (predictor::InferManager::instance().infer( BERT_MODEL_NAME, in, out, batch_size)) { @@ -138,7 +137,7 @@ int BertServiceOp::inference() { << " seq_len : " << out->at(0).shape[1] << " emb_size : " << out->at(0).shape[2]; - float *out_data = (float*) out->at(0).data.data(); + float *out_data = reinterpret_castout->at(0).data.data(); for (uint32_t bi = 0; bi < batch_size; bi++) { BertResInstance *res_instance = res->add_instances(); for (uint32_t si = 0; si < MAX_SEQ_LEN; si++) { @@ -150,32 +149,32 @@ int BertServiceOp::inference() { } } #else - LOG(INFO) << "batch_size : " << out->at(0).shape[0] - << " emb_size : " << out->at(0).shape[1]; - float *out_data = (float*) out->at(0).data.data(); - for (uint32_t bi = 0; bi < batch_size; bi++) { - BertResInstance *res_instance = res->add_instances(); - for (uint32_t si = 0; si < 1; si++) { - Embedding_values *emb_instance = res_instance->add_instances(); - for (uint32_t ei = 0; ei < EMB_SIZE; ei++) { - uint32_t index = bi * MAX_SEQ_LEN * EMB_SIZE + si * EMB_SIZE + ei; - emb_instance->add_values(out_data[index]); - } - } - } + LOG(INFO) << "batch_size : " << out->at(0).shape[0] + << " emb_size : " << out->at(0).shape[1]; + float *out_data = reinterpret_cast out->at(0).data.data(); + for (uint32_t bi = 0; bi < batch_size; bi++) { + BertResInstance *res_instance = res->add_instances(); + for (uint32_t si = 0; si < 1; si++) { + Embedding_values *emb_instance = res_instance->add_instances(); + for (uint32_t ei = 0; ei < EMB_SIZE; ei++) { + uint32_t index = bi * EMB_SIZE + ei; + emb_instance->add_values(out_data[index]); + } + } + } #endif - for (size_t i = 0; i < in->size(); ++i) { - (*in)[i].shape.clear(); - } - in->clear(); - butil::return_object(in); + for (size_t i = 0; i < in->size(); ++i) { + (*in)[i].shape.clear(); + } + in->clear(); + butil::return_object(in); - for (size_t i = 0; i < out->size(); ++i) { - (*out)[i].shape.clear(); - } - out->clear(); - butil::return_object(out); + for (size_t i = 0; i < out->size(); ++i) { + (*out)[i].shape.clear(); + } + out->clear(); + butil::return_object(out); return 0; }