diff --git a/demo-client/src/data_pre.h b/demo-client/src/data_pre.h index ecd1168dae85275923dccf263b9b25e30d21295a..0d410c250dc2a8207dd5c5f479e97a611cfbf1cd 100644 --- a/demo-client/src/data_pre.h +++ b/demo-client/src/data_pre.h @@ -14,6 +14,7 @@ #ifndef SERVING_DEMO_CLIENT_SRC_DATA_PRE_H_ #define SERVING_DEMO_CLIENT_SRC_DATA_PRE_H_ #include +#include #include #include #include diff --git a/demo-serving/conf/model_toolkit.prototxt b/demo-serving/conf/model_toolkit.prototxt new file mode 100644 index 0000000000000000000000000000000000000000..88efaefba514f4763952d1b6d21d880d6d68942f --- /dev/null +++ b/demo-serving/conf/model_toolkit.prototxt @@ -0,0 +1,10 @@ +engines { + name: "bert" + type: "FLUID_GPU_ANALYSIS_DIR" + reloadable_meta: "./data/model/paddle/fluid_time_file" + reloadable_type: "timestamp_ne" + model_data_path: "./data/model/paddle/fluid/bert" + runtime_thread_num: 0 + batch_infer_size: 0 + enable_batch_align: 0 +} diff --git a/demo-serving/conf/service.prototxt b/demo-serving/conf/service.prototxt new file mode 100644 index 0000000000000000000000000000000000000000..e630669e38cbc1c804611ef10e744b6e0b7fe76c --- /dev/null +++ b/demo-serving/conf/service.prototxt @@ -0,0 +1,4 @@ +services { + name: "BertService" + workflows: "workflow9" +} diff --git a/demo-serving/op/bert_service_op.cpp b/demo-serving/op/bert_service_op.cpp index 58ba0470f1627622f8591f7d62229b58b56b8609..d393e04ab0215dce70b6b190a76b97ca09806be6 100644 --- a/demo-serving/op/bert_service_op.cpp +++ b/demo-serving/op/bert_service_op.cpp @@ -137,7 +137,7 @@ int BertServiceOp::inference() { << " seq_len : " << out->at(0).shape[1] << " emb_size : " << out->at(0).shape[2]; - float *out_data = reinterpret_cast<float *>out->at(0).data.data(); + float *out_data = reinterpret_cast<float *>(out->at(0).data.data()); for (uint32_t bi = 0; bi < batch_size; bi++) { BertResInstance *res_instance = res->add_instances(); for (uint32_t si = 0; si < MAX_SEQ_LEN; si++) { @@ -151,7 +151,7 @@ int BertServiceOp::inference() { 
#else LOG(INFO) << "batch_size : " << out->at(0).shape[0] << " emb_size : " << out->at(0).shape[1]; - float *out_data = reinterpret_cast<float *> out->at(0).data.data(); + float *out_data = reinterpret_cast<float *>(out->at(0).data.data()); for (uint32_t bi = 0; bi < batch_size; bi++) { BertResInstance *res_instance = res->add_instances(); for (uint32_t si = 0; si < 1; si++) {