提交 d927cc79 编写于 作者: X xulongteng

Merge branch 'bert' of https://github.com/MRXLT/Serving into bert

[{ [{
"dict_name": "dict", "dict_name": "test_dict",
"shard": 2, "shard": 2,
"dup": 1, "dup": 1,
"timeout": 200, "timeout": 200,
......
engines {
name: "image_classification_resnet"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/SE_ResNeXt50_32x4d"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: true
static_optimization: false
force_update_static_cache: false
}
engines {
name: "text_classification_bow"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/text_classification_lstm"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
}
engines {
name: "ctr_prediction"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/ctr_prediction"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
sparse_param_service_type: REMOTE
sparse_param_service_table_name: "test_dict"
}
engines { engines {
name: "bert" name: "bert"
type: "FLUID_GPU_ANALYSIS_DIR" type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file" reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne" reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/bert" model_data_path: "./data/model/paddle/fluid/bert"
......
...@@ -31,7 +31,7 @@ using baidu::paddle_serving::predictor::bert_service::Embedding_values; ...@@ -31,7 +31,7 @@ using baidu::paddle_serving::predictor::bert_service::Embedding_values;
extern int64_t MAX_SEQ_LEN = 128; extern int64_t MAX_SEQ_LEN = 128;
const bool POOLING = true; const bool POOLING = true;
const int LAYER_NUM = 12; const int LAYER_NUM = 12;
const int EMB_SIZE = 768; extern int EMB_SIZE = 768;
int BertServiceOp::inference() { int BertServiceOp::inference() {
timeval op_start; timeval op_start;
...@@ -49,6 +49,7 @@ int BertServiceOp::inference() { ...@@ -49,6 +49,7 @@ int BertServiceOp::inference() {
} }
MAX_SEQ_LEN = req->instances(0).max_seq_len(); MAX_SEQ_LEN = req->instances(0).max_seq_len();
EMB_SIZE = req->instances(0).emb_size();
paddle::PaddleTensor src_ids; paddle::PaddleTensor src_ids;
paddle::PaddleTensor pos_ids; paddle::PaddleTensor pos_ids;
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#endif #endif
#include "demo-serving/bert_service.pb.h" #include "demo-serving/bert_service.pb.h"
#include <sys/time.h>
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace serving { namespace serving {
......
...@@ -25,7 +25,8 @@ message BertReqInstance { ...@@ -25,7 +25,8 @@ message BertReqInstance {
repeated int64 sentence_type_ids = 2; repeated int64 sentence_type_ids = 2;
repeated int64 position_ids = 3; repeated int64 position_ids = 3;
repeated float input_masks = 4; repeated float input_masks = 4;
required int64 max_seq_len = 5; optional int64 max_seq_len = 5;
optional int64 emb_size = 6;
}; };
message Request { repeated BertReqInstance instances = 1; }; message Request { repeated BertReqInstance instances = 1; };
...@@ -34,7 +35,11 @@ message Embedding_values { repeated float values = 1; }; ...@@ -34,7 +35,11 @@ message Embedding_values { repeated float values = 1; };
message BertResInstance { repeated Embedding_values instances = 1; }; message BertResInstance { repeated Embedding_values instances = 1; };
message Response { repeated BertResInstance instances = 1; }; message Response {
repeated BertResInstance instances = 1;
optional int64 op_time = 2;
optional int64 infer_time = 3;
};
service BertService { service BertService {
rpc inference(Request) returns (Response); rpc inference(Request) returns (Response);
......
...@@ -25,7 +25,8 @@ message BertReqInstance { ...@@ -25,7 +25,8 @@ message BertReqInstance {
repeated int64 sentence_type_ids = 2; repeated int64 sentence_type_ids = 2;
repeated int64 position_ids = 3; repeated int64 position_ids = 3;
repeated float input_masks = 4; repeated float input_masks = 4;
required int64 max_seq_len = 5; optional int64 max_seq_len = 5;
optional int64 emb_size = 6;
}; };
message Request { repeated BertReqInstance instances = 1; }; message Request { repeated BertReqInstance instances = 1; };
...@@ -34,7 +35,11 @@ message Embedding_values { repeated float values = 1; }; ...@@ -34,7 +35,11 @@ message Embedding_values { repeated float values = 1; };
message BertResInstance { repeated Embedding_values instances = 1; }; message BertResInstance { repeated Embedding_values instances = 1; };
message Response { repeated BertResInstance instances = 1; }; message Response {
repeated BertResInstance instances = 1;
optional int64 op_time = 2;
optional int64 infert_time = 3;
};
service BertService { service BertService {
rpc inference(Request) returns (Response); rpc inference(Request) returns (Response);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册