Commit ed3f22b4 authored by wangjiawei04

link error, game on

Parent 91b84226
...@@ -14,10 +14,6 @@ list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp)
add_library(configure ${configure_srcs})
add_dependencies(configure brpc)
add_executable(test_configure
${CMAKE_CURRENT_LIST_DIR}/tests/test_configure.cpp)
target_link_libraries(test_configure configure protobuf)
install(TARGETS configure
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
......
...@@ -39,142 +39,6 @@ using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralDistKVInferOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
std::vector<uint64_t> keys;
std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0;
int dense_count = 0;
std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
size_t key_len = 0;
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
++dense_count;
continue;
}
++sparse_count;
size_t elem_num = 1;
for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
elem_num *= in->at(i).shape[s];
}
key_len += elem_num;
int64_t *data_ptr = static_cast<int64_t *>(in->at(i).data.data());
dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
}
keys.resize(key_len);
int key_idx = 0;
for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
std::copy(dataptr_size_pairs[i].first,
dataptr_size_pairs[i].first + dataptr_size_pairs[i].second,
keys.begin() + key_idx);
key_idx += dataptr_size_pairs[i].second;
}
Timer timeline;
int64_t cube_start = timeline.TimeStampUS();
timeline.Start();
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) {
LOG(ERROR) << "(logid=" << log_id
<< ") cube init error or cube config not given.";
return -1;
}
int ret = cube->seek(table_names[0], keys, &values);
int64_t cube_end = timeline.TimeStampUS();
if (values.size() != keys.size() || values[0].buff.size() == 0) {
LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
}
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
TensorVector sparse_out;
sparse_out.resize(sparse_count);
TensorVector dense_out;
dense_out.resize(dense_count);
int cube_val_idx = 0;
int sparse_idx = 0;
int dense_idx = 0;
std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
dense_out[dense_idx] = in->at(i);
++dense_idx;
continue;
}
sparse_out[sparse_idx].lod.resize(in->at(i).lod.size());
for (size_t x = 0; x < sparse_out[sparse_idx].lod.size(); ++x) {
sparse_out[sparse_idx].lod[x].resize(in->at(i).lod[x].size());
std::copy(in->at(i).lod[x].begin(),
in->at(i).lod[x].end(),
sparse_out[sparse_idx].lod[x].begin());
}
sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float));
float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
memcpy(data_ptr,
values[cube_val_idx].buff.data(),
values[cube_val_idx].buff.size());
cube_val_idx++;
}
++sparse_idx;
}
TensorVector infer_in;
infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
output_blob->SetBatchSize(batch_size);
output_blob->SetLogId(log_id);
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
int64_t start = timeline.TimeStampUS();
if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, cube_start);
AddBlobInfo(output_blob, cube_end);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
DEFINE_OP(GeneralDistKVInferOp);
......
...@@ -188,21 +188,6 @@ int GeneralDistKVQuantInferOp::inference() {
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
DEFINE_OP(GeneralDistKVQuantInferOp);
......
...@@ -36,53 +36,6 @@ using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralInferOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
output_blob->SetLogId(log_id);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
int batch_size = input_blob->_batch_size;
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
output_blob->_batch_size = batch_size;
VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
if (InferManager::instance().infer(
engine_name().c_str(), in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed do infer in fluid model: " << engine_name().c_str();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
DEFINE_OP(GeneralInferOp);
......
...@@ -71,179 +71,6 @@ int conf_check(const Request *req,
int GeneralReaderOp::inference() {
// reade request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
TensorVector *out = &res->tensor_vector;
res->SetLogId(log_id);
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed get op tls reader object output";
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
VLOG(2) << "(logid=" << log_id << ") print general model config done.";
// TODO(guru4elephant): how to do conditional check?
/*
int ret = conf_check(req, model_config);
if (ret != 0) {
LOG(ERROR) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
*/
// package tensor
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
// prepare basic information for input
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else if (elem_type[i] == 1) {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
} else if (elem_type[i] == 2) {
elem_size[i] = sizeof(int32_t);
lod_tensor.dtype = paddle::PaddleDType::INT32;
}
// implement lod tensor here
if (req->insts(0).tensor_array(i).lod_size() > 0) {
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
lod_tensor.lod.resize(1);
for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) {
lod_tensor.lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
}
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
} else {
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
int tensor_size = 0;
const Tensor &tensor = req->insts(0).tensor_array(i);
int data_len = 0;
if (tensor.int64_data_size() > 0) {
data_len = tensor.int64_data_size();
} else if (tensor.float_data_size() > 0) {
data_len = tensor.float_data_size();
} else if (tensor.int_data_size() > 0) {
data_len = tensor.int_data_size();
}
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
<< "]: " << data_len;
tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
int sample_len = 0;
if (tensor.shape_size() == 1) {
sample_len = data_len;
} else {
sample_len = tensor.shape(0);
}
VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
out->at(i).data.Resize(tensor_size * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(capacity[i] * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << capacity[i];
}
}
// fill the data into output general_blob
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).int64_data(0);
int offset = 0;
int elem_num = req->insts(0).tensor_array(i).int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).int64_data(k);
}
} else if (elem_type[i] == 1) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).float_data(0);
int offset = 0;
int elem_num = req->insts(0).tensor_array(i).float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).float_data(k);
}
} else if (elem_type[i] == 2) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).int_data(0);
int offset = 0;
int elem_num = req->insts(0).tensor_array(i).int_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).int_data(k);
}
}
}
VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
timeline.Pause();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
res->_batch_size = 1;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralReaderOp);
......
...@@ -40,160 +40,6 @@ using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralResponseOp::inference() {
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
const GeneralBlob *input_blob;
uint64_t log_id =
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
// double response_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
VLOG(2) << "(logid=" << log_id
<< ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
std::vector<int> fetch_index;
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
<< " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
// fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(),
// input_blob);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
ModelOutput *output = res->add_outputs();
// To get the order of model return values
output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is lod_tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
}
int var_idx = 0;
for (auto &idx : fetch_index) {
int cap = 1;
for (int j = 0; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
FetchInst *fetch_p = output->mutable_insts(0);
auto dtype = in->at(idx).dtype;
if (dtype == paddle::PaddleDType::INT64) {
VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
<< model_config->_fetch_name[idx] << "].";
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
// from
// https://stackoverflow.com/questions/15499641/copy-a-stdvector-to-a-repeated-field-from-protobuf-with-memcpy
// `Swap` method is faster than `{}` method.
google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
&tmp_data);
} else if (dtype == paddle::PaddleDType::FLOAT32) {
VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
<< model_config->_fetch_name[idx] << "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
google::protobuf::RepeatedField<float> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
&tmp_data);
} else if (dtype == paddle::PaddleDType::INT32) {
VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
<< model_config->_fetch_name[idx] << "].";
int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap(
&tmp_data);
}
if (model_config->_is_lod_fetch[idx]) {
if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
}
}
VLOG(2) << "(logid=" << log_id << ") fetch var ["
<< model_config->_fetch_name[idx] << "] ready";
var_idx++;
}
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
}
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
return 0;
}
......
...@@ -12,13 +12,12 @@ set_source_files_properties(
${pdserving_srcs}
PROPERTIES
COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure paddle_fluid)
if (WITH_TRT)
add_definitions(-DWITH_TRT)
endif()
target_link_libraries(pdserving
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_fluid ${paddle_depend_libs})
# install
install(TARGETS pdserving
RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin
......
...@@ -20,10 +20,9 @@
#include <utility>
#include <vector>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/bsf.h"
#include "core/predictor/framework/factory.h"
#include "core/predictor/framework/infer_data.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace predictor {
...@@ -105,8 +104,8 @@ class InferEngine {
virtual int thrd_initialize() { return thrd_initialize_impl(); }
virtual int thrd_clear() { return thrd_clear_impl(); }
virtual int thrd_finalize() { return thrd_finalize_impl(); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
return infer_impl1(in, out, batch_size);
virtual int infer() {
return infer_impl();
}
virtual int reload() = 0;
...@@ -120,11 +119,9 @@ class InferEngine {
virtual int thrd_finalize_impl() = 0;
virtual int thrd_clear_impl() = 0;
virtual int proc_finalize_impl() = 0;
virtual int infer_impl1(const void* in,
void* out,
uint32_t batch_size = -1) = 0;
virtual int infer_impl2(const BatchTensor& in,
BatchTensor& out) = 0; // NOLINT
virtual std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) = 0;
virtual std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) = 0;
virtual int infer_impl() = 0;
// end: framework inner call
};
...@@ -138,8 +135,6 @@ class ReloadableInferEngine : public InferEngine {
uint64_t last_revision;
};
typedef im::bsf::Task<Tensor, Tensor> TaskT;
virtual int load(const InferEngineCreationParams& params) = 0;
int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
...@@ -201,44 +196,11 @@ class ReloadableInferEngine : public InferEngine {
LOG(ERROR) << "Failed proc initialize impl";
return -1;
}
// init bsf framework
if (_infer_thread_num <= 0) {
return 0;
}
im::bsf::TaskExecutor<TaskT>::instance()->set_thread_init_fn(
boost::bind(&InferEngine::thrd_initialize_impl, this));
im::bsf::TaskExecutor<TaskT>::instance()->set_thread_reset_fn(
boost::bind(&InferEngine::thrd_clear_impl, this));
im::bsf::TaskExecutor<TaskT>::instance()->set_thread_callback_fn(
boost::bind(&InferEngine::infer_impl2, this, _1, _2));
im::bsf::TaskExecutor<TaskT>::instance()->set_batch_size(_infer_batch_size);
im::bsf::TaskExecutor<TaskT>::instance()->set_batch_align(
_infer_batch_align);
if (im::bsf::TaskExecutor<TaskT>::instance()->start(_infer_thread_num) !=
0) {
LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num;
return -1;
}
LOG(WARNING) << "Enable batch schedule framework, thread_num:"
<< _infer_thread_num << ", batch_size:" << _infer_batch_size
<< ", enable_batch_align:" << _infer_batch_align;
return 0;
}
int infer(const void* in, void* out, uint32_t batch_size = -1) {
if (_infer_thread_num <= 0) {
return infer_impl1(in, out, batch_size);
}
im::bsf::TaskManager<Tensor, Tensor> task_manager;
task_manager.schedule(*(reinterpret_cast<const BatchTensor*>(in)),
*(reinterpret_cast<BatchTensor*>(out)));
task_manager.wait();
return 0;
int infer() {
return infer_impl();
}
int thrd_initialize() {
...@@ -263,10 +225,6 @@ class ReloadableInferEngine : public InferEngine {
return -1;
}
if (_infer_thread_num > 0) {
im::bsf::TaskExecutor<TaskT>::instance()->stop();
}
return 0;
}
...@@ -417,10 +375,6 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
virtual int thrd_initialize_impl() {
// memory pool to be inited in non-serving-threads
if (MempoolWrapper::instance().thread_initialize() != 0) {
LOG(ERROR) << "Failed thread initialize mempool";
return -1;
}
ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
if (!md || load_data(md, _infer_engine_params) != 0) {
...@@ -430,17 +384,12 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
}
THREAD_SETSPECIFIC(_skey, md);
im::bsf::AutoMutex lock(_mutex);
_reload_vec.push_back(md);
return 0;
}
int thrd_clear_impl() {
// for non-serving-threads
if (MempoolWrapper::instance().thread_clear() != 0) {
LOG(ERROR) << "Failed thread clear mempool";
return -1;
}
return 0;
}
...@@ -538,11 +487,6 @@ class CloneDBReloadableInferEngine
}
virtual int thrd_initialize_impl() {
// memory pool to be inited in non-serving-threads
if (MempoolWrapper::instance().thread_initialize() != 0) {
LOG(ERROR) << "Failed thread initialize mempool";
return -1;
}
ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
if (!md || load_data(md, _pd->cores[_pd->current_idx]) != 0) {
...@@ -552,7 +496,6 @@ class CloneDBReloadableInferEngine
}
THREAD_SETSPECIFIC(DBReloadableInferEngine<EngineCore>::_skey, md);
im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
DBReloadableInferEngine<EngineCore>::_reload_vec.push_back(md);
return 0;
}
...@@ -571,25 +514,35 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) {
FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
return core->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) {
FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
}
return core->GetOutputHandle(name);
}
int infer_impl1(const void* in, void* out, uint32_t batch_size = -1) {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
int infer_impl() {
FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in infer_impl()";
return -1;
}
if (!core->Run(in, out)) {
if (!core->Run()) {
LOG(ERROR) << "Failed run fluid family core";
return -1;
}
return 0;
}
int infer_impl2(const BatchTensor& in, BatchTensor& out) { // NOLINT
return infer_impl1(&in, &out);
}
};
typedef FactoryPool<InferEngine> StaticInferFactory;
...@@ -715,13 +668,29 @@ class VersionedInferEngine : public InferEngine {
return _versions.begin()->second;
}
int infer(const void* in, void* out, uint32_t batch_size) {
int infer() {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
return -1;
}
return engine->infer(in, out, batch_size);
return engine->infer();
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
}
return engine->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) {
InferEngine* engine = default_engine();
if (!engine) {
LOG(WARNING) << "fail to get default engine";
}
return engine->GetOutputHandle(name);
}
template <typename T>
...@@ -740,14 +709,30 @@ class VersionedInferEngine : public InferEngine {
}
// versioned inference interface
int infer(const void* in, void* out, uint32_t batch_size, uint64_t version) {
int infer(uint64_t version) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
return -1;
}
return iter->second->infer(in, out, batch_size);
return iter->second->infer();
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(uint64_t version, const std::string& name) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
}
return iter->second->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(uint64_t version, const std::string& name) {
auto iter = _versions.find(version);
if (iter == _versions.end()) {
LOG(ERROR) << "Not found version engine: " << version;
}
return iter->second->GetOutputHandle(name);
}
template <typename T>
...@@ -774,12 +759,9 @@ class VersionedInferEngine : public InferEngine {
}
int thrd_finalize_impl() { return -1; }
int thrd_clear_impl() { return -1; }
int proc_finalize_impl() { return -1; }
int infer_impl1(const void* in, void* out, uint32_t batch_size = -1) {
int infer_impl() {
return -1;
}
int infer_impl2(const BatchTensor& in, BatchTensor& out) { // NOLINT
return -1;
} // NOLINT
private:
boost::unordered_map<uint64_t, InferEngine*> _versions;
...@@ -877,16 +859,27 @@ class InferManager {
}
// Inference interface
int infer(const char* model_name,
const void* in,
void* out,
uint32_t batch_size = -1) {
int infer(const char* model_name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
return -1;
}
return it->second->infer(in, out, batch_size);
return it->second->infer();
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char* model_name, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetInputHandle(name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const char* model_name, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetOutputHandle(name);
}
template <typename T>
...@@ -907,18 +900,28 @@ class InferManager {
// Versioned inference interface
int infer(const char* model_name,
const void* in,
void* out,
uint32_t batch_size,
uint64_t version) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
return -1;
}
return it->second->infer(in, out, batch_size, version);
return it->second->infer(version);
}
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char* model_name, uint64_t version, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetInputHandle(version, name);
}
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const char* model_name, uint64_t version, const std::string& name) {
auto it = _map.find(model_name);
if (it == _map.end()) {
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
}
return it->second->GetOutputHandle(version, name);
}
template <typename T>
T* get_core(const char* model_name, uint64_t version) {
auto it = _map.find(model_name);
......
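Note on the interface change above: infer() no longer carries raw tensor buffers or a batch size; feed and fetch data now move through paddle_infer::Tensor handles obtained from the engine. What follows is a minimal usage sketch, not part of this commit, of how a caller might drive the new handle-based InferManager API. The model name "general_model", the variable names "x" and "y", and the 2-D shape are illustrative assumptions only.

#include <string>
#include <vector>

// Hypothetical end-to-end call through the handle-based interface sketched above.
// "general_model", "x", "y" and the shape are assumptions for illustration.
int RunOnce(const std::vector<float>& input, int batch_size) {
  auto& manager = baidu::paddle_serving::predictor::InferManager::instance();
  // Feed: look up the input handle by name, set its shape, copy host data in.
  auto x = manager.GetInputHandle("general_model", "x");
  x->Reshape({batch_size, static_cast<int>(input.size()) / batch_size});
  x->CopyFromCpu(input.data());
  // Run the predictor held by the engine for this thread.
  if (manager.infer("general_model") != 0) {
    return -1;
  }
  // Fetch: read the output handle back into host memory.
  auto y = manager.GetOutputHandle("general_model", "y");
  std::vector<int> shape = y->shape();
  int numel = 1;
  for (int d : shape) numel *= d;
  std::vector<float> result(numel);
  y->CopyToCpu(result.data());
  return 0;
}

Since the tensors are owned by the predictor, data movement happens inside CopyFromCpu/CopyToCpu and the batch size travels with the tensor shape, which is why the separate batch_size argument disappears from infer() in this change.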
...@@ -28,8 +28,6 @@ namespace baidu {
namespace paddle_serving {
namespace fluid_cpu {
using configure::SigmoidConf;
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
...@@ -57,31 +55,28 @@ class GlobalPaddleCreateMutex {
pthread_mutex_t _mut;
};
class GlobalSigmoidCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalSigmoidCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual bool Run(const void* in_data, void* out_data) {
if (!_core->Run(*(std::vector<paddle::PaddleTensor>*)in_data,
(std::vector<paddle::PaddleTensor>*)out_data)) {
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
...@@ -92,8 +87,8 @@ class FluidFamilyCore {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
paddle::PaddlePredictor* p_predictor =
(paddle::PaddlePredictor*)origin_core;
Predictor* p_predictor =
(Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
...@@ -105,7 +100,7 @@ class FluidFamilyCore {
virtual void* get() { return _core.get(); }
protected:
std::unique_ptr<paddle::PaddlePredictor> _core;
std::shared_ptr<Predictor> _core;
};
// infer interface
...@@ -119,51 +114,19 @@ class FluidCpuAnalysisCore : public FluidFamilyCore {
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetParamsFile(data_path + "/__params__");
analysis_config.SetProgFile(data_path + "/__model__");
analysis_config.DisableGpu();
analysis_config.SetCpuMathLibraryNumThreads(1);
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
config.EnableMemoryOptim();
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidCpuNativeCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
} }
paddle::NativeConfig native_config;
config.SwitchSpecifyInputNames(true);
native_config.param_file = data_path + "/__params__";
native_config.prog_file = data_path + "/__model__";
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
...@@ -184,54 +147,24 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
config.SwitchIrOptim(false);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidCpuNativeDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::NativeConfig native_config;
native_config.model_dir = data_path;
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
...@@ -323,214 +256,6 @@ class Parameter {
float* _params;
};
class SigmoidModel {
public:
~SigmoidModel() {}
int load(const char* sigmoid_w_file,
const char* sigmoid_b_file,
float exp_max,
float exp_min) {
AutoLock lock(GlobalSigmoidCreateMutex::instance());
if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
LOG(ERROR) << "load params sigmoid_w failed.";
return -1;
}
VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
<< _sigmoid_w._params[1] << "].";
if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
LOG(ERROR) << "load params sigmoid_b failed.";
return -1;
}
VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
<< _sigmoid_b._params[1] << "].";
_exp_max_input = exp_max;
_exp_min_input = exp_min;
return 0;
}
int softmax(float x, double& o) { // NOLINT
float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
_y0 = (_y0 > _exp_max_input)
? _exp_max_input
: ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
_y1 = (_y1 > _exp_max_input)
? _exp_max_input
: ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
o = 1.0f / (1.0f + exp(_y0 - _y1));
return 0;
}
public:
Parameter _sigmoid_w;
Parameter _sigmoid_b;
float _exp_max_input;
float _exp_min_input;
};
class SigmoidFluidModel {
public:
int softmax(float x, double& o) { // NOLINT
return _sigmoid_core->softmax(x, o);
} // NOLINT
std::unique_ptr<SigmoidFluidModel> Clone() {
std::unique_ptr<SigmoidFluidModel> clone_model;
clone_model.reset(new SigmoidFluidModel());
clone_model->_sigmoid_core = _sigmoid_core;
clone_model->_fluid_core = _fluid_core->Clone();
return std::move(clone_model); // NOLINT
}
public:
std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
std::shared_ptr<SigmoidModel> _sigmoid_core;
};
class FluidCpuWithSigmoidCore : public FluidFamilyCore {
public:
virtual ~FluidCpuWithSigmoidCore() {}
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string model_path = params.get_path();
size_t pos = model_path.find_last_of("/\\");
std::string conf_path = model_path.substr(0, pos);
std::string conf_file = model_path.substr(pos);
configure::SigmoidConf conf;
if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
LOG(ERROR) << "failed load model path: " << model_path;
return -1;
}
_core.reset(new SigmoidFluidModel);
std::string fluid_model_data_path = conf.dnn_model_path();
predictor::InferEngineCreationParams new_params(params);
new_params.set_path(fluid_model_data_path);
int ret = load_fluid_model(new_params);
if (ret < 0) {
LOG(ERROR) << "fail to load fluid model.";
return -1;
}
const char* sigmoid_w_file = conf.sigmoid_w_file().c_str();
const char* sigmoid_b_file = conf.sigmoid_b_file().c_str();
float exp_max = conf.exp_max_input();
float exp_min = conf.exp_min_input();
_core->_sigmoid_core.reset(new SigmoidModel);
VLOG(2) << "create sigmoid core[" << _core->_sigmoid_core.get()
<< "], use count[" << _core->_sigmoid_core.use_count() << "].";
ret = _core->_sigmoid_core->load(
sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
if (ret < 0) {
LOG(ERROR) << "fail to load sigmoid model.";
return -1;
}
return 0;
}
virtual bool Run(const void* in_data, void* out_data) {
if (!_core->_fluid_core->Run(
*(std::vector<paddle::PaddleTensor>*)in_data,
(std::vector<paddle::PaddleTensor>*)out_data)) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int clone(SigmoidFluidModel* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
_core = origin_core->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
VLOG(2) << "clone sigmoid core[" << _core->_sigmoid_core.get()
<< "] use count[" << _core->_sigmoid_core.use_count() << "].";
return 0;
}
virtual SigmoidFluidModel* get() { return _core.get(); }
virtual int load_fluid_model(
const predictor::InferEngineCreationParams& params) = 0;
int softmax(float x, double& o) { // NOLINT
return _core->_sigmoid_core->softmax(x, o);
}
protected:
std::unique_ptr<SigmoidFluidModel> _core; // NOLINT
};
class FluidCpuNativeDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
public:
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::NativeConfig native_config;
native_config.model_dir = data_path;
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
public:
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
...@@ -30,28 +30,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuAnalysisDirWithSigmoidCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_DIR_SIGMOID");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuNativeCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_NATIVE");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuNativeDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_NATIVE_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuNativeDirWithSigmoidCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_NATIVE_DIR_SIGMOID");
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
...@@ -16,7 +16,7 @@
import sys
import paddle
import paddle.fluid as fluid
paddle.enable_static()
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
......