diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 8c1469706320acb2e6c03d90e9b4b88ccdcb261c..ad95b3ef6db215fddf165d0718d46037749af31f 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -31,11 +31,11 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "2.0.0-rc0")
+SET(PADDLE_VERSION "2.0.0-rc1")
 
 if (WITH_GPU)
   if (WITH_TRT)
-    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7-avx-mkl-trt6")
   else()
     SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
   endif()
diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt
index 9bf9cefc02d4c1dea4114f637463d5ad1a979cf7..8476192dd33c8fdf2583c3c5fc48b8d3e0ba0b9e 100644
--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
@@ -14,10 +14,6 @@ list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp)
 add_library(configure ${configure_srcs})
 add_dependencies(configure brpc)
 
-add_executable(test_configure
-  ${CMAKE_CURRENT_LIST_DIR}/tests/test_configure.cpp)
-target_link_libraries(test_configure configure protobuf)
-
 install(TARGETS configure
         ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
         )
diff --git a/core/general-server/op/general_dist_kv_infer_op.cpp b/core/general-server/op/general_dist_kv_infer_op.cpp
index 6809907226511f7de576f1e2bbdc21b7ac401422..946be34976b28a64481021b1a0953358557181a9 100644
--- a/core/general-server/op/general_dist_kv_infer_op.cpp
+++ b/core/general-server/op/general_dist_kv_infer_op.cpp
@@ -39,142 +39,6 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralDistKVInferOp::inference() {
-  VLOG(2) << "Going to run inference";
-  const std::vector<std::string> pre_node_names = pre_names();
-  if (pre_node_names.size() != 1) {
-    LOG(ERROR) << "This op(" << op_name()
-               << ") can only have one predecessor op, but received "
-               << pre_node_names.size();
-    return -1;
-  }
-  const std::string pre_name = pre_node_names[0];
-
-  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-  uint64_t log_id = input_blob->GetLogId();
-  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
-  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
-
-  if (!input_blob) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") Failed mutable depended argument, op:" << pre_name;
-    return -1;
-  }
-
-  const TensorVector *in = &input_blob->tensor_vector;
-  TensorVector *out = &output_blob->tensor_vector;
-  int batch_size = input_blob->GetBatchSize();
-  VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
-  std::vector<uint64_t> keys;
-  std::vector<rec::mcube::CubeValue> values;
-  int sparse_count = 0;
-  int dense_count = 0;
-  std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
-  size_t key_len = 0;
-  for (size_t i = 0; i < in->size(); ++i) {
-    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
-      ++dense_count;
-      continue;
-    }
-    ++sparse_count;
-    size_t elem_num = 1;
-    for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
-      elem_num *= in->at(i).shape[s];
-    }
-    key_len += elem_num;
-    int64_t *data_ptr = static_cast<int64_t *>(in->at(i).data.data());
-    dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
-  }
-  keys.resize(key_len);
-  int key_idx = 0;
-  for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
-    std::copy(dataptr_size_pairs[i].first,
-              dataptr_size_pairs[i].first + dataptr_size_pairs[i].second,
-              keys.begin() + key_idx);
-    key_idx += dataptr_size_pairs[i].second;
-  }
-  Timer timeline;
-  int64_t cube_start = timeline.TimeStampUS();
-  timeline.Start();
-  rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
-  std::vector<std::string> table_names = cube->get_table_names();
-  if (table_names.size() == 0) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") cube init error or cube config not given.";
-    return -1;
-  }
-  int ret = cube->seek(table_names[0], keys, &values);
-  int64_t cube_end = timeline.TimeStampUS();
-  if (values.size() != keys.size() || values[0].buff.size() == 0) {
-    LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
-  }
-  size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
-  TensorVector sparse_out;
-  sparse_out.resize(sparse_count);
-  TensorVector dense_out;
-  dense_out.resize(dense_count);
-  int cube_val_idx = 0;
-  int sparse_idx = 0;
-  int dense_idx = 0;
-  std::unordered_map<int, int> in_out_map;
-  baidu::paddle_serving::predictor::Resource &resource =
-      baidu::paddle_serving::predictor::Resource::instance();
-  std::shared_ptr<PaddleGeneralModelConfig> model_config =
-      resource.get_general_model_config();
-  for (size_t i = 0; i < in->size(); ++i) {
-    if (in->at(i).dtype != paddle::PaddleDType::INT64) {
-      dense_out[dense_idx] = in->at(i);
-      ++dense_idx;
-      continue;
-    }
-
-    sparse_out[sparse_idx].lod.resize(in->at(i).lod.size());
-    for (size_t x = 0; x < sparse_out[sparse_idx].lod.size(); ++x) {
-      sparse_out[sparse_idx].lod[x].resize(in->at(i).lod[x].size());
-      std::copy(in->at(i).lod[x].begin(),
-                in->at(i).lod[x].end(),
-                sparse_out[sparse_idx].lod[x].begin());
-    }
-    sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
-    sparse_out[sparse_idx].shape.push_back(
-        sparse_out[sparse_idx].lod[0].back());
-    sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
-    sparse_out[sparse_idx].name = model_config->_feed_name[i];
-    sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
-                                       EMBEDDING_SIZE * sizeof(float));
-    float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
-    for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
-      float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
-      memcpy(data_ptr,
-             values[cube_val_idx].buff.data(),
-             values[cube_val_idx].buff.size());
-      cube_val_idx++;
-    }
-    ++sparse_idx;
-  }
-  TensorVector infer_in;
-  infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
-  infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
-
-  output_blob->SetBatchSize(batch_size);
-  output_blob->SetLogId(log_id);
-
-  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
-
-  int64_t start = timeline.TimeStampUS();
-
-  if (InferManager::instance().infer(
-          engine_name().c_str(), &infer_in, out, batch_size)) {
-    LOG(ERROR) << "(logid=" << log_id
-               << ") Failed do infer in fluid model: " << engine_name();
-    return -1;
-  }
-
-  int64_t end = timeline.TimeStampUS();
-  CopyBlobInfo(input_blob, output_blob);
-  AddBlobInfo(output_blob, cube_start);
-  AddBlobInfo(output_blob, cube_end);
-  AddBlobInfo(output_blob, start);
-  AddBlobInfo(output_blob, end);
   return 0;
 }
 DEFINE_OP(GeneralDistKVInferOp);
diff --git a/core/general-server/op/general_dist_kv_quant_infer_op.cpp b/core/general-server/op/general_dist_kv_quant_infer_op.cpp
index 93ce76f3d3399ac62435352d2271154ab7f84235..7d347702768c13b997ea97291a8f9fde0ce042a2 100644
--- a/core/general-server/op/general_dist_kv_quant_infer_op.cpp
+++ b/core/general-server/op/general_dist_kv_quant_infer_op.cpp
@@ -188,21 +188,6 @@ int GeneralDistKVQuantInferOp::inference()
{ VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size; - Timer timeline; - int64_t start = timeline.TimeStampUS(); - timeline.Start(); - - if (InferManager::instance().infer( - engine_name().c_str(), &infer_in, out, batch_size)) { - LOG(ERROR) << "(logid=" << log_id - << ") Failed do infer in fluid model: " << engine_name(); - return -1; - } - - int64_t end = timeline.TimeStampUS(); - CopyBlobInfo(input_blob, output_blob); - AddBlobInfo(output_blob, start); - AddBlobInfo(output_blob, end); return 0; } DEFINE_OP(GeneralDistKVQuantInferOp); diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp index b9478542c71e04b0f3f80b277da7d8d41f636d3d..c059b9607ddc3996e52c43270be1b5cb1442c2b8 100644 --- a/core/general-server/op/general_infer_op.cpp +++ b/core/general-server/op/general_infer_op.cpp @@ -44,45 +44,11 @@ int GeneralInferOp::inference() { << pre_node_names.size(); return -1; } - const std::string pre_name = pre_node_names[0]; - - const GeneralBlob *input_blob = get_depend_argument(pre_name); - uint64_t log_id = input_blob->GetLogId(); - VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name; - GeneralBlob *output_blob = mutable_data(); - output_blob->SetLogId(log_id); - - if (!input_blob) { - LOG(ERROR) << "(logid=" << log_id - << ") Failed mutable depended argument, op:" << pre_name; - return -1; - } - - const TensorVector *in = &input_blob->tensor_vector; - TensorVector *out = &output_blob->tensor_vector; - - int batch_size = input_blob->_batch_size; - VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size; - - output_blob->_batch_size = batch_size; - - VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size; - - Timer timeline; - int64_t start = timeline.TimeStampUS(); - timeline.Start(); - if (InferManager::instance().infer( - engine_name().c_str(), in, out, batch_size)) { - LOG(ERROR) << "(logid=" << log_id - << ") Failed do infer in fluid model: " << engine_name().c_str(); + engine_name().c_str())) { return -1; } - - int64_t end = timeline.TimeStampUS(); - CopyBlobInfo(input_blob, output_blob); - AddBlobInfo(output_blob, start); - AddBlobInfo(output_blob, end); + std::cout << "Infer Success" << std::endl; return 0; } DEFINE_OP(GeneralInferOp); diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp index 0329fac6b9bb6eda59f3f6f1589cd00c3eec0fd9..20ab055761177fd17c2143c8347259c858ad79ef 100644 --- a/core/general-server/op/general_reader_op.cpp +++ b/core/general-server/op/general_reader_op.cpp @@ -20,6 +20,7 @@ #include "core/general-server/op/general_infer_helper.h" #include "core/predictor/framework/infer.h" #include "core/predictor/framework/memory.h" +#include "core/predictor/framework/resource.h" #include "core/util/include/timer.h" namespace baidu { @@ -32,6 +33,7 @@ using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::FeedInst; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; +using baidu::paddle_serving::predictor::InferManager; int conf_check(const Request *req, const std::shared_ptr &model_config) { @@ -71,75 +73,34 @@ int conf_check(const Request *req, int GeneralReaderOp::inference() { // reade request from client - const Request *req = dynamic_cast(get_request_message()); + // TODO: only support one engine here + std::string engine_name = "general_infer_0"; + const 
Request *req = dynamic_cast(get_request_message()); uint64_t log_id = req->log_id(); int input_var_num = 0; std::vector elem_type; std::vector elem_size; std::vector capacity; - - GeneralBlob *res = mutable_data(); - TensorVector *out = &res->tensor_vector; - - res->SetLogId(log_id); - - if (!res) { - LOG(ERROR) << "(logid=" << log_id - << ") Failed get op tls reader object output"; - } - - Timer timeline; - int64_t start = timeline.TimeStampUS(); int var_num = req->insts(0).tensor_array_size(); - VLOG(2) << "(logid=" << log_id << ") var num: " << var_num; - - VLOG(2) << "(logid=" << log_id - << ") start to call load general model_conf op"; - baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource::instance(); - - VLOG(2) << "(logid=" << log_id << ") get resource pointer done."; std::shared_ptr model_config = resource.get_general_model_config(); - - VLOG(2) << "(logid=" << log_id << ") print general model config done."; - - // TODO(guru4elephant): how to do conditional check? - /* - int ret = conf_check(req, model_config); - if (ret != 0) { - LOG(ERROR) << "model conf of server:"; - resource.print_general_model_config(model_config); - return 0; - } - */ - // package tensor - elem_type.resize(var_num); elem_size.resize(var_num); capacity.resize(var_num); - // prepare basic information for input for (int i = 0; i < var_num; ++i) { - paddle::PaddleTensor lod_tensor; - elem_type[i] = req->insts(0).tensor_array(i).elem_type(); - VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i]; - if (elem_type[i] == 0) { // int64 - elem_size[i] = sizeof(int64_t); - lod_tensor.dtype = paddle::PaddleDType::INT64; - } else if (elem_type[i] == 1) { - elem_size[i] = sizeof(float); - lod_tensor.dtype = paddle::PaddleDType::FLOAT32; - } else if (elem_type[i] == 2) { - elem_size[i] = sizeof(int32_t); - lod_tensor.dtype = paddle::PaddleDType::INT32; - } - // implement lod tensor here + std::string tensor_name = model_config->_feed_name[i]; + std::cout << "START Tensor Name: " <> lod; + std::vector shape; + // get lod info here if (req->insts(0).tensor_array(i).lod_size() > 0) { - VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor"; - lod_tensor.lod.resize(1); + lod.resize(1); for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) { - lod_tensor.lod[0].push_back(req->insts(0).tensor_array(i).lod(k)); + lod[0].push_back(req->insts(0).tensor_array(i).lod(k)); } capacity[i] = 1; for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) { @@ -147,103 +108,55 @@ int GeneralReaderOp::inference() { VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim; capacity[i] *= dim; - lod_tensor.shape.push_back(dim); + shape.push_back(dim); } VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is tensor, capacity: " << capacity[i]; - } else { + } + else { capacity[i] = 1; for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) { int dim = req->insts(0).tensor_array(i).shape(k); VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim; capacity[i] *= dim; - lod_tensor.shape.push_back(dim); + shape.push_back(dim); } VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is tensor, capacity: " << capacity[i]; } - lod_tensor.name = model_config->_feed_name[i]; - out->push_back(lod_tensor); - } - // specify the memory needed for output tensor_vector - for (int i = 0; i < var_num; ++i) { - if (out->at(i).lod.size() == 1) { - int tensor_size = 0; - const Tensor &tensor = req->insts(0).tensor_array(i); 
- int data_len = 0; - if (tensor.int64_data_size() > 0) { - data_len = tensor.int64_data_size(); - } else if (tensor.float_data_size() > 0) { - data_len = tensor.float_data_size(); - } else if (tensor.int_data_size() > 0) { - data_len = tensor.int_data_size(); - } - VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i - << "]: " << data_len; - tensor_size += data_len; - - int cur_len = out->at(i).lod[0].back(); - VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len; - - int sample_len = 0; - if (tensor.shape_size() == 1) { - sample_len = data_len; - } else { - sample_len = tensor.shape(0); - } - VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len; - out->at(i).data.Resize(tensor_size * elem_size[i]); - VLOG(2) << "(logid=" << log_id << ") var[" << i - << "] is lod_tensor and len=" << out->at(i).lod[0].back(); - } else { - out->at(i).data.Resize(capacity[i] * elem_size[i]); - VLOG(2) << "(logid=" << log_id << ") var[" << i - << "] is tensor and capacity=" << capacity[i]; - } - } - - // fill the data into output general_blob - for (int i = 0; i < var_num; ++i) { - if (elem_type[i] == 0) { - int64_t *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << req->insts(0).tensor_array(i).int64_data(0); - int offset = 0; + lod_tensor->SetLoD(lod); + lod_tensor->Reshape(shape); + std::cout << "FINI Set Lod and Reshape, and elem type: "<< elem_type[i] << std::endl; + // insert data here + if (req->insts(0).tensor_array(i).elem_type() == 0) { + // TODO: Copy twice here, can optimize int elem_num = req->insts(0).tensor_array(i).int64_data_size(); + std::vector data(elem_num); + int64_t* dst_ptr = data.data(); for (int k = 0; k < elem_num; ++k) { - dst_ptr[offset + k] = req->insts(0).tensor_array(i).int64_data(k); + dst_ptr[k] = req->insts(0).tensor_array(i).int64_data(k); } - } else if (elem_type[i] == 1) { - float *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << req->insts(0).tensor_array(i).float_data(0); - int offset = 0; + lod_tensor->CopyFromCpu(dst_ptr); + } else if(req->insts(0).tensor_array(i).elem_type() == 1) { int elem_num = req->insts(0).tensor_array(i).float_data_size(); + std::vector data(elem_num); + float* dst_ptr = data.data(); for (int k = 0; k < elem_num; ++k) { - dst_ptr[offset + k] = req->insts(0).tensor_array(i).float_data(k); + dst_ptr[k] = req->insts(0).tensor_array(i).float_data(k); } - } else if (elem_type[i] == 2) { - int32_t *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << req->insts(0).tensor_array(i).int_data(0); - int offset = 0; + lod_tensor->CopyFromCpu(dst_ptr); + } else if(req->insts(0).tensor_array(i).elem_type()== 2) { int elem_num = req->insts(0).tensor_array(i).int_data_size(); + std::vector data(elem_num); + int32_t* dst_ptr = data.data(); for (int k = 0; k < elem_num; ++k) { - dst_ptr[offset + k] = req->insts(0).tensor_array(i).int_data(k); + dst_ptr[k] = req->insts(0).tensor_array(i).int_data(k); } + lod_tensor->CopyFromCpu(dst_ptr); } + std::cout << "FINISH Tensor Name: " <size(); - timeline.Pause(); - int64_t end = timeline.TimeStampUS(); - res->p_size = 0; - res->_batch_size = 1; - AddBlobInfo(res, start); - AddBlobInfo(res, end); - - VLOG(2) << "(logid=" << log_id << ") read data from client success"; return 0; } DEFINE_OP(GeneralReaderOp); diff --git 
a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index 5f80510f79f8acf09aed9f7f65e84b9cfaa9a8ed..7ccc01ca30903d9ce1bb90dce7ae59a7cad4d3a0 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -40,160 +40,55 @@ using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; int GeneralResponseOp::inference() { - const std::vector pre_node_names = pre_names(); - VLOG(2) << "pre node names size: " << pre_node_names.size(); - const GeneralBlob *input_blob; - uint64_t log_id = - get_depend_argument(pre_node_names[0])->GetLogId(); - const Request *req = dynamic_cast(get_request_message()); // response inst with only fetch_var_names Response *res = mutable_data(); - - Timer timeline; - // double response_time = 0.0; - // timeline.Start(); - int64_t start = timeline.TimeStampUS(); - - VLOG(2) << "(logid=" << log_id - << ") start to call load general model_conf op"; baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource::instance(); - - VLOG(2) << "(logid=" << log_id << ") get resource pointer done."; std::shared_ptr model_config = resource.get_general_model_config(); - - VLOG(2) << "(logid=" << log_id - << ") max body size : " << brpc::fLU64::FLAGS_max_body_size; - - std::vector fetch_index; - fetch_index.resize(req->fetch_var_names_size()); + std::vector capacity(req->fetch_var_names_size(), 1); + std::string engine_name = "general_infer_0"; + ModelOutput *output = res->add_outputs(); + FetchInst *fetch_inst = output->add_insts(); + FetchInst *fetch_p = output->mutable_insts(0); + std::vector outs = InferManager::instance().GetOutputNames(engine_name.c_str()); for (int i = 0; i < req->fetch_var_names_size(); ++i) { - fetch_index[i] = - model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)]; - } - - for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) { - const std::string &pre_name = pre_node_names[pi]; - VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name - << " (" << pre_node_names.size() << ")"; - input_blob = get_depend_argument(pre_name); - // fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(), - // input_blob); - if (!input_blob) { - LOG(ERROR) << "(logid=" << log_id - << ") Failed mutable depended argument, op: " << pre_name; - return -1; + Tensor *tensor = fetch_inst->add_tensor_array(); + std::string tensor_name = outs[i]; + auto lod_tensor = InferManager::instance().GetOutputHandle(engine_name.c_str(), tensor_name.c_str()); + std::vector shape = lod_tensor->shape(); + for (int k = 0; k < shape.size(); ++k) { + capacity[i] *= shape[k]; + tensor->add_shape(shape[k]); } - - const TensorVector *in = &input_blob->tensor_vector; - - ModelOutput *output = res->add_outputs(); - // To get the order of model return values - output->set_engine_name(pre_name); - FetchInst *fetch_inst = output->add_insts(); - - for (auto &idx : fetch_index) { - Tensor *tensor = fetch_inst->add_tensor_array(); - if (model_config->_is_lod_fetch[idx]) { - VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] " - << model_config->_fetch_name[idx] << " is lod_tensor"; - for (int k = 0; k < in->at(idx).shape.size(); ++k) { - VLOG(2) << "(logid=" << log_id << ") shape[" << k - << "]: " << in->at(idx).shape[k]; - tensor->add_shape(in->at(idx).shape[k]); - } - } else { - VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] " - << model_config->_fetch_name[idx] << 
" is tensor"; - for (int k = 0; k < in->at(idx).shape.size(); ++k) { - VLOG(2) << "(logid=" << log_id << ") shape[" << k - << "]: " << in->at(idx).shape[k]; - tensor->add_shape(in->at(idx).shape[k]); - } - } + auto dtype = lod_tensor->type(); + if (dtype == paddle::PaddleDType::INT64) { + std::vector datas(capacity[i]); + int64_t* data_ptr = datas.data(); + lod_tensor->CopyToCpu(data_ptr); + google::protobuf::RepeatedField tmp_data(data_ptr, data_ptr + capacity[i]); + tensor->mutable_int64_data()->Swap(&tmp_data); + } else if (dtype == paddle::PaddleDType::FLOAT32) { + std::vector datas(capacity[i]); + float* data_ptr = datas.data(); + lod_tensor->CopyToCpu(data_ptr); + google::protobuf::RepeatedField tmp_data(data_ptr, data_ptr + capacity[i]); + tensor->mutable_float_data()->Swap(&tmp_data); + } else if (dtype == paddle::PaddleDType::INT32) { + std::vector datas(capacity[i]); + int32_t* data_ptr = datas.data(); + lod_tensor->CopyToCpu(data_ptr); + google::protobuf::RepeatedField tmp_data(data_ptr, data_ptr + capacity[i]); + tensor->mutable_int_data()->Swap(&tmp_data); } - - int var_idx = 0; - for (auto &idx : fetch_index) { - int cap = 1; - for (int j = 0; j < in->at(idx).shape.size(); ++j) { - cap *= in->at(idx).shape[j]; + std::vector> lod = lod_tensor->lod(); + if (lod.size() > 0) { + for (int j = 0; j < lod[0].size(); ++j) { + tensor->add_lod(lod[0][j]); } - - FetchInst *fetch_p = output->mutable_insts(0); - auto dtype = in->at(idx).dtype; - - if (dtype == paddle::PaddleDType::INT64) { - VLOG(2) << "(logid=" << log_id << ") Prepare int64 var [" - << model_config->_fetch_name[idx] << "]."; - int64_t *data_ptr = static_cast(in->at(idx).data.data()); - // from - // https://stackoverflow.com/questions/15499641/copy-a-stdvector-to-a-repeated-field-from-protobuf-with-memcpy - // `Swap` method is faster than `{}` method. - google::protobuf::RepeatedField tmp_data(data_ptr, - data_ptr + cap); - fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap( - &tmp_data); - } else if (dtype == paddle::PaddleDType::FLOAT32) { - VLOG(2) << "(logid=" << log_id << ") Prepare float var [" - << model_config->_fetch_name[idx] << "]."; - float *data_ptr = static_cast(in->at(idx).data.data()); - google::protobuf::RepeatedField tmp_data(data_ptr, - data_ptr + cap); - fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap( - &tmp_data); - } else if (dtype == paddle::PaddleDType::INT32) { - VLOG(2) << "(logid=" << log_id << ")Prepare int32 var [" - << model_config->_fetch_name[idx] << "]."; - int32_t *data_ptr = static_cast(in->at(idx).data.data()); - google::protobuf::RepeatedField tmp_data(data_ptr, - data_ptr + cap); - fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap( - &tmp_data); - } - - if (model_config->_is_lod_fetch[idx]) { - if (in->at(idx).lod.size() > 0) { - for (int j = 0; j < in->at(idx).lod[0].size(); ++j) { - fetch_p->mutable_tensor_array(var_idx)->add_lod( - in->at(idx).lod[0][j]); - } - } - } - - VLOG(2) << "(logid=" << log_id << ") fetch var [" - << model_config->_fetch_name[idx] << "] ready"; - var_idx++; } } - - if (req->profile_server()) { - int64_t end = timeline.TimeStampUS(); - // TODO(barriery): multi-model profile_time. - // At present, only the response_op is multi-input, so here we get - // the profile_time by hard coding. It needs to be replaced with - // a more elegant way. 
- for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) { - input_blob = get_depend_argument(pre_node_names[pi]); - VLOG(2) << "(logid=" << log_id - << ") p size for input blob: " << input_blob->p_size; - int profile_time_idx = -1; - if (pi == 0) { - profile_time_idx = 0; - } else { - profile_time_idx = input_blob->p_size - 2; - } - for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) { - res->add_profile_time(input_blob->time_stamp[profile_time_idx]); - } - } - // TODO(guru4elephant): find more elegant way to do this - res->add_profile_time(start); - res->add_profile_time(end); - } - return 0; } diff --git a/core/predictor/CMakeLists.txt b/core/predictor/CMakeLists.txt index 637c7c15530273bc908ec2f8693a3d66989eebd2..10fcd0b23b2d76a3e693bc29e07f5add663dbcdf 100644 --- a/core/predictor/CMakeLists.txt +++ b/core/predictor/CMakeLists.txt @@ -12,13 +12,12 @@ set_source_files_properties( ${pdserving_srcs} PROPERTIES COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") -add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure) +add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_fluid) if (WITH_TRT) add_definitions(-DWITH_TRT) endif() target_link_libraries(pdserving - brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) - + brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_fluid ${paddle_depend_libs}) # install install(TARGETS pdserving RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h index 431bc456326c1714dce48e2f6321bf58f3e021ce..cc0cb8f6593f3d55538684571bcd16554925f95e 100644 --- a/core/predictor/framework/infer.h +++ b/core/predictor/framework/infer.h @@ -20,10 +20,9 @@ #include #include #include "core/predictor/common/inner_common.h" -#include "core/predictor/framework/bsf.h" #include "core/predictor/framework/factory.h" #include "core/predictor/framework/infer_data.h" - +#include "paddle_inference_api.h" // NOLINT namespace baidu { namespace paddle_serving { namespace predictor { @@ -105,8 +104,8 @@ class InferEngine { virtual int thrd_initialize() { return thrd_initialize_impl(); } virtual int thrd_clear() { return thrd_clear_impl(); } virtual int thrd_finalize() { return thrd_finalize_impl(); } - virtual int infer(const void* in, void* out, uint32_t batch_size = -1) { - return infer_impl1(in, out, batch_size); + virtual int infer() { + return infer_impl(); } virtual int reload() = 0; @@ -120,11 +119,11 @@ class InferEngine { virtual int thrd_finalize_impl() = 0; virtual int thrd_clear_impl() = 0; virtual int proc_finalize_impl() = 0; - virtual int infer_impl1(const void* in, - void* out, - uint32_t batch_size = -1) = 0; - virtual int infer_impl2(const BatchTensor& in, - BatchTensor& out) = 0; // NOLINT + virtual std::vector GetInputNames() = 0; + virtual std::vector GetOutputNames() = 0; + virtual std::unique_ptr GetInputHandle(const std::string& name) = 0; + virtual std::unique_ptr GetOutputHandle(const std::string& name) = 0; + virtual int infer_impl() = 0; // end: framework inner call }; @@ -138,8 +137,6 @@ class ReloadableInferEngine : public InferEngine { uint64_t last_revision; }; - typedef im::bsf::Task TaskT; - virtual int load(const InferEngineCreationParams& params) = 0; int proc_initialize_impl(const configure::EngineDesc& conf, bool version) { @@ -201,44 
+198,11 @@ class ReloadableInferEngine : public InferEngine { LOG(ERROR) << "Failed proc initialize impl"; return -1; } - - // init bsf framework - if (_infer_thread_num <= 0) { - return 0; - } - - im::bsf::TaskExecutor::instance()->set_thread_init_fn( - boost::bind(&InferEngine::thrd_initialize_impl, this)); - im::bsf::TaskExecutor::instance()->set_thread_reset_fn( - boost::bind(&InferEngine::thrd_clear_impl, this)); - im::bsf::TaskExecutor::instance()->set_thread_callback_fn( - boost::bind(&InferEngine::infer_impl2, this, _1, _2)); - im::bsf::TaskExecutor::instance()->set_batch_size(_infer_batch_size); - im::bsf::TaskExecutor::instance()->set_batch_align( - _infer_batch_align); - if (im::bsf::TaskExecutor::instance()->start(_infer_thread_num) != - 0) { - LOG(ERROR) << "Failed start bsf executor, threads:" << _infer_thread_num; - return -1; - } - - LOG(WARNING) << "Enable batch schedule framework, thread_num:" - << _infer_thread_num << ", batch_size:" << _infer_batch_size - << ", enable_batch_align:" << _infer_batch_align; - return 0; } - int infer(const void* in, void* out, uint32_t batch_size = -1) { - if (_infer_thread_num <= 0) { - return infer_impl1(in, out, batch_size); - } - - im::bsf::TaskManager task_manager; - task_manager.schedule(*(reinterpret_cast(in)), - *(reinterpret_cast(out))); - task_manager.wait(); - return 0; + int infer() { + return infer_impl(); } int thrd_initialize() { @@ -263,10 +227,6 @@ class ReloadableInferEngine : public InferEngine { return -1; } - if (_infer_thread_num > 0) { - im::bsf::TaskExecutor::instance()->stop(); - } - return 0; } @@ -417,10 +377,6 @@ class DBReloadableInferEngine : public ReloadableInferEngine { virtual int thrd_initialize_impl() { // memory pool to be inited in non-serving-threads - if (MempoolWrapper::instance().thread_initialize() != 0) { - LOG(ERROR) << "Failed thread initialize mempool"; - return -1; - } ModelData* md = new (std::nothrow) ModelData; if (!md || load_data(md, _infer_engine_params) != 0) { @@ -430,17 +386,12 @@ class DBReloadableInferEngine : public ReloadableInferEngine { } THREAD_SETSPECIFIC(_skey, md); - im::bsf::AutoMutex lock(_mutex); _reload_vec.push_back(md); return 0; } int thrd_clear_impl() { // for non-serving-threads - if (MempoolWrapper::instance().thread_clear() != 0) { - LOG(ERROR) << "Failed thread clear mempool"; - return -1; - } return 0; } @@ -538,11 +489,6 @@ class CloneDBReloadableInferEngine } virtual int thrd_initialize_impl() { - // memory pool to be inited in non-serving-threads - if (MempoolWrapper::instance().thread_initialize() != 0) { - LOG(ERROR) << "Failed thread initialize mempool"; - return -1; - } ModelData* md = new (std::nothrow) ModelData; if (!md || load_data(md, _pd->cores[_pd->current_idx]) != 0) { @@ -552,7 +498,6 @@ class CloneDBReloadableInferEngine } THREAD_SETSPECIFIC(DBReloadableInferEngine::_skey, md); - im::bsf::AutoMutex lock(DBReloadableInferEngine::_mutex); DBReloadableInferEngine::_reload_vec.push_back(md); return 0; } @@ -571,25 +516,51 @@ class FluidInferEngine : public CloneDBReloadableInferEngine { public: // NOLINT FluidInferEngine() {} ~FluidInferEngine() {} + std::vector GetInputNames() { + FluidFamilyCore* core = DBReloadableInferEngine::get_core(); + if (!core || !core->get()) { + LOG(ERROR) << "Failed get fluid core in GetInputHandle()"; + } + return core->GetInputNames(); + } + + std::vector GetOutputNames() { + FluidFamilyCore* core = DBReloadableInferEngine::get_core(); + if (!core || !core->get()) { + LOG(ERROR) << "Failed get fluid core in 
GetInputHandle()"; + } + return core->GetOutputNames(); + } + + std::unique_ptr GetInputHandle(const std::string& name) { + FluidFamilyCore* core = DBReloadableInferEngine::get_core(); + if (!core || !core->get()) { + LOG(ERROR) << "Failed get fluid core in GetInputHandle()"; + } + return core->GetInputHandle(name); + } + + std::unique_ptr GetOutputHandle(const std::string& name) { + FluidFamilyCore* core = DBReloadableInferEngine::get_core(); + if (!core || !core->get()) { + LOG(ERROR) << "Failed get fluid core in GetOutputHandle()"; + } + return core->GetOutputHandle(name); + } - int infer_impl1(const void* in, void* out, uint32_t batch_size = -1) { - FluidFamilyCore* core = - DBReloadableInferEngine::get_core(); + int infer_impl() { + FluidFamilyCore* core = DBReloadableInferEngine::get_core(); if (!core || !core->get()) { LOG(ERROR) << "Failed get fluid core in infer_impl()"; return -1; } - if (!core->Run(in, out)) { + if (!core->Run()) { LOG(ERROR) << "Failed run fluid family core"; return -1; } return 0; } - - int infer_impl2(const BatchTensor& in, BatchTensor& out) { // NOLINT - return infer_impl1(&in, &out); - } }; typedef FactoryPool StaticInferFactory; @@ -715,13 +686,43 @@ class VersionedInferEngine : public InferEngine { return _versions.begin()->second; } - int infer(const void* in, void* out, uint32_t batch_size) { + int infer() { InferEngine* engine = default_engine(); if (!engine) { LOG(WARNING) << "fail to get default engine"; return -1; } - return engine->infer(in, out, batch_size); + return engine->infer(); + } + + std::vector GetInputNames() { + InferEngine* engine = default_engine(); + if (!engine) { + LOG(WARNING) << "fail to get default engine"; + } + return engine->GetInputNames(); + } + std::vector GetOutputNames() { + InferEngine* engine = default_engine(); + if (!engine) { + LOG(WARNING) << "fail to get default engine"; + } + return engine->GetOutputNames(); + } + std::unique_ptr GetInputHandle(const std::string& name) { + InferEngine* engine = default_engine(); + if (!engine) { + LOG(WARNING) << "fail to get default engine"; + } + return engine->GetInputHandle(name); + } + + std::unique_ptr GetOutputHandle(const std::string& name) { + InferEngine* engine = default_engine(); + if (!engine) { + LOG(WARNING) << "fail to get default engine"; + } + return engine->GetOutputHandle(name); } template @@ -740,14 +741,45 @@ class VersionedInferEngine : public InferEngine { } // versioned inference interface - int infer(const void* in, void* out, uint32_t batch_size, uint64_t version) { + int infer(uint64_t version) { auto iter = _versions.find(version); if (iter == _versions.end()) { LOG(ERROR) << "Not found version engine: " << version; return -1; } - return iter->second->infer(in, out, batch_size); + return iter->second->infer(); + } + std::vector GetInputNames(uint64_t version) { + auto iter = _versions.find(version); + if (iter == _versions.end()) { + LOG(ERROR) << "Not found version engine: " << version; + } + return iter->second->GetInputNames(); + } + + std::vector GetOutputNames(uint64_t version) { + auto iter = _versions.find(version); + if (iter == _versions.end()) { + LOG(ERROR) << "Not found version engine: " << version; + } + return iter->second->GetOutputNames(); + } + + std::unique_ptr GetInputHandle(uint64_t version, const std::string& name) { + auto iter = _versions.find(version); + if (iter == _versions.end()) { + LOG(ERROR) << "Not found version engine: " << version; + } + return iter->second->GetInputHandle(name); + } + + std::unique_ptr 
GetOutputHandle(uint64_t version, const std::string& name) { + auto iter = _versions.find(version); + if (iter == _versions.end()) { + LOG(ERROR) << "Not found version engine: " << version; + } + return iter->second->GetOutputHandle(name); } template @@ -774,12 +806,9 @@ class VersionedInferEngine : public InferEngine { int thrd_finalize_impl() { return -1; } int thrd_clear_impl() { return -1; } int proc_finalize_impl() { return -1; } - int infer_impl1(const void* in, void* out, uint32_t batch_size = -1) { + int infer_impl() { return -1; } - int infer_impl2(const BatchTensor& in, BatchTensor& out) { // NOLINT - return -1; - } // NOLINT private: boost::unordered_map _versions; @@ -877,16 +906,42 @@ class InferManager { } // Inference interface - int infer(const char* model_name, - const void* in, - void* out, - uint32_t batch_size = -1) { + int infer(const char* model_name) { auto it = _map.find(model_name); if (it == _map.end()) { LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; return -1; } - return it->second->infer(in, out, batch_size); + return it->second->infer(); + } + + std::vector GetInputNames(const char* model_name) { + auto it = _map.find(model_name); + if (it == _map.end()) { + LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetInputNames(); + } + std::vector GetOutputNames(const char* model_name) { + auto it = _map.find(model_name); + if (it == _map.end()) { + LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetOutputNames(); + } + std::unique_ptr GetInputHandle(const char* model_name, const std::string& name) { + auto it = _map.find(model_name); + if (it == _map.end()) { + LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetInputHandle(name); + } + std::unique_ptr GetOutputHandle(const char* model_name, const std::string& name) { + auto it = _map.find(model_name); + if (it == _map.end()) { + LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetOutputHandle(name); } template @@ -907,18 +962,44 @@ class InferManager { // Versioned inference interface int infer(const char* model_name, - const void* in, - void* out, - uint32_t batch_size, uint64_t version) { auto it = _map.find(model_name); if (it == _map.end()) { LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; return -1; } - return it->second->infer(in, out, batch_size, version); + return it->second->infer(version); + } + std::vector GetInputNames(const char* model_name, uint64_t version) { + auto it = _map.find(model_name); + if (it == _map.end()) { + LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetInputNames(version); } + std::vector GetOutputNames(const char* model_name, uint64_t version) { + auto it = _map.find(model_name); + if (it == _map.end()) { + LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetOutputNames(version); + } + + std::unique_ptr GetInputHandle(const char* model_name, uint64_t version, const std::string& name) { + auto it = _map.find(model_name); + if (it == _map.end()) { + LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetInputHandle(version, name); + } + std::unique_ptr GetOutputHandle(const char* model_name, uint64_t version, const std::string& name) { + auto it = _map.find(model_name); + if (it == _map.end()) { + 
LOG(WARNING) << "Cannot find engine in map, model name:" << model_name; + } + return it->second->GetOutputHandle(version, name); + } template T* get_core(const char* model_name, uint64_t version) { auto it = _map.find(model_name); diff --git a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h index a4d8dda71a7977185106bb1552cb8f39ef6bc50e..159fc948318dcdf4bae047d9e41d95c54e7ac95a 100644 --- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h +++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h @@ -28,8 +28,6 @@ namespace baidu { namespace paddle_serving { namespace fluid_cpu { -using configure::SigmoidConf; - class AutoLock { public: explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) { @@ -57,31 +55,36 @@ class GlobalPaddleCreateMutex { pthread_mutex_t _mut; }; -class GlobalSigmoidCreateMutex { - public: - pthread_mutex_t& mutex() { return _mut; } - static pthread_mutex_t& instance() { - static GlobalSigmoidCreateMutex gmutex; - return gmutex.mutex(); - } - - private: - GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); } - - pthread_mutex_t _mut; -}; +using paddle_infer::Config; +using paddle_infer::Predictor; +using paddle_infer::Tensor; +using paddle_infer::CreatePredictor; // data interface class FluidFamilyCore { public: virtual ~FluidFamilyCore() {} - virtual bool Run(const void* in_data, void* out_data) { - if (!_core->Run(*(std::vector*)in_data, - (std::vector*)out_data)) { + virtual std::vector GetInputNames() { + return _core->GetInputNames(); + } + + virtual std::unique_ptr GetInputHandle(const std::string& name) { + return _core->GetInputHandle(name); + } + + virtual std::vector GetOutputNames() { + return _core->GetOutputNames(); + } + + virtual std::unique_ptr GetOutputHandle(const std::string& name) { + return _core->GetOutputHandle(name); + } + + virtual bool Run() { + if (!_core->Run()) { LOG(ERROR) << "Failed call Run with paddle predictor"; return false; } - return true; } @@ -92,8 +95,8 @@ class FluidFamilyCore { LOG(ERROR) << "origin paddle Predictor is null."; return -1; } - paddle::PaddlePredictor* p_predictor = - (paddle::PaddlePredictor*)origin_core; + Predictor* p_predictor = + (Predictor*)origin_core; _core = p_predictor->Clone(); if (_core.get() == NULL) { LOG(ERROR) << "fail to clone paddle predictor: " << origin_core; @@ -105,7 +108,7 @@ class FluidFamilyCore { virtual void* get() { return _core.get(); } protected: - std::unique_ptr _core; + std::shared_ptr _core; }; // infer interface @@ -119,51 +122,19 @@ class FluidCpuAnalysisCore : public FluidFamilyCore { return -1; } - paddle::AnalysisConfig analysis_config; - analysis_config.SetParamsFile(data_path + "/__params__"); - analysis_config.SetProgFile(data_path + "/__model__"); - analysis_config.DisableGpu(); - analysis_config.SetCpuMathLibraryNumThreads(1); + Config config; + config.SetParamsFile(data_path + "/__params__"); + config.SetProgFile(data_path + "/__model__"); + config.DisableGpu(); + config.SetCpuMathLibraryNumThreads(1); if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(); + config.EnableMemoryOptim(); } - analysis_config.SwitchSpecifyInputNames(true); - AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core = - paddle::CreatePaddlePredictor(analysis_config); - if (NULL == _core.get()) { - LOG(ERROR) << "create paddle predictor failed, path: " << data_path; - return -1; - } - - VLOG(2) << "create paddle predictor sucess, path: " << 
data_path; - return 0; - } -}; - -class FluidCpuNativeCore : public FluidFamilyCore { - public: - int create(const predictor::InferEngineCreationParams& params) { - std::string data_path = params.get_path(); - if (access(data_path.c_str(), F_OK) == -1) { - LOG(ERROR) << "create paddle predictor failed, path not exits: " - << data_path; - return -1; - } - - paddle::NativeConfig native_config; - native_config.param_file = data_path + "/__params__"; - native_config.prog_file = data_path + "/__model__"; - native_config.use_gpu = false; - native_config.device = 0; - native_config.fraction_of_gpu_memory = 0; - + config.SwitchSpecifyInputNames(true); AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core = paddle::CreatePaddlePredictor( - native_config); + _core = CreatePredictor(config); if (NULL == _core.get()) { LOG(ERROR) << "create paddle predictor failed, path: " << data_path; return -1; @@ -184,54 +155,24 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore { return -1; } - paddle::AnalysisConfig analysis_config; - analysis_config.SetModel(data_path); - analysis_config.DisableGpu(); - analysis_config.SwitchSpecifyInputNames(true); - analysis_config.SetCpuMathLibraryNumThreads(1); + Config config; + config.SetModel(data_path); + config.DisableGpu(); + config.SwitchSpecifyInputNames(true); + config.SetCpuMathLibraryNumThreads(1); if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(); + config.EnableMemoryOptim(); } if (params.enable_ir_optimization()) { - analysis_config.SwitchIrOptim(true); + config.SwitchIrOptim(true); } else { - analysis_config.SwitchIrOptim(false); + config.SwitchIrOptim(false); } AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core = - paddle::CreatePaddlePredictor(analysis_config); - if (NULL == _core.get()) { - LOG(ERROR) << "create paddle predictor failed, path: " << data_path; - return -1; - } - - VLOG(2) << "create paddle predictor sucess, path: " << data_path; - return 0; - } -}; - -class FluidCpuNativeDirCore : public FluidFamilyCore { - public: - int create(const predictor::InferEngineCreationParams& params) { - std::string data_path = params.get_path(); - if (access(data_path.c_str(), F_OK) == -1) { - LOG(ERROR) << "create paddle predictor failed, path not exits: " - << data_path; - return -1; - } - - paddle::NativeConfig native_config; - native_config.model_dir = data_path; - native_config.use_gpu = false; - native_config.device = 0; - native_config.fraction_of_gpu_memory = 0; - AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core = paddle::CreatePaddlePredictor( - native_config); + _core = CreatePredictor(config); if (NULL == _core.get()) { LOG(ERROR) << "create paddle predictor failed, path: " << data_path; return -1; @@ -323,214 +264,6 @@ class Parameter { float* _params; }; -class SigmoidModel { - public: - ~SigmoidModel() {} - int load(const char* sigmoid_w_file, - const char* sigmoid_b_file, - float exp_max, - float exp_min) { - AutoLock lock(GlobalSigmoidCreateMutex::instance()); - if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) { - LOG(ERROR) << "load params sigmoid_w failed."; - return -1; - } - VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] [" - << _sigmoid_w._params[1] << "]."; - if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) { - LOG(ERROR) << "load params sigmoid_b failed."; - return -1; - } - VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] [" - << _sigmoid_b._params[1] << "]."; - _exp_max_input = exp_max; - 
_exp_min_input = exp_min; - return 0; - } - - int softmax(float x, double& o) { // NOLINT - float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0]; - float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1]; - _y0 = (_y0 > _exp_max_input) - ? _exp_max_input - : ((_y0 < _exp_min_input) ? _exp_min_input : _y0); - _y1 = (_y1 > _exp_max_input) - ? _exp_max_input - : ((_y1 < _exp_min_input) ? _exp_min_input : _y1); - o = 1.0f / (1.0f + exp(_y0 - _y1)); - return 0; - } - - public: - Parameter _sigmoid_w; - Parameter _sigmoid_b; - float _exp_max_input; - float _exp_min_input; -}; - -class SigmoidFluidModel { - public: - int softmax(float x, double& o) { // NOLINT - return _sigmoid_core->softmax(x, o); - } // NOLINT - - std::unique_ptr Clone() { - std::unique_ptr clone_model; - clone_model.reset(new SigmoidFluidModel()); - clone_model->_sigmoid_core = _sigmoid_core; - clone_model->_fluid_core = _fluid_core->Clone(); - return std::move(clone_model); // NOLINT - } - - public: - std::unique_ptr _fluid_core; - std::shared_ptr _sigmoid_core; -}; - -class FluidCpuWithSigmoidCore : public FluidFamilyCore { - public: - virtual ~FluidCpuWithSigmoidCore() {} - - public: - int create(const predictor::InferEngineCreationParams& params) { - std::string model_path = params.get_path(); - size_t pos = model_path.find_last_of("/\\"); - std::string conf_path = model_path.substr(0, pos); - std::string conf_file = model_path.substr(pos); - configure::SigmoidConf conf; - if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) { - LOG(ERROR) << "failed load model path: " << model_path; - return -1; - } - - _core.reset(new SigmoidFluidModel); - - std::string fluid_model_data_path = conf.dnn_model_path(); - predictor::InferEngineCreationParams new_params(params); - new_params.set_path(fluid_model_data_path); - int ret = load_fluid_model(new_params); - if (ret < 0) { - LOG(ERROR) << "fail to load fluid model."; - return -1; - } - const char* sigmoid_w_file = conf.sigmoid_w_file().c_str(); - const char* sigmoid_b_file = conf.sigmoid_b_file().c_str(); - float exp_max = conf.exp_max_input(); - float exp_min = conf.exp_min_input(); - _core->_sigmoid_core.reset(new SigmoidModel); - VLOG(2) << "create sigmoid core[" << _core->_sigmoid_core.get() - << "], use count[" << _core->_sigmoid_core.use_count() << "]."; - ret = _core->_sigmoid_core->load( - sigmoid_w_file, sigmoid_b_file, exp_max, exp_min); - if (ret < 0) { - LOG(ERROR) << "fail to load sigmoid model."; - return -1; - } - return 0; - } - - virtual bool Run(const void* in_data, void* out_data) { - if (!_core->_fluid_core->Run( - *(std::vector*)in_data, - (std::vector*)out_data)) { - LOG(ERROR) << "Failed call Run with paddle predictor"; - return false; - } - - return true; - } - - virtual int clone(SigmoidFluidModel* origin_core) { - if (origin_core == NULL) { - LOG(ERROR) << "origin paddle Predictor is null."; - return -1; - } - _core = origin_core->Clone(); - if (_core.get() == NULL) { - LOG(ERROR) << "fail to clone paddle predictor: " << origin_core; - return -1; - } - VLOG(2) << "clone sigmoid core[" << _core->_sigmoid_core.get() - << "] use count[" << _core->_sigmoid_core.use_count() << "]."; - return 0; - } - - virtual SigmoidFluidModel* get() { return _core.get(); } - - virtual int load_fluid_model( - const predictor::InferEngineCreationParams& params) = 0; - - int softmax(float x, double& o) { // NOLINT - return _core->_sigmoid_core->softmax(x, o); - } - - protected: - std::unique_ptr _core; // NOLINT -}; - -class 
FluidCpuNativeDirWithSigmoidCore : public FluidCpuWithSigmoidCore { - public: - int load_fluid_model(const predictor::InferEngineCreationParams& params) { - std::string data_path = params.get_path(); - if (access(data_path.c_str(), F_OK) == -1) { - LOG(ERROR) << "create paddle predictor failed, path not exits: " - << data_path; - return -1; - } - - paddle::NativeConfig native_config; - native_config.model_dir = data_path; - native_config.use_gpu = false; - native_config.device = 0; - native_config.fraction_of_gpu_memory = 0; - AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core->_fluid_core = - paddle::CreatePaddlePredictor( - native_config); - if (NULL == _core.get()) { - LOG(ERROR) << "create paddle predictor failed, path: " << data_path; - return -1; - } - - VLOG(2) << "create paddle predictor sucess, path: " << data_path; - return 0; - } -}; - -class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore { - public: - int load_fluid_model(const predictor::InferEngineCreationParams& params) { - std::string data_path = params.get_path(); - if (access(data_path.c_str(), F_OK) == -1) { - LOG(ERROR) << "create paddle predictor failed, path not exits: " - << data_path; - return -1; - } - - paddle::AnalysisConfig analysis_config; - analysis_config.SetModel(data_path); - analysis_config.DisableGpu(); - analysis_config.SwitchSpecifyInputNames(true); - analysis_config.SetCpuMathLibraryNumThreads(1); - - if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(); - } - - AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core->_fluid_core = - paddle::CreatePaddlePredictor(analysis_config); - if (NULL == _core.get()) { - LOG(ERROR) << "create paddle predictor failed, path: " << data_path; - return -1; - } - - VLOG(2) << "create paddle predictor sucess, path: " << data_path; - return 0; - } -}; - } // namespace fluid_cpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp index af3f93a8129282920f4cb6fd1d074e0c7eb46228..91cb0bd20c97e53952f95bb05a25582242793f57 100644 --- a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp +++ b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp @@ -30,28 +30,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( ::baidu::paddle_serving::predictor::InferEngine, "FLUID_CPU_ANALYSIS_DIR"); -REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( - ::baidu::paddle_serving::predictor::FluidInferEngine< - FluidCpuAnalysisDirWithSigmoidCore>, - ::baidu::paddle_serving::predictor::InferEngine, - "FLUID_CPU_ANALYSIS_DIR_SIGMOID"); - -REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( - ::baidu::paddle_serving::predictor::FluidInferEngine, - ::baidu::paddle_serving::predictor::InferEngine, - "FLUID_CPU_NATIVE"); - -REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( - ::baidu::paddle_serving::predictor::FluidInferEngine, - ::baidu::paddle_serving::predictor::InferEngine, - "FLUID_CPU_NATIVE_DIR"); - -REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( - ::baidu::paddle_serving::predictor::FluidInferEngine< - FluidCpuNativeDirWithSigmoidCore>, - ::baidu::paddle_serving::predictor::InferEngine, - "FLUID_CPU_NATIVE_DIR_SIGMOID"); - } // namespace fluid_cpu } // namespace paddle_serving } // namespace baidu diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h index 3782c967823d07c23ba02e5ce0f388dc6b46e181..5e7efe4347ff0c53f4a86e4b55c400ca5d90b6fb 
100644 --- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h +++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h @@ -61,31 +61,36 @@ class GlobalPaddleCreateMutex { pthread_mutex_t _mut; }; -class GlobalSigmoidCreateMutex { - public: - pthread_mutex_t& mutex() { return _mut; } - static pthread_mutex_t& instance() { - static GlobalSigmoidCreateMutex gmutex; - return gmutex.mutex(); - } - - private: - GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); } - - pthread_mutex_t _mut; -}; +using paddle_infer::Config; +using paddle_infer::Predictor; +using paddle_infer::Tensor; +using paddle_infer::CreatePredictor; // data interface class FluidFamilyCore { public: virtual ~FluidFamilyCore() {} - virtual bool Run(const void* in_data, void* out_data) { - if (!_core->Run(*(std::vector*)in_data, - (std::vector*)out_data)) { + virtual std::vector GetInputNames() { + return _core->GetInputNames(); + } + + virtual std::unique_ptr GetInputHandle(const std::string& name) { + return _core->GetInputHandle(name); + } + + virtual std::vector GetOutputNames() { + return _core->GetOutputNames(); + } + + virtual std::unique_ptr GetOutputHandle(const std::string& name) { + return _core->GetOutputHandle(name); + } + + virtual bool Run() { + if (!_core->Run()) { LOG(ERROR) << "Failed call Run with paddle predictor"; return false; } - return true; } @@ -96,8 +101,8 @@ class FluidFamilyCore { LOG(ERROR) << "origin paddle Predictor is null."; return -1; } - paddle::PaddlePredictor* p_predictor = - (paddle::PaddlePredictor*)origin_core; + Predictor* p_predictor = + (Predictor*)origin_core; _core = p_predictor->Clone(); if (_core.get() == NULL) { LOG(ERROR) << "fail to clone paddle predictor: " << origin_core; @@ -109,7 +114,7 @@ class FluidFamilyCore { virtual void* get() { return _core.get(); } protected: - std::unique_ptr _core; + std::shared_ptr _core; }; // infer interface @@ -123,51 +128,19 @@ class FluidGpuAnalysisCore : public FluidFamilyCore { return -1; } - paddle::AnalysisConfig analysis_config; - analysis_config.SetParamsFile(data_path + "/__params__"); - analysis_config.SetProgFile(data_path + "/__model__"); - analysis_config.EnableUseGpu(100, FLAGS_gpuid); - analysis_config.SetCpuMathLibraryNumThreads(1); + Config config; + config.SetParamsFile(data_path + "/__params__"); + config.SetProgFile(data_path + "/__model__"); + config.EnableUseGpu(100, FLAGS_gpuid); + config.SetCpuMathLibraryNumThreads(1); if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(); + config.EnableMemoryOptim(); } - analysis_config.SwitchSpecifyInputNames(true); - + config.SwitchSpecifyInputNames(true); AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core = - paddle::CreatePaddlePredictor(analysis_config); - if (NULL == _core.get()) { - LOG(ERROR) << "create paddle predictor failed, path: " << data_path; - return -1; - } - - VLOG(2) << "create paddle predictor sucess, path: " << data_path; - return 0; - } -}; - -class FluidGpuNativeCore : public FluidFamilyCore { - public: - int create(const predictor::InferEngineCreationParams& params) { - std::string data_path = params.get_path(); - if (access(data_path.c_str(), F_OK) == -1) { - LOG(ERROR) << "create paddle predictor failed, path not exits: " - << data_path; - return -1; - } - - paddle::NativeConfig native_config; - native_config.param_file = data_path + "/__params__"; - native_config.prog_file = data_path + "/__model__"; - native_config.use_gpu = true; - native_config.fraction_of_gpu_memory = 0.01; - 
native_config.device = FLAGS_gpuid; - AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core = paddle::CreatePaddlePredictor( - native_config); + _core = CreatePredictor(config); if (NULL == _core.get()) { LOG(ERROR) << "create paddle predictor failed, path: " << data_path; return -1; @@ -188,81 +161,39 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore { return -1; } - paddle::AnalysisConfig analysis_config; - analysis_config.SetModel(data_path); - analysis_config.EnableUseGpu(1500, FLAGS_gpuid); - analysis_config.SwitchSpecifyInputNames(true); - analysis_config.SetCpuMathLibraryNumThreads(1); + Config config; + config.SetModel(data_path); + config.EnableUseGpu(1500, FLAGS_gpuid); + config.SwitchSpecifyInputNames(true); + config.SetCpuMathLibraryNumThreads(1); if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(); + config.EnableMemoryOptim(); } - -#if 0 // todo: support flexible shape - - int min_seq_len = 1; - int max_seq_len = 512; - int opt_seq_len = 128; - int head_number = 12; - int batch = 50; - - std::vector min_in_shape = {batch, min_seq_len, 1}; - std::vector max_in_shape = {batch, max_seq_len, 1}; - std::vector opt_in_shape = {batch, opt_seq_len, 1}; - - std::string input1_name = "src_text_a_ids"; - std::string input2_name = "pos_text_a_ids"; - std::string input3_name = "sent_text_a_ids"; - std::string input4_name = "stack_0.tmp_0"; - - std::map> min_input_shape = { - {input1_name, min_in_shape}, - {input2_name, min_in_shape}, - {input3_name, min_in_shape}, - {input4_name, {batch, head_number, min_seq_len, min_seq_len}}, - }; - - std::map> max_input_shape = { - {input1_name, max_in_shape}, - {input2_name, max_in_shape}, - {input3_name, max_in_shape}, - {input4_name, {batch, head_number, max_seq_len, max_seq_len}}, - }; - std::map> opt_input_shape = { - {input1_name, opt_in_shape}, - {input2_name, opt_in_shape}, - {input3_name, opt_in_shape}, - {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}}, - }; - - analysis_config.SetTRTDynamicShapeInfo( - min_input_shape, max_input_shape, opt_input_shape); -#endif int max_batch = 32; int min_subgraph_size = 3; if (params.use_trt()) { - analysis_config.EnableTensorRtEngine( + config.EnableTensorRtEngine( 1 << 20, max_batch, min_subgraph_size, - paddle::AnalysisConfig::Precision::kFloat32, + Config::Precision::kFloat32, false, false); LOG(INFO) << "create TensorRT predictor"; } else { if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(); + config.EnableMemoryOptim(); } if (params.enable_ir_optimization()) { - analysis_config.SwitchIrOptim(true); + config.SwitchIrOptim(true); } else { - analysis_config.SwitchIrOptim(false); + config.SwitchIrOptim(false); } } AutoLock lock(GlobalPaddleCreateMutex::instance()); - _core = - paddle::CreatePaddlePredictor(analysis_config); + _core = CreatePredictor(config); if (NULL == _core.get()) { LOG(ERROR) << "create paddle predictor failed, path: " << data_path; return -1; @@ -273,34 +204,6 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore { } }; -class FluidGpuNativeDirCore : public FluidFamilyCore { - public: - int create(const predictor::InferEngineCreationParams& params) { - std::string data_path = params.get_path(); - if (access(data_path.c_str(), F_OK) == -1) { - LOG(ERROR) << "create paddle predictor failed, path not exits: " - << data_path; - return -1; - } - - paddle::NativeConfig native_config; - native_config.model_dir = data_path; - native_config.use_gpu = true; - native_config.fraction_of_gpu_memory = 0.01; - 
@@ -273,34 +204,6 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
   }
 };
 
-class FluidGpuNativeDirCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    paddle::NativeConfig native_config;
-    native_config.model_dir = data_path;
-    native_config.use_gpu = true;
-    native_config.fraction_of_gpu_memory = 0.01;
-    native_config.device = FLAGS_gpuid;
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
-                                          paddle::PaddleEngineKind::kNative>(
-        native_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
 
 class Parameter {
  public:
@@ -383,214 +286,6 @@ class Parameter {
   float* _params;
 };
 
-class SigmoidModel {
- public:
-  ~SigmoidModel() {}
-  int load(const char* sigmoid_w_file,
-           const char* sigmoid_b_file,
-           float exp_max,
-           float exp_min) {
-    AutoLock lock(GlobalSigmoidCreateMutex::instance());
-    if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
-      LOG(ERROR) << "load params sigmoid_w failed.";
-      return -1;
-    }
-    VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
-            << _sigmoid_w._params[1] << "].";
-    if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
-      LOG(ERROR) << "load params sigmoid_b failed.";
-      return -1;
-    }
-    VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
-            << _sigmoid_b._params[1] << "].";
-    _exp_max_input = exp_max;
-    _exp_min_input = exp_min;
-    return 0;
-  }
-
-  int softmax(float x, double& o) {  // NOLINT
-    float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
-    float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
-    _y0 = (_y0 > _exp_max_input)
-              ? _exp_max_input
-              : ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
-    _y1 = (_y1 > _exp_max_input)
-              ? _exp_max_input
-              : ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
-    o = 1.0f / (1.0f + exp(_y0 - _y1));
-    return 0;
-  }
-
- public:
-  Parameter _sigmoid_w;
-  Parameter _sigmoid_b;
-  float _exp_max_input;
-  float _exp_min_input;
-};
-
-class SigmoidFluidModel {
- public:
-  int softmax(float x, double& o) {  // NOLINT
-    return _sigmoid_core->softmax(x, o);
-  }  // NOLINT
-
-  std::unique_ptr<SigmoidFluidModel> Clone() {
-    std::unique_ptr<SigmoidFluidModel> clone_model;
-    clone_model.reset(new SigmoidFluidModel());
-    clone_model->_sigmoid_core = _sigmoid_core;
-    clone_model->_fluid_core = _fluid_core->Clone();
-    return std::move(clone_model);
-  }
-
- public:
-  std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
-  std::shared_ptr<SigmoidModel> _sigmoid_core;
-};
-
-class FluidGpuWithSigmoidCore : public FluidFamilyCore {
- public:
-  virtual ~FluidGpuWithSigmoidCore() {}
-
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string model_path = params.get_path();
-    size_t pos = model_path.find_last_of("/\\");
-    std::string conf_path = model_path.substr(0, pos);
-    std::string conf_file = model_path.substr(pos);
-    configure::SigmoidConf conf;
-    if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
-      LOG(ERROR) << "failed load model path: " << model_path;
-      return -1;
-    }
-
-    _core.reset(new SigmoidFluidModel);
-
-    std::string fluid_model_data_path = conf.dnn_model_path();
-    predictor::InferEngineCreationParams new_params(params);
-    new_params.set_path(fluid_model_data_path);
-    int ret = load_fluid_model(new_params);
-    if (ret < 0) {
-      LOG(ERROR) << "fail to load fluid model.";
-      return -1;
-    }
-    const char* sigmoid_w_file = conf.sigmoid_w_file().c_str();
-    const char* sigmoid_b_file = conf.sigmoid_b_file().c_str();
-    float exp_max = conf.exp_max_input();
-    float exp_min = conf.exp_min_input();
-    _core->_sigmoid_core.reset(new SigmoidModel);
-    LOG(INFO) << "create sigmoid core[" << _core->_sigmoid_core.get()
-              << "], use count[" << _core->_sigmoid_core.use_count() << "].";
-    ret = _core->_sigmoid_core->load(
-        sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
-    if (ret < 0) {
-      LOG(ERROR) << "fail to load sigmoid model.";
-      return -1;
-    }
-    return 0;
-  }
-
-  virtual bool Run(const void* in_data, void* out_data) {
-    if (!_core->_fluid_core->Run(
-            *(std::vector<paddle::PaddleTensor>*)in_data,
-            (std::vector<paddle::PaddleTensor>*)out_data)) {
-      LOG(ERROR) << "Failed call Run with paddle predictor";
-      return false;
-    }
-
-    return true;
-  }
-
-  virtual int clone(SigmoidFluidModel* origin_core) {
-    if (origin_core == NULL) {
-      LOG(ERROR) << "origin paddle Predictor is null.";
-      return -1;
-    }
-    _core = origin_core->Clone();
-    if (_core.get() == NULL) {
-      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
-      return -1;
-    }
-    LOG(INFO) << "clone sigmoid core[" << _core->_sigmoid_core.get()
-              << "] use count[" << _core->_sigmoid_core.use_count() << "].";
-    return 0;
-  }
-
-  virtual SigmoidFluidModel* get() { return _core.get(); }
-
-  virtual int load_fluid_model(
-      const predictor::InferEngineCreationParams& params) = 0;
-
-  int softmax(float x, double& o) {  // NOLINT
-    return _core->_sigmoid_core->softmax(x, o);
-  }
-
- protected:
-  std::unique_ptr<SigmoidFluidModel> _core;
-};
-
-class FluidGpuNativeDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
- public:
-  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    paddle::NativeConfig native_config;
-    native_config.model_dir = data_path;
-    native_config.use_gpu = true;
-    native_config.fraction_of_gpu_memory = 0.01;
-    native_config.device = FLAGS_gpuid;
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core->_fluid_core =
-        paddle::CreatePaddlePredictor<paddle::NativeConfig,
-                                      paddle::PaddleEngineKind::kNative>(
-            native_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
-class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
- public:
-  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    paddle::AnalysisConfig analysis_config;
-    analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
-    analysis_config.SwitchSpecifyInputNames(true);
-    analysis_config.SetCpuMathLibraryNumThreads(1);
-
-    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
-    }
-
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core->_fluid_core =
-        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
 }  // namespace fluid_gpu
 }  // namespace paddle_serving
 }  // namespace baidu
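
The header rewrite above drops the NativeConfig path and the sigmoid wrapper cores, and moves the remaining analysis cores from paddle::AnalysisConfig plus paddle::CreatePaddlePredictor to the Paddle Inference 2.0 entry points. For reference, here is a minimal stand-alone sketch of that new flow, assuming Config, Predictor, and CreatePredictor come from the paddle_infer namespace in paddle_inference_api.h; the function name and model-path handling are illustrative, not part of the patch.

#include <memory>
#include <string>

#include "paddle_inference_api.h"  // assumed header providing the paddle_infer namespace

// Builds a GPU predictor roughly the way the patched analysis-dir core does (sketch only).
std::shared_ptr<paddle_infer::Predictor> MakeGpuPredictor(
    const std::string& model_dir, int gpu_id) {
  paddle_infer::Config config;
  config.SetModel(model_dir);            // directory holding the saved inference model
  config.EnableUseGpu(1500, gpu_id);     // initial GPU memory pool in MB, device id
  config.SwitchSpecifyInputNames(true);  // feed inputs by name, as the engine code does
  config.SetCpuMathLibraryNumThreads(1);
  config.EnableMemoryOptim();
  // CreatePredictor(config) replaces paddle::CreatePaddlePredictor<...>() from the 1.x API.
  return paddle_infer::CreatePredictor(config);
}
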
diff --git a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
index 7447a417338a37716eff025721126e4c817408a6..c00ea8719414f5ac324ac62e3e36128ad6035f91 100644
--- a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
@@ -32,28 +32,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
     ::baidu::paddle_serving::predictor::InferEngine,
     "FLUID_GPU_ANALYSIS_DIR");
 
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidGpuAnalysisDirWithSigmoidCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_ANALYSIS_DIR_SIGMOID");
-
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_NATIVE");
-
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeDirCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_NATIVE_DIR");
-
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidGpuNativeDirWithSigmoidCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_NATIVE_DIR_SIGMOID");
-
 }  // namespace fluid_gpu
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/python/examples/fit_a_line/local_train.py b/python/examples/fit_a_line/local_train.py
index 900b4a674a96434f4e848d1d4fd8f2ebed79f148..3e0f8880a4d006b346712f2592d6c44986882193 100644
--- a/python/examples/fit_a_line/local_train.py
+++ b/python/examples/fit_a_line/local_train.py
@@ -16,7 +16,7 @@ import sys
 
 import paddle
 import paddle.fluid as fluid
-
+paddle.enable_static()
 train_reader = paddle.batch(
     paddle.reader.shuffle(
         paddle.dataset.uci_housing.train(), buf_size=500),