From 2f0df564223581b3414d8793a6459d0bb31a2ba1 Mon Sep 17 00:00:00 2001
From: Xin Pan
Date: Sun, 27 May 2018 04:03:18 -0700
Subject: [PATCH] add inference interface impl

---
 paddle/contrib/inference/CMakeLists.txt       |  35 ++
 .../contrib/inference/paddle_inference_api.h  |  29 +-
 .../inference/paddle_inference_api_impl.cc    | 309 ++++++++++++++++++
 .../inference/paddle_inference_api_impl.h     |  76 +++++
 .../test_paddle_inference_api_impl.cc         |  83 +++++
 paddle/fluid/inference/CMakeLists.txt         |   1 +
 6 files changed, 523 insertions(+), 10 deletions(-)
 create mode 100644 paddle/contrib/inference/paddle_inference_api_impl.cc
 create mode 100644 paddle/contrib/inference/paddle_inference_api_impl.h
 create mode 100644 paddle/contrib/inference/test_paddle_inference_api_impl.cc

diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt
index 26b0cfa27a..a4fe10f708 100644
--- a/paddle/contrib/inference/CMakeLists.txt
+++ b/paddle/contrib/inference/CMakeLists.txt
@@ -13,10 +13,45 @@
 # limitations under the License.
 #

+function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs ARGS)
+  cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
+  set(arg_list "")
+  if(inference_test_ARGS)
+    foreach(arg ${inference_test_ARGS})
+      list(APPEND arg_list "_${arg}")
+    endforeach()
+  else()
+    list(APPEND arg_list "_")
+  endif()
+  foreach(arg ${arg_list})
+    string(REGEX REPLACE "^_$" "" arg "${arg}")
+    cc_test(${TARGET_NAME}
+            SRCS ${TEST_SRC}
+            DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl
+            ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
+    # set_tests_properties(${TARGET_NAME}
+    #                      PROPERTIES DEPENDS ${DEP_TEST})
+  endforeach()
+endfunction(inference_api_test)
+
+
 cc_library(paddle_inference_api
     SRCS paddle_inference_api.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})

+cc_library(paddle_inference_api_impl
+    SRCS paddle_inference_api_impl.cc
+    DEPS paddle_inference_api paddle_fluid_api)
+
 cc_test(test_paddle_inference_api
         SRCS test_paddle_inference_api.cc
         DEPS paddle_inference_api)
+
+inference_api_test(test_paddle_inference_api_impl
+                   test_paddle_inference_api_impl.cc
+                   test_word2vec)
diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h
index db5092dc6e..9ac8ebdef8 100644
--- a/paddle/contrib/inference/paddle_inference_api.h
+++ b/paddle/contrib/inference/paddle_inference_api.h
@@ -27,29 +27,38 @@

 namespace paddle {

+enum PaddleDType {
+  FLOAT32,
+  INT64,
+};
+
+struct PaddleBuf {
+  void* data;     // pointer to the data memory.
+  size_t length;  // number of memory bytes.
+};
+
 struct PaddleTensor {
   std::string name;  // variable name.
   std::vector<int> shape;
-  std::vector<unsigned char> data;  // bytes of data.
-  size_t type{typeid(float).hash_code()};  // hash of type
+  PaddleBuf data;  // blob of data.
+  PaddleDType dtype;
 };

 /*
- * A simple Inference API for Paddle. Currently this API might just be used by
- * non-sequence scenerios.
- * TODO(Superjomn) Prepare another API for NLP-related usages.
- */
+* A simple Inference API for Paddle. Currently this API might just be used by
+* non-sequence scenarios.
+* TODO(Superjomn) Prepare another API for NLP-related usages.
+*/
 class PaddlePredictor {
  public:
   struct Config;
   PaddlePredictor() = default;
   PaddlePredictor(const PaddlePredictor&) = delete;

-  // One drived class should has such a constructor
-  // PaddlePredictor(const XConfig& config);
-  // The XConfig is a derived class of Config.
-  // Predict an record.
+  // The caller is responsible for allocating and releasing the memory of
+  // `inputs`. `inputs` should be alive until Run returns. The caller is also
+  // responsible for releasing the memory of `output_data`.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data) = 0;

diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc
new file mode 100644
index 0000000000..ecca16d3f8
--- /dev/null
+++ b/paddle/contrib/inference/paddle_inference_api_impl.cc
@@ -0,0 +1,309 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <sys/time.h>
+#include <algorithm>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paddle/contrib/inference/paddle_inference_api_impl.h"
+
+namespace paddle {
+namespace {
+
+// Simple timer for profiling.
+class Timer {
+ public:
+  double start;
+  double startu;
+  void tic() {
+    struct timeval tp;
+    gettimeofday(&tp, NULL);
+    start = tp.tv_sec;
+    startu = tp.tv_usec;
+  }
+  double toc() {
+    struct timeval tp;
+    gettimeofday(&tp, NULL);
+    double used_time_ms =
+        (tp.tv_sec - start) * 1000.0 + (tp.tv_usec - startu) / 1000.0;
+    return used_time_ms;
+  }
+};
+
+template <typename T>
+std::string num2str(T a) {
+  std::stringstream istr;
+  istr << a;
+  return istr.str();
+}
+}  // namespace
+
+bool PaddlePredictorImpl::Init() {
+  VLOG(3) << "Predictor::init()";
+
+  // TODO(panyx0718): Should CPU vs GPU device be decided by id?
+  if (config_.device >= 0) {
+    place_ = paddle::platform::CUDAPlace(config_.device);
+  } else {
+    place_ = paddle::platform::CPUPlace();
+  }
+  paddle::framework::InitDevices(false);
+  executor_.reset(new paddle::framework::Executor(place_));
+  scope_.reset(new paddle::framework::Scope());
+
+  // Initialize the inference program
+  if (!config_.model_dir.empty()) {
+    // Parameters are saved in separate files sited in
+    // the specified `dirname`.
+    inference_program_ = paddle::inference::Load(
+        executor_.get(), scope_.get(), config_.model_dir);
+  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
+    // All parameters are saved in a single file.
+    // The file names should be consistent with that used
+    // in Python API `fluid.io.save_inference_model`.
+    inference_program_ = paddle::inference::Load(
+        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
+  } else {
+    LOG(ERROR) << "fail to load inference model.";
+    return false;
+  }
+  ctx_ = executor_->Prepare(*inference_program_, 0);
+
+  // Create variables
+  // TODO(panyx0718): Why do we need to check share_variables here?
+  if (config_.share_variables) {
+    executor_->CreateVariables(*inference_program_, scope_.get(), 0);
+  }
+  // Get the feed_target_names and fetch_target_names
+  feed_target_names_ = inference_program_->GetFeedTargetNames();
+  fetch_target_names_ = inference_program_->GetFetchTargetNames();
+  return true;
+}
+
+bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
+                              std::vector<PaddleTensor> *output_data) {
+  VLOG(3) << "Predictor::predict";
+  Timer timer;
+  timer.tic();
+  // set feed variable
+  std::map<std::string, const paddle::framework::LoDTensor *> feed_targets;
+  std::vector<paddle::framework::LoDTensor> feeds;
+  if (!SetFeed(inputs, &feeds)) {
+    LOG(ERROR) << "fail to set feed";
+    return false;
+  }
+  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+    feed_targets[feed_target_names_[i]] = &feeds[i];
+  }
+  // get fetch variable
+  std::map<std::string, paddle::framework::LoDTensor *> fetch_targets;
+  std::vector<paddle::framework::LoDTensor> fetchs;
+  fetchs.resize(fetch_target_names_.size());
+  for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
+    fetch_targets[fetch_target_names_[i]] = &fetchs[i];
+  }
+  // Run the inference program
+  // if share variables, we need not create variables
+  executor_->RunPreparedContext(ctx_.get(),
+                                scope_.get(),
+                                &feed_targets,
+                                &fetch_targets,
+                                !config_.share_variables);
+  if (!GetFetch(fetchs, output_data)) {
+    LOG(ERROR) << "fail to get fetchs";
+    return false;
+  }
+  VLOG(3) << "predict cost: " << timer.toc() << "ms";
+  return true;
+}
+
+std::unique_ptr<PaddlePredictor> PaddlePredictorImpl::Clone() {
+  VLOG(3) << "Predictor::clone";
+  std::unique_ptr<PaddlePredictorImpl> cls(new PaddlePredictorImpl(config_));
+  if (!cls->InitShared(this)) {
+    LOG(ERROR) << "fail to call InitShared";
+    return nullptr;
+  }
+  return cls;
+}
+
+// TODO(panyx0718): Consider merge with Init()?
+bool PaddlePredictorImpl::InitShared(PaddlePredictorImpl *cls) {
+  VLOG(3) << "Predictor::init_shared";
+  // 1. Define place, executor, scope
+  if (this->config_.device >= 0) {
+    place_ = paddle::platform::CUDAPlace();
+  } else {
+    place_ = paddle::platform::CPUPlace();
+  }
+  this->executor_.reset(new paddle::framework::Executor(this->place_));
+  this->scope_.reset(new paddle::framework::Scope());
+  // Initialize the inference program
+  if (!this->config_.model_dir.empty()) {
+    // Parameters are saved in separate files sited in
+    // the specified `dirname`.
+    this->inference_program_ = paddle::inference::Load(
+        this->executor_.get(), this->scope_.get(), this->config_.model_dir);
+  } else if (!this->config_.prog_file.empty() &&
+             !this->config_.param_file.empty()) {
+    // All parameters are saved in a single file.
+    // The file names should be consistent with that used
+    // in Python API `fluid.io.save_inference_model`.
+    this->inference_program_ =
+        paddle::inference::Load(this->executor_.get(),
+                                this->scope_.get(),
+                                this->config_.prog_file,
+                                this->config_.param_file);
+  }
+  this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0);
+  // 3. create variables
+  // TODO(panyx0718): why test share_variables.
+  if (config_.share_variables) {
+    this->executor_->CreateVariables(
+        *this->inference_program_, this->scope_.get(), 0);
+  }
+  // 4. Get the feed_target_names and fetch_target_names
+  this->feed_target_names_ = this->inference_program_->GetFeedTargetNames();
+  this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames();
+  return true;
+}
+
+bool PaddlePredictorImpl::SetFeed(
+    const std::vector<PaddleTensor> &inputs,
+    std::vector<paddle::framework::LoDTensor> *feeds) {
+  VLOG(3) << "Predictor::set_feed";
+  if (inputs.size() != feed_target_names_.size()) {
+    LOG(ERROR) << "wrong feed input size.";
+    return false;
+  }
+  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+    paddle::framework::LoDTensor input;
+    paddle::framework::DDim ddim =
+        paddle::framework::make_ddim(inputs[i].shape);
+    void *input_ptr;
+    if (inputs[i].dtype == PaddleDType::INT64) {
+      input_ptr =
+          input.mutable_data<int64_t>(ddim, paddle::platform::CPUPlace());
+    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
+      input_ptr =
+          input.mutable_data<float>(ddim, paddle::platform::CPUPlace());
+    } else {
+      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
+      return false;
+    }
+
+    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
+    std::memcpy(static_cast<void *>(input_ptr),
+                inputs[i].data.data,
+                inputs[i].data.length);
+    feeds->push_back(input);
+    LOG(ERROR) << "Actual feed type " << feeds->back().type().name();
+  }
+  return true;
+}
+
+bool PaddlePredictorImpl::GetFetch(
+    const std::vector<paddle::framework::LoDTensor> &fetchs,
+    std::vector<PaddleTensor> *outputs) {
+  VLOG(3) << "Predictor::get_fetch";
+  outputs->resize(fetchs.size());
+  for (size_t i = 0; i < fetchs.size(); ++i) {
+    // TODO(panyx0718): Support fetch of other types.
+    if (fetchs[i].type() != typeid(float)) {
+      LOG(ERROR) << "only support fetching float now.";
+      return false;
+    }
+    std::vector<int> shape;
+    auto dims_i = fetchs[i].dims();
+    auto lod = fetchs[i].lod();
+    const float *output_ptr = fetchs[i].data<float>();
+    // const int64_t* output_ptr = fetchs[i].data<int64_t>();
+    auto num = fetchs[i].numel();
+    std::vector<float> data;
+    if (0 == lod.size()) {
+      std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
+      for (int j = 0; j < dims_i.size(); ++j) {
+        shape.push_back(dims_i[j]);
+      }
+    } else {
+      // for batch detection
+      // image[0] -> output[0] shape {145, 6}
+      // image[1] -> output[1] shape {176, 6}
+      // then,
+      // the batch output shape {321, 6}
+      // the lod {{0, 145, 321}}
+      // so we should append output[0] to {176, 6}
+      size_t max_dim = 0;
+      for (size_t j = 1; j < lod[0].size(); j++) {
+        max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]);
+      }
+      size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back();
+      if (max_dim > 0) {
+        data.resize((lod[0].size() - 1) * max_dim * common_dim, 0);
+      }
+      for (size_t j = 1; j < lod[0].size(); j++) {
+        size_t start = lod[0][j - 1] * common_dim;
+        size_t end = lod[0][j] * common_dim;
+        if (end > start) {
+          std::copy(output_ptr + start,
+                    output_ptr + end,
+                    data.begin() + (j - 1) * max_dim * common_dim);
+        }
+      }
+      shape.push_back(lod[0].size() - 1);
+      shape.push_back(max_dim);
+      for (int j = 1; j < dims_i.size(); ++j) {
+        shape.push_back(dims_i[j]);
+      }
+    }
+
+    outputs->at(i).shape = shape;
+    outputs->at(i).data.length = sizeof(float) * data.size();
+    outputs->at(i).data.data = malloc(outputs->at(i).data.length);
+    std::memcpy(
+        outputs->at(i).data.data, data.data(), outputs->at(i).data.length);
+    outputs->at(i).dtype = PaddleDType::FLOAT32;
+    // TODO(panyx0718): support other types? fill tensor name? avoid a copy.
+  }
+  return true;
+}
+
+std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
+    const VisConfig &config) {
+  VLOG(3) << "create PaddlePredictorImpl";
+  // 1. GPU memory
+  std::vector<std::string> flags;
+  if (config.fraction_of_gpu_memory >= 0.0f ||
+      config.fraction_of_gpu_memory <= 0.95f) {
+    flags.push_back("dummy");
+    std::string flag = "--fraction_of_gpu_memory_to_use=" +
+                       num2str<float>(config.fraction_of_gpu_memory);
+    flags.push_back(flag);
+    VLOG(3) << "set flag: " << flag;
+    framework::InitGflags(flags);
+  }
+
+  std::unique_ptr<PaddlePredictorImpl> predictor(
+      new PaddlePredictorImpl(config));
+  if (!predictor->Init()) {
+    return nullptr;
+  }
+  return predictor;
+}
+
+}  // namespace paddle
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h
new file mode 100644
index 0000000000..831abce5da
--- /dev/null
+++ b/paddle/contrib/inference/paddle_inference_api_impl.h
@@ -0,0 +1,76 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include <glog/logging.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/contrib/inference/paddle_inference_api.h"
+
+#include "paddle/fluid/framework/ddim.h"
+#include "paddle/fluid/framework/init.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/inference/io.h"
+#include "paddle/fluid/platform/profiler.h"
+
+namespace paddle {
+
+struct VisConfig : public PaddlePredictor::Config {
+  int device;
+  float fraction_of_gpu_memory;
+  std::string prog_file;
+  std::string param_file;
+  bool share_variables;
+};
+
+/*
+ * Do not use this, just a demo indicating how to customize a Predictor.
+ */
+class PaddlePredictorImpl : public PaddlePredictor {
+ public:
+  explicit PaddlePredictorImpl(const VisConfig &config) : config_(config) {}
+
+  bool Init();
+
+  bool Run(const std::vector<PaddleTensor> &inputs,
+           std::vector<PaddleTensor> *output_data) override;
+
+  std::unique_ptr<PaddlePredictor> Clone() override;
+
+  ~PaddlePredictorImpl() override {}
+
+ private:
+  bool InitShared(PaddlePredictorImpl *cls);
+  bool SetFeed(const std::vector<PaddleTensor> &input_datas,
+               std::vector<paddle::framework::LoDTensor> *feeds);
+  bool GetFetch(const std::vector<paddle::framework::LoDTensor> &fetchs,
+                std::vector<PaddleTensor> *output_data);
+
+  VisConfig config_;
+  paddle::platform::Place place_;
+  std::unique_ptr<paddle::framework::Executor> executor_;
+  std::unique_ptr<paddle::framework::Scope> scope_;
+  std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx_;
+  std::unique_ptr<paddle::framework::ProgramDesc> inference_program_;
+  std::vector<std::string> feed_target_names_;
+  std::vector<std::string> fetch_target_names_;
+};
+
+std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
+    const VisConfig &config);
+
+}  // namespace paddle
diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
new file mode 100644
index 0000000000..43b068fb42
--- /dev/null
+++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
@@ -0,0 +1,83 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "gflags/gflags.h"
+#include "paddle/contrib/inference/paddle_inference_api_impl.h"
+#include "paddle/fluid/inference/tests/test_helper.h"
+
+DEFINE_string(dirname, "", "Directory of the inference model.");
+
+namespace paddle {
+
+PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
+  PaddleTensor pt;
+  pt.data.data = t->data<void>();
+
+  if (t->type() == typeid(int64_t)) {
+    pt.data.length = t->numel() * sizeof(int64_t);
+    pt.dtype = PaddleDType::INT64;
+  } else if (t->type() == typeid(float)) {
+    pt.data.length = t->numel() * sizeof(float);
+    pt.dtype = PaddleDType::FLOAT32;
+  } else {
+    LOG(FATAL) << "unsupported type.";
+  }
+  pt.shape = framework::vectorize2int(t->dims());
+  return pt;
+}
+
+TEST(paddle_inference_api_impl, word2vec) {
+  VisConfig config;
+  config.model_dir = FLAGS_dirname + "word2vec.inference.model";
+  LOG(INFO) << "dirname  " << config.model_dir;
+  config.fraction_of_gpu_memory = 0.85;
+  config.device = 0;
+  config.share_variables = true;
+
+  std::unique_ptr<PaddlePredictorImpl> predictor =
+      CreatePaddlePredictorImpl(config);
+
+  framework::LoDTensor first_word, second_word, third_word, fourth_word;
+  framework::LoD lod{{0, 1}};
+  int64_t dict_size = 2073;  // The size of dictionary
+
+  SetupLoDTensor(&first_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(&second_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
+
+  std::vector<PaddleTensor> cpu_feeds;
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&first_word));
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&second_word));
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&third_word));
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));
+
+  std::vector<PaddleTensor> outputs;
+  ASSERT_TRUE(predictor->Run(cpu_feeds, &outputs));
+  ASSERT_EQ(outputs.size(), 1);
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    size_t len = outputs[i].data.length;
+    float* data = static_cast<float*>(outputs[i].data.data);
+    for (int j = 0; j < len / sizeof(float); ++j) {
+      ASSERT_LT(data[j], 1.0);
+      ASSERT_GT(data[j], -1.0);
+    }
+    free(outputs[i].data.data);
+  }
+}
+
+}  // namespace paddle
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index b98aeed8a0..cc4a725dfb 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)

+# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
 cc_library(paddle_fluid_api
     SRCS io.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
-- 
GitLab
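For context, a minimal sketch of how the API added in this patch might be driven end to end, mirroring the word2vec test above. It is an illustration only, not part of the patch: the model path, feed shape, and CPU-only settings are assumptions, and the target must link against paddle_inference_api_impl.

// Usage sketch for the new predictor API (illustrative; paths and shapes are
// hypothetical, and error handling is minimal).
#include <cstdlib>
#include <memory>
#include <vector>

#include "paddle/contrib/inference/paddle_inference_api_impl.h"

int main() {
  paddle::VisConfig config;
  config.model_dir = "/path/to/some.inference.model";  // hypothetical path
  config.device = -1;  // a negative device id selects CPUPlace in Init()
  config.fraction_of_gpu_memory = 0.0f;
  config.share_variables = true;

  std::unique_ptr<paddle::PaddlePredictorImpl> predictor =
      paddle::CreatePaddlePredictorImpl(config);
  if (!predictor) return 1;

  // The caller owns the input buffer and must keep it alive across Run().
  std::vector<float> buf = {1.f, 2.f, 3.f, 4.f};
  paddle::PaddleTensor feed;
  feed.shape = {1, 4};  // assumed feed shape for the loaded model
  feed.data.data = buf.data();
  feed.data.length = buf.size() * sizeof(float);
  feed.dtype = paddle::PaddleDType::FLOAT32;

  std::vector<paddle::PaddleTensor> outputs;
  if (!predictor->Run({feed}, &outputs)) return 1;

  // GetFetch() fills outputs with malloc()'d buffers; the caller frees them.
  for (auto &out : outputs) std::free(out.data.data);
  return 0;
}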