From 046de2acdbb6f1686bfe33f77ca5f95e84fc0405 Mon Sep 17 00:00:00 2001 From: cuichaowen <939136265@qq.com> Date: Tue, 31 Jul 2018 20:23:58 +0800 Subject: [PATCH] Improve anakin feature (#11961) --- cmake/external/anakin.cmake | 7 +- paddle/fluid/inference/api/CMakeLists.txt | 5 +- .../fluid/inference/api/api_anakin_engine.cc | 91 +++++++++++++++---- .../fluid/inference/api/api_anakin_engine.h | 20 ++-- .../inference/api/api_anakin_engine_tester.cc | 17 ++-- .../inference/api/paddle_inference_api.h | 2 + 6 files changed, 101 insertions(+), 41 deletions(-) diff --git a/cmake/external/anakin.cmake b/cmake/external/anakin.cmake index fb3d8ef8d5..8b7d91f234 100644 --- a/cmake/external/anakin.cmake +++ b/cmake/external/anakin.cmake @@ -8,6 +8,7 @@ set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header f set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library") set(ANAKIN_COMPILE_EXTRA_FLAGS + -Wno-error=unused-but-set-variable -Wno-unused-but-set-variable -Wno-error=unused-variable -Wno-unused-variable -Wno-error=format-extra-args -Wno-format-extra-args -Wno-error=comment -Wno-comment @@ -19,7 +20,7 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS -Wno-reorder -Wno-error=cpp) -set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz") +set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/Version0.1.0/anakin.tar.gz") # A helper function used in Anakin, currently, to use it, one need to recursively include # nearly all the header files. @@ -41,9 +42,9 @@ if (NOT EXISTS "${ANAKIN_INSTALL_DIR}") message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}") execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*") - execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}") + execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget --no-check-certificate -q ${ANAKIN_LIBRARY_URL}") execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") - execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz") + execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin.tar.gz") endif() if (WITH_ANAKIN) diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 3e60a61793..259d79bedb 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -19,6 +19,7 @@ endif(APPLE) set(inference_deps paddle_inference_api paddle_fluid_api) + if(WITH_GPU AND TENSORRT_FOUND) set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine) endif() @@ -63,6 +64,8 @@ endif() if (WITH_ANAKIN) # only needed in CI # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's, # so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to + # compile the libinference_anakin_api.a and compile with anakin.so. + fetch_include_recursively(${ANAKIN_INCLUDE}) # compile the libinference_anakin_api.a and anakin.so. nv_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc) nv_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc) @@ -73,7 +76,7 @@ if (WITH_ANAKIN) # only needed in CI if (WITH_TESTING) cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin - DEPS inference_anakin_api) + DEPS inference_anakin_api_shared) target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) endif(WITH_TESTING) endif() diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 0206ac6010..6b374ceefb 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -18,26 +18,36 @@ namespace paddle { -PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( +template <typename Target> +PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor( const AnakinConfig &config) { CHECK(Init(config)); } -bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) { +template <typename Target> +bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) { if (!(graph_.load(config.model_file))) { + LOG(FATAL) << "fail to load graph from " << config.model_file; return false; } - graph_.ResetBatchSize("input_0", config.max_batch_size); + auto inputs = graph_.get_ins(); + for (auto &input_str : inputs) { + graph_.ResetBatchSize(input_str, config.max_batch_size); + } // optimization for graph if (!(graph_.Optimize())) { return false; } // construct executer - executor_.init(graph_); + if (executor_p_ == nullptr) { + executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT, + anakin::Precision::FP32>(graph_, true); + } return true; } -bool PaddleInferenceAnakinPredictor::Run( +template <typename Target> +bool PaddleInferenceAnakinPredictor<Target>::Run( const std::vector<PaddleTensor> &inputs, std::vector<PaddleTensor> *output_data, int batch_size) { for (const auto &input : inputs) { @@ -46,7 +56,29 @@ bool PaddleInferenceAnakinPredictor::Run( << "'s type is not float"; return false; } - auto d_tensor_in_p = executor_.get_in(input.name); + auto d_tensor_in_p = executor_p_->get_in(input.name); + auto net_shape = d_tensor_in_p->valid_shape(); + if (net_shape.size() != input.shape.size()) { + LOG(ERROR) << " input " << input.name + << "'s shape size should be equal to that of net"; + return false; + } + int sum = 1; + for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; }); + if (sum > net_shape.count()) { + graph_.Reshape(input.name, input.shape); + delete executor_p_; + executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT, + anakin::Precision::FP32>(graph_, true); + d_tensor_in_p = executor_p_->get_in(input.name); + } + + anakin::saber::Shape tmp_shape; + for (auto s : input.shape) { + tmp_shape.push_back(s); + } + d_tensor_in_p->reshape(tmp_shape); + float *d_data_p = d_tensor_in_p->mutable_data(); if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()), d_tensor_in_p->valid_size() * sizeof(float), @@ -56,16 +88,17 @@ bool PaddleInferenceAnakinPredictor::Run( } cudaStreamSynchronize(NULL); } - - executor_.prediction(); + cudaDeviceSynchronize(); + executor_p_->prediction(); + cudaDeviceSynchronize(); if (output_data->empty()) { LOG(ERROR) << "At least one output should be set with tensors' names."; return false; } for (auto &output : *output_data) { - auto *tensor = executor_.get_out(output.name); - output.shape = tensor->shape(); + auto *tensor = executor_p_->get_out(output.name); + output.shape = tensor->valid_shape(); if (output.data.length() < tensor->valid_size() * sizeof(float)) { output.data.Resize(tensor->valid_size() * sizeof(float)); } @@ -81,19 +114,23 @@ bool PaddleInferenceAnakinPredictor::Run( return true; } -anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32> - &PaddleInferenceAnakinPredictor::get_executer() { - return executor_; +template <typename Target> +anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32> + &PaddleInferenceAnakinPredictor<Target>::get_executer() { + return *executor_p_; } // the cloned new Predictor of anakin share the same net weights from original // Predictor -std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() { +template <typename Target> +std::unique_ptr<PaddlePredictor> +PaddleInferenceAnakinPredictor<Target>::Clone() { VLOG(3) << "Anakin Predictor::clone"; - std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor()); + std::unique_ptr<PaddlePredictor> cls( + new PaddleInferenceAnakinPredictor<Target>()); // construct executer from other graph auto anakin_predictor_p = - dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get()); + dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get()); if (!anakin_predictor_p) { LOG(ERROR) << "fail to call Init"; return nullptr; @@ -103,14 +140,28 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() { return std::move(cls); } +template class PaddleInferenceAnakinPredictor<anakin::NV>; +template class PaddleInferenceAnakinPredictor<anakin::X86>; + // A factory to help create difference predictor. template <> std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) { VLOG(3) << "Anakin Predictor create."; - std::unique_ptr<PaddlePredictor> x( - new PaddleInferenceAnakinPredictor(config)); - return x; -} + if (config.target_type == AnakinConfig::NVGPU) { + VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ]."; + std::unique_ptr<PaddlePredictor> x( + new PaddleInferenceAnakinPredictor<anakin::NV>(config)); + return x; + } else if (config.target_type == AnakinConfig::X86) { + VLOG(3) << "Anakin Predictor create on [ Intel X86 ]."; + std::unique_ptr<PaddlePredictor> x( + new PaddleInferenceAnakinPredictor<anakin::X86>(config)); + return x; + } else { + VLOG(3) << "Anakin Predictor create on unknown platform."; + return nullptr; + } +}; } // namespace paddle diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index def096c867..836badd979 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -20,14 +20,16 @@ limitations under the License. */ #pragma once #include <vector> -#include "paddle/fluid/inference/api/paddle_inference_api.h" -// from anakin #include "framework/core/net/net.h" +#include "framework/graph/graph.h" +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "saber/core/shape.h" #include "saber/saber_types.h" namespace paddle { +template <typename Target> class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: PaddleInferenceAnakinPredictor() {} @@ -42,19 +44,21 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor { std::unique_ptr<PaddlePredictor> Clone() override; - anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>& + anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>& get_executer(); - ~PaddleInferenceAnakinPredictor() override{}; + ~PaddleInferenceAnakinPredictor() override { + delete executor_p_; + executor_p_ = nullptr; + }; private: bool Init(const AnakinConfig& config); - anakin::graph::Graph<anakin::NV, anakin::saber::AK_FLOAT, - anakin::Precision::FP32> + anakin::graph::Graph<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32> graph_; - anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32> - executor_; + anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>* + executor_p_{nullptr}; AnakinConfig config_; }; diff --git a/paddle/fluid/inference/api/api_anakin_engine_tester.cc b/paddle/fluid/inference/api/api_anakin_engine_tester.cc index 7554fe4989..62e820b68c 100644 --- a/paddle/fluid/inference/api/api_anakin_engine_tester.cc +++ b/paddle/fluid/inference/api/api_anakin_engine_tester.cc @@ -12,18 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include <gflags/gflags.h> #include <glog/logging.h> #include <gtest/gtest.h> +#include "gflags/gflags.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" -DEFINE_string(model, "", "Directory of the inference model."); +DEFINE_string(model, "", "Directory of the inference model(mobile_v2)."); namespace paddle { AnakinConfig GetConfig() { AnakinConfig config; + // using AnakinConfig::X86 if you need to use cpu to do inference + config.target_type = AnakinConfig::NVGPU; config.model_file = FLAGS_model; config.device = 0; config.max_batch_size = 1; @@ -36,7 +38,6 @@ TEST(inference, anakin) { CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config); float data[1 * 3 * 224 * 224] = {1.0f}; - PaddleTensor tensor; tensor.name = "input_0"; tensor.shape = std::vector<int>({1, 3, 224, 224}); @@ -44,22 +45,20 @@ TEST(inference, anakin) { tensor.dtype = PaddleDType::FLOAT32; // For simplicity, we set all the slots with the same data. - std::vector<PaddleTensor> paddle_tensor_feeds; - paddle_tensor_feeds.emplace_back(std::move(tensor)); + std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor); PaddleTensor tensor_out; tensor_out.name = "prob_out"; - tensor_out.shape = std::vector<int>({1000, 1}); + tensor_out.shape = std::vector<int>({}); tensor_out.data = PaddleBuf(); tensor_out.dtype = PaddleDType::FLOAT32; - std::vector<PaddleTensor> outputs; - outputs.emplace_back(std::move(tensor_out)); + std::vector<PaddleTensor> outputs(1, tensor_out); ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs)); float* data_o = static_cast<float*>(outputs[0].data.data()); - for (size_t j = 0; j < 1000; ++j) { + for (size_t j = 0; j < outputs[0].data.length(); ++j) { LOG(INFO) << "output[" << j << "]: " << data_o[j]; } } diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 3342ee3c25..e326f19226 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -126,9 +126,11 @@ struct NativeConfig : public PaddlePredictor::Config { // Configurations for Anakin engine. struct AnakinConfig : public PaddlePredictor::Config { + enum TargetType { NVGPU = 0, X86 }; int device; std::string model_file; int max_batch_size{-1}; + TargetType target_type; }; struct TensorRTConfig : public NativeConfig { -- GitLab