diff --git a/cmake/external/anakin.cmake b/cmake/external/anakin.cmake
index fb3d8ef8d53436f387acc3069a0eb887e6f07c59..8b7d91f234594becdda805c089fac0bb4e4e8e44 100644
--- a/cmake/external/anakin.cmake
+++ b/cmake/external/anakin.cmake
@@ -8,6 +8,7 @@ set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header f
 set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library")
 
 set(ANAKIN_COMPILE_EXTRA_FLAGS
+    -Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
     -Wno-error=unused-variable -Wno-unused-variable
     -Wno-error=format-extra-args -Wno-format-extra-args
     -Wno-error=comment -Wno-comment
@@ -19,7 +20,7 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
     -Wno-reorder
     -Wno-error=cpp)
 
-set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz")
+set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/Version0.1.0/anakin.tar.gz")
 
 # A helper function used in Anakin, currently, to use it, one need to recursively include
 # nearly all the header files.
@@ -41,9 +42,9 @@ if (NOT EXISTS "${ANAKIN_INSTALL_DIR}")
     message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
     execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
     execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
-    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget --no-check-certificate -q ${ANAKIN_LIBRARY_URL}")
     execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
-    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin.tar.gz")
 endif()
 
 if (WITH_ANAKIN)
diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
index 3e60a61793339990648737c3d549d46cc5f5a887..259d79bedbf664f52b1189ca71567665a6d91180 100644
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -19,6 +19,7 @@ endif(APPLE)
 
 set(inference_deps paddle_inference_api paddle_fluid_api)
+
 if(WITH_GPU AND TENSORRT_FOUND)
   set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine)
 endif()
 
@@ -63,6 +64,8 @@ endif()
 if (WITH_ANAKIN) # only needed in CI
   # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's,
   # so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to
+  # compile the libinference_anakin_api.a and compile with anakin.so.
+  fetch_include_recursively(${ANAKIN_INCLUDE})
   # compile the libinference_anakin_api.a and anakin.so.
   nv_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc)
   nv_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc)
@@ -73,7 +76,7 @@ if (WITH_ANAKIN) # only needed in CI
   if (WITH_TESTING)
     cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
             ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
-            DEPS inference_anakin_api)
+            DEPS inference_anakin_api_shared)
     target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
   endif(WITH_TESTING)
 endif()
diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc
index 0206ac60103759deda91be741617bde63e003de6..6b374ceefbc180a5c22abe591f12e1c3d89bc64a 100644
--- a/paddle/fluid/inference/api/api_anakin_engine.cc
+++ b/paddle/fluid/inference/api/api_anakin_engine.cc
@@ -18,26 +18,36 @@
 
 namespace paddle {
 
-PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
+template <typename Target>
+PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
     const AnakinConfig &config) {
   CHECK(Init(config));
 }
 
-bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
+template <typename Target>
+bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
   if (!(graph_.load(config.model_file))) {
+    LOG(FATAL) << "fail to load graph from " << config.model_file;
     return false;
   }
-  graph_.ResetBatchSize("input_0", config.max_batch_size);
+  auto inputs = graph_.get_ins();
+  for (auto &input_str : inputs) {
+    graph_.ResetBatchSize(input_str, config.max_batch_size);
+  }
   // optimization for graph
   if (!(graph_.Optimize())) {
     return false;
   }
   // construct executer
-  executor_.init(graph_);
+  if (executor_p_ == nullptr) {
+    executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
+                                  anakin::Precision::FP32>(graph_, true);
+  }
   return true;
 }
 
-bool PaddleInferenceAnakinPredictor::Run(
+template <typename Target>
+bool PaddleInferenceAnakinPredictor<Target>::Run(
     const std::vector<PaddleTensor> &inputs,
     std::vector<PaddleTensor> *output_data, int batch_size) {
   for (const auto &input : inputs) {
@@ -46,7 +56,29 @@ bool PaddleInferenceAnakinPredictor::Run(
                  << "'s type is not float";
       return false;
     }
-    auto d_tensor_in_p = executor_.get_in(input.name);
+    auto d_tensor_in_p = executor_p_->get_in(input.name);
+    auto net_shape = d_tensor_in_p->valid_shape();
+    if (net_shape.size() != input.shape.size()) {
+      LOG(ERROR) << " input " << input.name
+                 << "'s shape size should be equal to that of net";
+      return false;
+    }
+    int sum = 1;
+    for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
+    if (sum > net_shape.count()) {
+      graph_.Reshape(input.name, input.shape);
+      delete executor_p_;
+      executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
+                                    anakin::Precision::FP32>(graph_, true);
+      d_tensor_in_p = executor_p_->get_in(input.name);
+    }
+
+    anakin::saber::Shape tmp_shape;
+    for (auto s : input.shape) {
+      tmp_shape.push_back(s);
+    }
+    d_tensor_in_p->reshape(tmp_shape);
+
     float *d_data_p = d_tensor_in_p->mutable_data();
     if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
                    d_tensor_in_p->valid_size() * sizeof(float),
@@ -56,16 +88,17 @@ bool PaddleInferenceAnakinPredictor::Run(
     }
     cudaStreamSynchronize(NULL);
   }
-
-  executor_.prediction();
+  cudaDeviceSynchronize();
+  executor_p_->prediction();
+  cudaDeviceSynchronize();
 
   if (output_data->empty()) {
     LOG(ERROR) << "At least one output should be set with tensors' names.";
     return false;
   }
   for (auto &output : *output_data) {
-    auto *tensor = executor_.get_out(output.name);
-    output.shape = tensor->shape();
+    auto *tensor = executor_p_->get_out(output.name);
+    output.shape = tensor->valid_shape();
     if (output.data.length() < tensor->valid_size() *
                                    sizeof(float)) {
       output.data.Resize(tensor->valid_size() * sizeof(float));
     }
@@ -81,19 +114,23 @@ bool PaddleInferenceAnakinPredictor::Run(
   return true;
 }
 
-anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
-    &PaddleInferenceAnakinPredictor::get_executer() {
-  return executor_;
+template <typename Target>
+anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+    &PaddleInferenceAnakinPredictor<Target>::get_executer() {
+  return *executor_p_;
 }
 
 // the cloned new Predictor of anakin share the same net weights from original
 // Predictor
-std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
+template <typename Target>
+std::unique_ptr<PaddlePredictor>
+PaddleInferenceAnakinPredictor<Target>::Clone() {
   VLOG(3) << "Anakin Predictor::clone";
-  std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
+  std::unique_ptr<PaddlePredictor> cls(
+      new PaddleInferenceAnakinPredictor<Target>());
   // construct executer from other graph
   auto anakin_predictor_p =
-      dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
+      dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
   if (!anakin_predictor_p) {
     LOG(ERROR) << "fail to call Init";
     return nullptr;
@@ -103,14 +140,28 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
   return std::move(cls);
 }
 
+template class PaddleInferenceAnakinPredictor<anakin::NV>;
+template class PaddleInferenceAnakinPredictor<anakin::X86>;
+
 // A factory to help create difference predictor.
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
   VLOG(3) << "Anakin Predictor create.";
-  std::unique_ptr<PaddlePredictor> x(
-      new PaddleInferenceAnakinPredictor(config));
-  return x;
-}
+  if (config.target_type == AnakinConfig::NVGPU) {
+    VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
+    std::unique_ptr<PaddlePredictor> x(
+        new PaddleInferenceAnakinPredictor<anakin::NV>(config));
+    return x;
+  } else if (config.target_type == AnakinConfig::X86) {
+    VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
+    std::unique_ptr<PaddlePredictor> x(
+        new PaddleInferenceAnakinPredictor<anakin::X86>(config));
+    return x;
+  } else {
+    VLOG(3) << "Anakin Predictor create on unknown platform.";
+    return nullptr;
+  }
+};
 
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h
index def096c867ec85624f5b221782ef8b6240923c05..836badd9799228c6c294dcad5df73d039d36a1ff 100644
--- a/paddle/fluid/inference/api/api_anakin_engine.h
+++ b/paddle/fluid/inference/api/api_anakin_engine.h
@@ -20,14 +20,16 @@ limitations under the License.
 */
 
 #pragma once
 
 #include <vector>
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-// from anakin
 #include "framework/core/net/net.h"
+#include "framework/graph/graph.h"
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "saber/core/shape.h"
 #include "saber/saber_types.h"
 
 namespace paddle {
 
+template <typename Target>
 class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  public:
   PaddleInferenceAnakinPredictor() {}
@@ -42,19 +44,21 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
   std::unique_ptr<PaddlePredictor> Clone() override;
 
-  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
+  anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
   get_executer();
 
-  ~PaddleInferenceAnakinPredictor() override{};
+  ~PaddleInferenceAnakinPredictor() override {
+    delete executor_p_;
+    executor_p_ = nullptr;
+  };
 
  private:
   bool Init(const AnakinConfig& config);
 
-  anakin::graph::Graph<anakin::NV, anakin::saber::AK_FLOAT,
-                       anakin::Precision::FP32>
+  anakin::graph::Graph<Target, anakin::saber::AK_FLOAT,
+                       anakin::Precision::FP32>
       graph_;
-  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
-      executor_;
+  anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>*
+      executor_p_{nullptr};
   AnakinConfig config_;
 };
diff --git a/paddle/fluid/inference/api/api_anakin_engine_tester.cc b/paddle/fluid/inference/api/api_anakin_engine_tester.cc
index 7554fe4989b3f98e5af13dfb51b549083e4cd777..62e820b68c79a47d963bb174663bfc8c4ac22de3 100644
--- a/paddle/fluid/inference/api/api_anakin_engine_tester.cc
+++ b/paddle/fluid/inference/api/api_anakin_engine_tester.cc
@@ -12,18 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "gflags/gflags.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 
-DEFINE_string(model, "", "Directory of the inference model.");
+DEFINE_string(model, "", "Directory of the inference model(mobile_v2).");
 
 namespace paddle {
 
 AnakinConfig GetConfig() {
   AnakinConfig config;
+  // using AnakinConfig::X86 if you need to use cpu to do inference
+  config.target_type = AnakinConfig::NVGPU;
   config.model_file = FLAGS_model;
   config.device = 0;
   config.max_batch_size = 1;
@@ -36,7 +38,6 @@ TEST(inference, anakin) {
       CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
 
   float data[1 * 3 * 224 * 224] = {1.0f};
-
   PaddleTensor tensor;
   tensor.name = "input_0";
   tensor.shape = std::vector<int>({1, 3, 224, 224});
@@ -44,22 +45,20 @@ TEST(inference, anakin) {
   tensor.data = PaddleBuf(data, sizeof(data));
   tensor.dtype = PaddleDType::FLOAT32;
 
   // For simplicity, we set all the slots with the same data.
-  std::vector<PaddleTensor> paddle_tensor_feeds;
-  paddle_tensor_feeds.emplace_back(std::move(tensor));
+  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
 
   PaddleTensor tensor_out;
   tensor_out.name = "prob_out";
-  tensor_out.shape = std::vector<int>({1000, 1});
+  tensor_out.shape = std::vector<int>({});
   tensor_out.data = PaddleBuf();
   tensor_out.dtype = PaddleDType::FLOAT32;
 
-  std::vector<PaddleTensor> outputs;
-  outputs.emplace_back(std::move(tensor_out));
+  std::vector<PaddleTensor> outputs(1, tensor_out);
 
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   float* data_o = static_cast<float*>(outputs[0].data.data());
-  for (size_t j = 0; j < 1000; ++j) {
+  for (size_t j = 0; j < outputs[0].data.length(); ++j) {
     LOG(INFO) << "output[" << j << "]: " << data_o[j];
   }
 }
diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index 3342ee3c25446232e15b377229cdc303c0a0b40d..e326f19226e2e4d4a1329863f56eb732b03bc7b2 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -126,9 +126,11 @@ struct NativeConfig : public PaddlePredictor::Config {
 
 // Configurations for Anakin engine.
 struct AnakinConfig : public PaddlePredictor::Config {
+  enum TargetType { NVGPU = 0, X86 };
   int device;
   std::string model_file;
   int max_batch_size{-1};
+  TargetType target_type;
 };
 
 struct TensorRTConfig : public NativeConfig {
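Usage sketch (not part of the patch): the snippet below illustrates how the new AnakinConfig::TargetType field and the target-dispatching factory added in this change are meant to be driven from application code, mirroring the updated api_anakin_engine_tester.cc above. The model path is a hypothetical placeholder, and the tensor names "input_0" / "prob_out" and the 1x3x224x224 shape are taken from the MobileNet v2 test model used in the test, not fixed by the API.

    // Minimal sketch, assuming a MobileNet-v2 Anakin model; adjust the path,
    // tensor names, and shapes for other models.
    #include <vector>
    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    int main() {
      paddle::AnakinConfig config;
      config.target_type = paddle::AnakinConfig::NVGPU;  // or AnakinConfig::X86 for CPU
      config.model_file = "./mobilenet_v2.anakin.bin";   // hypothetical model path
      config.device = 0;
      config.max_batch_size = 1;

      // The factory added in api_anakin_engine.cc dispatches on target_type.
      auto predictor =
          paddle::CreatePaddlePredictor<paddle::AnakinConfig,
                                        paddle::PaddleEngineKind::kAnakin>(config);

      // One float32 input tensor filled with a constant value.
      float data[1 * 3 * 224 * 224] = {1.0f};
      paddle::PaddleTensor in;
      in.name = "input_0";
      in.shape = std::vector<int>({1, 3, 224, 224});
      in.data = paddle::PaddleBuf(data, sizeof(data));
      in.dtype = paddle::PaddleDType::FLOAT32;
      std::vector<paddle::PaddleTensor> inputs(1, in);

      // Output buffer is resized by Run() to the network's valid output size.
      paddle::PaddleTensor out;
      out.name = "prob_out";
      out.shape = std::vector<int>({});
      out.data = paddle::PaddleBuf();
      out.dtype = paddle::PaddleDType::FLOAT32;
      std::vector<paddle::PaddleTensor> outputs(1, out);

      predictor->Run(inputs, &outputs);
      return 0;
    }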