diff --git a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
index ceef27ac1ce3c0a8ecd15f86a2dbae098059e0a8..0878192ebf8d529bbfa8d3dfcc5ad2d821e24813 100644
--- a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
@@ -13,14 +13,12 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/ir/ipu/infer_shape_pass.h"
-
-#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
-
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/pass_tester_helper.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/variable_helper.h"
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 3c66e35abf1d59a43e86541614a56ad78f957416..6ff25597125c5f0b13ee603bc17329a351074a8b 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -49,6 +49,9 @@ if(WIN32 AND WITH_GPU)
   cc_library(paddle_inference DEPS ${fluid_modules} ${pten_modules} ${STATIC_INFERENCE_API})
 else()
   create_static_lib(paddle_inference ${fluid_modules} ${pten_modules} ${STATIC_INFERENCE_API})
+  if(WITH_IPU)
+    target_link_libraries(paddle_inference -Wl,--allow-multiple-definition popart_canonicalization_utils)
+  endif()
 endif()
 
 if(NOT APPLE)
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index ad96a4e3437bebd67bd20430f6b0ff3af19aafbf..aff2f60551de93755af34ec742feaab08f32c8ca 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -273,6 +273,14 @@ struct Argument {
   DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
                       int);
 
+  // ipu related
+  DECL_ARGUMENT_FIELD(use_ipu, UseIpu, bool);
+  DECL_ARGUMENT_FIELD(ipu_device_num, IpuDeviceNum, int);
+  DECL_ARGUMENT_FIELD(ipu_enable_pipelining, IpuEnablePipelining, bool);
+  DECL_ARGUMENT_FIELD(ipu_batches_per_step, IpuBatchesPerStep, int);
+  DECL_ARGUMENT_FIELD(ipu_batch_size, IpuBatchSize, int);
+  DECL_ARGUMENT_FIELD(ipu_need_avg_shard, IpuNeedAvgShard, bool);
+
  private:
   std::unordered_set<std::string> valid_fields_;
 };
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
index 188b2ff851d96fa76edd666c696d98ddb1dcb948..fe6a27f80725f8e6520c0988f195419eb8a0cc1d 100644
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
@@ -65,6 +65,27 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
                     platform::errors::PreconditionNotMet(
                         "The scope ptr should not be nullptr."));
   argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
+
+// ipu related
+#ifdef PADDLE_WITH_IPU
+  if (argument->Has("use_ipu")) {
+    if (argument->use_ipu()) {
+      argument->main_graph().SetNotOwned("num_ipus",
+                                         &argument->ipu_device_num());
+      argument->main_graph().SetNotOwned("need_avg_shard",
+                                         &argument->ipu_need_avg_shard());
+      argument->main_graph().SetNotOwned("enable_pipelining",
+                                         &argument->ipu_enable_pipelining());
+      argument->main_graph().SetNotOwned("batches_per_step",
+                                         &argument->ipu_batches_per_step());
+      argument->main_graph().SetNotOwned("batch_size",
+                                         &argument->ipu_batch_size());
+    } else {
+      PADDLE_THROW(
+          platform::errors::Unimplemented("Please compile with WITH_IPU"));
+    }
+  }
+#endif
 }
 
 std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel(
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 49c4b8d7372e276de7b0979d8c4b9505f9453c91..a1ab69906bfc443d7441647a68f3a4fa1be5e3b1 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -46,6 +46,9 @@ PassStrategy *AnalysisConfig::pass_builder() const {
       pass_builder_.reset(new XpuPassStrategy);
     } else if (use_npu_) {
       pass_builder_.reset(new NpuPassStrategy);
+    } else if (use_ipu_) {
+      LOG(INFO) << "Create IPU IR passes";
+      pass_builder_.reset(new IpuPassStrategy);
     } else {
       LOG(INFO) << "Create CPU IR passes";
       pass_builder_.reset(new CpuPassStrategy);
@@ -139,6 +142,20 @@ void AnalysisConfig::EnableNpu(int device_id) {
   Update();
 }
 
+void AnalysisConfig::EnableIpu(int device_num, bool ipu_enable_pipelining,
+                               int ipu_batches_per_step, int ipu_batch_size,
+                               bool ipu_need_avg_shard) {
+  enable_ir_optim_ = true;
+
+  use_ipu_ = true;
+  ipu_device_num_ = device_num;
+  ipu_enable_pipelining_ = ipu_enable_pipelining;
+  ipu_batches_per_step_ = ipu_batches_per_step;
+  ipu_batch_size_ = ipu_batch_size;
+  ipu_need_avg_shard_ = ipu_need_avg_shard;
+
+  Update();
+}
 AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
 #define CP_MEMBER(member__) member__ = other.member__;
@@ -233,12 +250,23 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
 
   CP_MEMBER(thread_local_stream_);
 
+  // ipu related
+  CP_MEMBER(use_ipu_);
+  CP_MEMBER(ipu_device_num_);
+  CP_MEMBER(ipu_enable_pipelining_);
+  CP_MEMBER(ipu_batches_per_step_);
+  CP_MEMBER(ipu_batch_size_);
+  CP_MEMBER(ipu_need_avg_shard_);
+
   if (use_gpu_) {
     PADDLE_ENFORCE_EQ(use_xpu_, false,
                       platform::errors::InvalidArgument(
                           "Only one choice can be made between CPU and XPU."));
     pass_builder_.reset(new GpuPassStrategy(
         *static_cast<GpuPassStrategy *>(other.pass_builder())));
+  } else if (use_ipu_) {
+    pass_builder_.reset(new IpuPassStrategy(
+        *static_cast<IpuPassStrategy *>(other.pass_builder())));
   } else if (use_xpu_) {
     pass_builder_.reset(new XpuPassStrategy(
         *static_cast<XpuPassStrategy *>(other.pass_builder())));
@@ -413,14 +441,18 @@ void AnalysisConfig::Update() {
   // Transfer pass_builder and copy the existing compatible passes.
   if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
       ((use_xpu() ^ pass_builder_->use_xpu())) ||
-      ((use_npu() ^ pass_builder_->use_npu()))) {
+      ((use_npu() ^ pass_builder_->use_npu())) ||
+      ((use_ipu() ^ pass_builder_->use_ipu()))) {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy);
 
       if (use_tensorrt_) {
         // Append after the Affine_channel_conv_fuse pass.
         pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
       }
+    } else if (use_ipu()) {
+      VLOG(1) << "Create a new IpuPassStrategy.";
+      pass_builder_.reset(new IpuPassStrategy);
     } else if (use_xpu()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(), false,
@@ -441,6 +473,10 @@ void AnalysisConfig::Update() {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy(
           *static_cast<GpuPassStrategy *>(pass_builder_.get())));
+    } else if (use_ipu()) {
+      VLOG(1) << "IpuPassStrategy has been used.";
+      pass_builder_.reset(new IpuPassStrategy(
+          *static_cast<IpuPassStrategy *>(pass_builder_.get())));
     } else if (use_xpu()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(), false,
@@ -565,6 +601,13 @@ void AnalysisConfig::Update() {
                                         "with NPU-runtime."));
 #endif
   }
+  if (use_ipu_) {
+#ifndef PADDLE_WITH_IPU
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to enable the IPU, "
+        "but Paddle was not compiled with -DWITH_IPU."));
+#endif
+  }
 
   if (ir_debug_) {
     pass_builder()->TurnOnDebug();
@@ -635,6 +678,13 @@ std::string AnalysisConfig::SerializeInfoCache() {
 
   ss << thread_local_stream_;
 
+  ss << use_ipu_;
+  ss << ipu_device_num_;
+  ss << ipu_enable_pipelining_;
+  ss << ipu_batches_per_step_;
+  ss << ipu_batch_size_;
+  ss << ipu_need_avg_shard_;
+
   return ss.str();
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 2293b702468532e9782e2a9477c0cb9e5afa6d57..5d5719533e7a745e67949152ff2a83c1b06f2d06 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -110,6 +110,14 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
     // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
     std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
                 pt.data.length());
+  } else if (platform::is_ipu_place(place)) {
+#ifdef PADDLE_WITH_IPU
+    std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
+                pt.data.length());
+#else
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "Not compiled with WITH_IPU, should not reach here."));
+#endif
   } else if (platform::is_gpu_place(place)) {
     PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), false,
                       platform::errors::InvalidArgument(
@@ -294,6 +302,14 @@ bool AnalysisPredictor::CreateExecutor() {
           "engine), but Paddle was not compiled "
           "with LITE_WITH_NNADAPTER."));
     }
+  } else if (config_.use_ipu()) {
+#ifdef PADDLE_WITH_IPU
+    place_ = paddle::platform::IPUPlace();
+#else
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to use IPU forward propagation, but Paddle was not "
+        "compiled with WITH_IPU."));
+#endif
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -643,6 +659,13 @@ void AnalysisPredictor::PrepareArgument() {
     LOG(INFO) << "Lite subgraph engine is enabled";
   }
 
+  argument_.SetUseIpu(config_.use_ipu_);
+  argument_.SetIpuDeviceNum(config_.ipu_device_num());
+  argument_.SetIpuEnablePipelining(config_.ipu_enable_pipelining_);
+  argument_.SetIpuBatchesPerStep(config_.ipu_batches_per_step_);
+  argument_.SetIpuBatchSize(config_.ipu_batch_size_);
+  argument_.SetIpuNeedAvgShard(config_.ipu_need_avg_shard_);
+
   if (config_.use_mkldnn_) {
     LOG(INFO) << "MKLDNN is enabled";
     argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);
@@ -916,6 +939,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
   res->SetName(name);
   if (platform::is_cpu_place(place_)) {
     res->SetPlace(PaddlePlace::kCPU);
+  } else if (platform::is_ipu_place(place_)) {
+    // Currently, the tensor copy between CPU and IPU is handled by
+    // IpuBackend.
+    res->SetPlace(PaddlePlace::kCPU);
   } else if (platform::is_xpu_place(place_)) {
     if (config_.lite_engine_enabled()) {
       // Currently, Paddle-Lite's XPU user interface only supports the transfer
@@ -951,6 +978,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
   res->SetName(name);
   if (platform::is_cpu_place(place_)) {
     res->SetPlace(PaddlePlace::kCPU);
+  } else if (platform::is_ipu_place(place_)) {
+    // Currently, the tensor copy between CPU and IPU is handled by
+    // IpuBackend.
+    res->SetPlace(PaddlePlace::kCPU);
   } else if (platform::is_xpu_place(place_)) {
     if (config_.lite_engine_enabled()) {
       // Currently, Paddle-Lite's XPU user interface only supports the transfer
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index abe197f656b6013ca8bf45530239a9d7934189e5..01d4dbccd50eaf2c288110562784bdea5a66080b 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -239,6 +239,14 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
     std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
 #else
     std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
+#endif
+  } else if (paddle::platform::is_ipu_place(t_place)) {
+#ifdef PADDLE_WITH_IPU
+    std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
+#else
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Cannot create a tensor with IPU place because Paddle is not "
+        "compiled with IPU."));
 #endif
   } else if (place_ == PlaceType::kGPU) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index f381b5fb23e4b81b09fa58ff038b2e90f9470c1f..77409f95b042eac630363e38bdb7994d5ba1096a 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -230,6 +230,24 @@ struct PD_INFER_DECL AnalysisConfig {
                  bool autotune = true, const std::string& autotune_file = "",
                  const std::string& precision = "int16",
                  bool adaptive_seqlen = false);
+
+  ///
+  /// \brief Turn on IPU.
+  ///
+  /// \param device_num The number of IPUs.
+  /// \param ipu_enable_pipelining Enable data pipelining between subgraphs,
+  /// where each subgraph is placed on one IPU. (This feature requires the
+  /// number of IPUs > 1.)
+  /// \param ipu_batches_per_step The number of micro-batches consumed per
+  /// run. (This feature requires pipelining to be enabled.)
+  /// \param ipu_batch_size The micro batch size, i.e., the batch size used
+  /// in the graph.
+  /// \param ipu_need_avg_shard Enable automatic graph sharding. (This
+  /// feature requires the number of IPUs > 1.)
+  ///
+  void EnableIpu(int device_num = 1, bool ipu_enable_pipelining = false,
+                 int ipu_batches_per_step = 1, int ipu_batch_size = 1,
+                 bool ipu_need_avg_shard = false);
   ///
   /// \brief Set XPU device id.
   ///
@@ -260,6 +278,11 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return bool Whether the NPU is turned on.
   ///
   bool use_npu() const { return use_npu_; }
+  /// \brief A boolean state telling whether the IPU is turned on.
+  ///
+  /// \return bool Whether the IPU is turned on.
+  ///
+  bool use_ipu() const { return use_ipu_; }
   ///
   /// \brief Get the GPU device id.
   ///
@@ -278,6 +301,11 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return int The NPU device id.
   ///
   int npu_device_id() const { return npu_device_id_; }
+  /// \brief Get the number of IPU devices.
+  ///
+  /// \return int The number of IPU devices.
+  ///
+  int ipu_device_num() const { return ipu_device_num_; }
   ///
   /// \brief Get the initial size in MB of the GPU memory pool.
   ///
@@ -840,6 +868,15 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_mkldnn_bfloat16_{false};
   std::unordered_set<std::string> bfloat16_enabled_op_types_;
 
+  // ipu related.
+  bool use_ipu_{false};
+  int ipu_device_num_{1};
+
+  bool ipu_enable_pipelining_{false};
+  int ipu_batches_per_step_{1};
+  int ipu_batch_size_{1};
+  bool ipu_need_avg_shard_{false};
+
   // If the config is already used on a predictor, it becomes invalid.
   // Any config can only be used with one predictor.
   // Variables held by config can take up a lot of memory in some cases.
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index de2de112344eef1013c7cfef9a79811cd2acdc10..9023da40328e82ad94278d1c77e760fcd315442f 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -300,4 +300,8 @@ void CpuPassStrategy::EnableMkldnnBfloat16() {
 #endif
 }
 
+IpuPassStrategy::IpuPassStrategy() : PassStrategy({}) {
+  passes_.assign({"inference_process_pass"});
+}
+
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index f25060cd091b6a2209580565dd21f0bebc58fbfb..351cf71e5ca7493928dfd81d776d847463f3b7bf 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -148,6 +148,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in npu mode.
   bool use_npu() const { return use_npu_; }
 
+  /// \brief Check if we are using ipu.
+  /// \return A bool variable implying whether we are in ipu mode.
+  bool use_ipu() const { return use_ipu_; }
+
   /// \brief Default destructor.
   virtual ~PassStrategy() = default;
 
@@ -156,6 +160,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   bool use_xpu_{false};
   bool use_gpu_{false};
   bool use_npu_{false};
+  bool use_ipu_{false};
   bool use_mkldnn_{false};
   /// \endcond
 };
@@ -259,6 +264,22 @@ class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
   }
 };
 
+/// \class IpuPassStrategy
+/// \brief The IPU passes controller. It is used in AnalysisPredictor with
+/// IPU mode.
+class PD_INFER_DECL IpuPassStrategy final : public PassStrategy {
+ public:
+  /// \brief Default constructor of IpuPassStrategy.
+  IpuPassStrategy();
+
+  /// \brief Construct by copying another IpuPassStrategy object.
+  /// \param[in] other The IpuPassStrategy object we want to copy.
+  explicit IpuPassStrategy(const IpuPassStrategy &other)
+      : PassStrategy(other.AllPasses()) {
+    use_ipu_ = true;
+  }
+};
+
 /// \brief List of tensorRT subgraph passes.
 PD_INFER_DECL extern const std::vector<std::string> kTRTSubgraphPasses;
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index a28b0c172aff0e7bea822b8bef7beb3a87945581..9dafd0d17c7157c0e351b67d0a01fccccbdbc47a 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -757,3 +757,12 @@ endif()
 if(ON_INFER OR WITH_GPU)
   set_tests_properties(test_analyzer_transformer_profile PROPERTIES TIMEOUT 120)
 endif()
+
+# IPU
+if (WITH_IPU)
+  # resnet50
+  set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50")
+  inference_analysis_test(ipu_resnet50_test SRCS ipu_resnet50_test.cc
+    EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+    ARGS --infer_model=${RESNET50_MODEL_DIR} --warmup=true --repeat=1000)
+endif()
diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
index d6ff3e422368bd9427e4cd3412429baf571c3303..77ec8ee7bfeeb80a36252aeffb369c22f1ff7eb8 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
@@ -122,5 +122,51 @@ TEST(Analyzer_Ernie, compare_results) {
   }
 }
 
+#ifdef PADDLE_WITH_IPU
+// IPU: Compare Deterministic result
+TEST(Analyzer_Ernie_ipu, ipu_compare_determine) {
+  AnalysisConfig cfg;
+  SetIpuConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  LoadInputData(&input_slots_all);
+  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                       input_slots_all);
+}
+
+// IPU: Compare results
+TEST(Analyzer_Ernie_ipu, ipu_compare_results) {
+  AnalysisConfig cfg;
+  SetIpuConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  LoadInputData(&input_slots_all);
+
+  std::ifstream fin(FLAGS_refer_result);
+  std::string line;
+  std::vector<float> ref;
+
+  while (std::getline(fin, line)) {
+    Split(line, ' ', &ref);
+  }
+
+  auto predictor = CreateTestPredictor(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      FLAGS_use_analysis);
+
+  std::vector<PaddleTensor> outputs;
+  for (size_t i = 0; i < input_slots_all.size(); i++) {
+    outputs.clear();
+    predictor->Run(input_slots_all[i], &outputs);
+    auto outputs_size = outputs.front().data.length() / (sizeof(float));
+    for (size_t j = 0; j < outputs_size; ++j) {
+      EXPECT_NEAR(ref[i * outputs_size + j],
+                  static_cast<float *>(outputs[0].data.data())[j],
+                  FLAGS_accuracy);
+    }
+  }
+}
+#endif
+
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
index dd3faac7592104ba47c7f7db54c8c0114c8cb1f1..2582a1cb09eef02272f441376cec73b196142f10 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
+++ b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
@@ -148,5 +148,11 @@ void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false,
   cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
 }
 
+void SetIpuConfig(AnalysisConfig *cfg, int batch_size = 1) {
+  cfg->SetModel(FLAGS_infer_model);
+  // num_ipu, enable_pipelining, batches_per_step, batch_size, need_avg_shard
+  cfg->EnableIpu(4, false, 1, batch_size, true);
+}
+
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ade4a911071ca1a176fc17d783326f2aefe89265
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
@@ -0,0 +1,115 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "gflags/gflags.h"
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+
+static std::vector<float> truth_values = {
+    127.779f,  738.165f,  1013.22f,  -438.17f,  366.401f,  927.659f,  736.222f,
+    -633.684f, -329.927f, -430.155f, -633.062f, -146.548f, -1324.28f, -1349.36f,
+    -242.675f, 117.448f,  -801.723f, -391.514f, -404.818f, 454.16f,   515.48f,
+    -133.031f, 69.293f,   590.096f,  -1434.69f, -1070.89f, 307.074f,  400.525f,
+    -316.12f,  -587.125f, -161.056f, 800.363f,  -96.4708f, 748.706f,  868.174f,
+    -447.938f, 112.737f,  1127.2f,   47.4355f,  677.72f,   593.186f,  -336.4f,
+    551.362f,  397.823f,  78.3979f,  -715.398f, 405.969f,  404.256f,  246.019f,
+    -8.42969f, 131.365f,  -648.051f};
+
+// Compare results with 1 batch
+TEST(Analyzer_Resnet50_ipu, compare_results_1_batch) {
+  std::string model_dir = FLAGS_infer_model + "/" + "model";
+  AnalysisConfig config;
+  // num_ipu, enable_pipelining, batches_per_step, batch_size,
+  // need_avg_shard
+  config.EnableIpu(1, false);
+  config.SetModel(model_dir + "/model", model_dir + "/params");
+
+  std::vector<PaddleTensor> inputs;
+  auto predictor = CreatePaddlePredictor(config);
+  const int batch = 1;
+  const int channel = 3;
+  const int height = 318;
+  const int width = 318;
+  const int input_num = batch * channel * height * width;
+  std::vector<float> input(input_num, 1);
+
+  PaddleTensor in;
+  in.shape = {batch, channel, height, width};
+  in.data =
+      PaddleBuf(static_cast<void *>(input.data()), input_num * sizeof(float));
+  in.dtype = PaddleDType::FLOAT32;
+  inputs.emplace_back(in);
+
+  std::vector<PaddleTensor> outputs;
+
+  ASSERT_TRUE(predictor->Run(inputs, &outputs));
+
+  const size_t expected_size = 1;
+  EXPECT_EQ(outputs.size(), expected_size);
+  float* data_o = static_cast<float*>(outputs[0].data.data());
+
+  for (size_t j = 0; j < outputs[0].data.length() / sizeof(float); j += 10) {
+    EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0.,
+                12e-5);
+  }
+}
+
+// Compare results with 2 batch
+TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) {
+  std::string model_dir = FLAGS_infer_model + "/" + "model";
+  AnalysisConfig config;
+  // num_ipu, enable_pipelining, batches_per_step, batch_size,
+  // need_avg_shard
+  config.EnableIpu(2, false, 1, 2, true);
+  config.SetModel(model_dir + "/model", model_dir + "/params");
+
+  std::vector<PaddleTensor> inputs;
+  auto predictor = CreatePaddlePredictor(config);
+  const int batch = 2;
+  const int channel = 3;
+  const int height = 318;
+  const int width = 318;
+  const int input_num = batch * channel * height * width;
+  std::vector<float> input(input_num, 1);
+
+  PaddleTensor in;
+  in.shape = {batch, channel, height, width};
+  in.data =
+      PaddleBuf(static_cast<void *>(input.data()), input_num * sizeof(float));
+  in.dtype = PaddleDType::FLOAT32;
+  inputs.emplace_back(in);
+
+  std::vector<PaddleTensor> outputs;
+
+  ASSERT_TRUE(predictor->Run(inputs, &outputs));
+
+  const size_t expected_size = 1;
+  EXPECT_EQ(outputs.size(), expected_size);
+  float* data_o = static_cast<float*>(outputs[0].data.data());
+
+  auto num_output_per_batch = outputs[0].data.length() / sizeof(float) / 2;
+  for (size_t j = 0; j < num_output_per_batch; j += 10) {
+    EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0.,
+                12e-5);
+    EXPECT_NEAR((data_o[j + num_output_per_batch] - truth_values[j / 10]) /
+                    truth_values[j / 10],
+                0., 12e-5);
+  }
+}
+
+}  // namespace inference
+}  // namespace paddle
\ No newline at end of file
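
Usage sketch (illustrative, not part of the patch): a minimal client program driving the IPU path added by this PR, mirroring the ResNet50 test above. It relies only on APIs exercised in this diff (EnableIpu, SetModel, CreatePaddlePredictor, PaddleTensor); the model and params paths are placeholders, and it assumes Paddle was built with -DWITH_IPU.

#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  // Run on a single IPU without pipelining; batches_per_step, batch_size,
  // and need_avg_shard keep their defaults (1, 1, false).
  config.EnableIpu(1, false);
  config.SetModel("/path/to/model", "/path/to/params");  // placeholder paths

  auto predictor = paddle::CreatePaddlePredictor(config);

  // Feed one all-ones image with the same shape as the test above.
  const int input_num = 1 * 3 * 318 * 318;
  std::vector<float> input(input_num, 1.0f);

  paddle::PaddleTensor in;
  in.shape = {1, 3, 318, 318};
  in.data = paddle::PaddleBuf(input.data(), input_num * sizeof(float));
  in.dtype = paddle::PaddleDType::FLOAT32;

  // Tensors are fed and fetched through host memory; the CPU<->IPU copies
  // happen inside IpuBackend, as noted in analysis_predictor.cc above.
  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({in}, &outputs);
  return outputs.empty() ? 1 : 0;
}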