diff --git a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
index ceef27ac1ce3c0a8ecd15f86a2dbae098059e0a8..0878192ebf8d529bbfa8d3dfcc5ad2d821e24813 100644
--- a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc
@@ -13,14 +13,12 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/ir/ipu/infer_shape_pass.h"
-
-#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
-
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/pass_tester_helper.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/variable_helper.h"
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 3c66e35abf1d59a43e86541614a56ad78f957416..6ff25597125c5f0b13ee603bc17329a351074a8b 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -49,6 +49,9 @@ if(WIN32 AND WITH_GPU)
   cc_library(paddle_inference DEPS ${fluid_modules} ${pten_modules} ${STATIC_INFERENCE_API})
 else()
   create_static_lib(paddle_inference ${fluid_modules} ${pten_modules} ${STATIC_INFERENCE_API})
+  if(WITH_IPU)
+    target_link_libraries(paddle_inference -Wl,--allow-multiple-definition popart_canonicalization_utils)
+  endif()
 endif()
 
 if(NOT APPLE)
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index ad96a4e3437bebd67bd20430f6b0ff3af19aafbf..aff2f60551de93755af34ec742feaab08f32c8ca 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -273,6 +273,14 @@ struct Argument {
   DECL_ARGUMENT_FIELD(cpu_math_library_num_threads, CpuMathLibraryNumThreads,
                       int);
 
+  // ipu related
+  DECL_ARGUMENT_FIELD(use_ipu, UseIpu, bool);
+  DECL_ARGUMENT_FIELD(ipu_device_num, IpuDeviceNum, int);
+  DECL_ARGUMENT_FIELD(ipu_enable_pipelining, IpuEnablePipelining, bool);
+  DECL_ARGUMENT_FIELD(ipu_batches_per_step, IpuBatchesPerStep, int);
+  DECL_ARGUMENT_FIELD(ipu_batch_size, IpuBatchSize, int);
+  DECL_ARGUMENT_FIELD(ipu_need_avg_shard, IpuNeedAvgShard, bool);
+
  private:
   std::unordered_set<std::string> valid_fields_;
 };
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
index 188b2ff851d96fa76edd666c696d98ddb1dcb948..fe6a27f80725f8e6520c0988f195419eb8a0cc1d 100644
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
@@ -65,6 +65,27 @@ void IrGraphBuildPass::RunImpl(Argument *argument) {
                     platform::errors::PreconditionNotMet(
                         "The scope ptr should not be nullptr."));
   argument->main_graph().SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
+
+// ipu related
+#ifdef PADDLE_WITH_IPU
+  if (argument->Has("use_ipu")) {
+    if (argument->use_ipu()) {
+      argument->main_graph().SetNotOwned("num_ipus",
+                                         &argument->ipu_device_num());
+      argument->main_graph().SetNotOwned("need_avg_shard",
+                                         &argument->ipu_need_avg_shard());
+      argument->main_graph().SetNotOwned("enable_pipelining",
+                                         &argument->ipu_enable_pipelining());
+      argument->main_graph().SetNotOwned("batches_per_step",
+                                         &argument->ipu_batches_per_step());
+      argument->main_graph().SetNotOwned("batch_size",
+                                         &argument->ipu_batch_size());
+    } else {
+      PADDLE_THROW(
+          platform::errors::Unimplemented("Please compile with WITH_IPU"));
+    }
+  }
+#endif
 }
 
 std::unique_ptr<framework::ProgramDesc> IrGraphBuildPass::LoadModel(
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 49c4b8d7372e276de7b0979d8c4b9505f9453c91..a1ab69906bfc443d7441647a68f3a4fa1be5e3b1 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -46,6 +46,9 @@ PassStrategy *AnalysisConfig::pass_builder() const {
       pass_builder_.reset(new XpuPassStrategy);
     } else if (use_npu_) {
       pass_builder_.reset(new NpuPassStrategy);
+    } else if (use_ipu_) {
+      LOG(INFO) << "Create IPU IR passes";
+      pass_builder_.reset(new IpuPassStrategy);
     } else {
       LOG(INFO) << "Create CPU IR passes";
       pass_builder_.reset(new CpuPassStrategy);
@@ -139,6 +142,20 @@ void AnalysisConfig::EnableNpu(int device_id) {
   Update();
 }
 
+void AnalysisConfig::EnableIpu(int device_num, bool ipu_enable_pipelining,
+                               int ipu_batches_per_step, int ipu_batch_size,
+                               bool ipu_need_avg_shard) {
+  enable_ir_optim_ = true;
+
+  use_ipu_ = true;
+  ipu_device_num_ = device_num;
+  ipu_enable_pipelining_ = ipu_enable_pipelining;
+  ipu_batches_per_step_ = ipu_batches_per_step;
+  ipu_batch_size_ = ipu_batch_size;
+  ipu_need_avg_shard_ = ipu_need_avg_shard;
+
+  Update();
+}
 AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
 #define CP_MEMBER(member__) member__ = other.member__;
@@ -233,12 +250,23 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
 
   CP_MEMBER(thread_local_stream_);
 
+  // ipu related
+  CP_MEMBER(use_ipu_);
+  CP_MEMBER(ipu_device_num_);
+  CP_MEMBER(ipu_enable_pipelining_);
+  CP_MEMBER(ipu_batches_per_step_);
+  CP_MEMBER(ipu_batch_size_);
+  CP_MEMBER(ipu_need_avg_shard_);
+
   if (use_gpu_) {
     PADDLE_ENFORCE_EQ(use_xpu_, false,
                       platform::errors::InvalidArgument(
                           "Only one choice can be made between CPU and XPU."));
     pass_builder_.reset(new GpuPassStrategy(
         *static_cast<GpuPassStrategy *>(other.pass_builder())));
+  } else if (use_ipu_) {
+    pass_builder_.reset(new IpuPassStrategy(
+        *static_cast<IpuPassStrategy *>(other.pass_builder())));
   } else if (use_xpu_) {
     pass_builder_.reset(new XpuPassStrategy(
         *static_cast<XpuPassStrategy *>(other.pass_builder())));
@@ -413,14 +441,18 @@ void AnalysisConfig::Update() {
   // Transfer pass_builder and copy the existing compatible passes.
   if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
       ((use_xpu() ^ pass_builder_->use_xpu())) ||
-      ((use_npu() ^ pass_builder_->use_npu()))) {
+      ((use_npu() ^ pass_builder_->use_npu())) ||
+      ((use_ipu() ^ pass_builder_->use_ipu()))) {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy);
 
       if (use_tensorrt_) {
         // Append after the Affine_channel_conv_fuse pass.
         pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
       }
+    } else if (use_ipu()) {
+      VLOG(1) << "Create a new IpuPassStrategy.";
+      pass_builder_.reset(new IpuPassStrategy);
     } else if (use_xpu()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(), false,
@@ -441,6 +473,10 @@ void AnalysisConfig::Update() {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy(
           *static_cast<GpuPassStrategy *>(pass_builder_.get())));
+    } else if (use_ipu()) {
+      VLOG(1) << "IpuPassStrategy has been used.";
+      pass_builder_.reset(new IpuPassStrategy(
+          *static_cast<IpuPassStrategy *>(pass_builder_.get())));
     } else if (use_xpu()) {
       PADDLE_ENFORCE_EQ(
           use_gpu(), false,
@@ -565,6 +601,13 @@ void AnalysisConfig::Update() {
                                         "with NPU-runtime."));
 #endif
   }
+  if (use_ipu_) {
+#ifndef PADDLE_WITH_IPU
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to enable the IPU, "
+        "but Paddle was not compiled with -DWITH_IPU."));
+#endif
+  }
 
   if (ir_debug_) {
     pass_builder()->TurnOnDebug();
@@ -635,6 +678,13 @@ std::string AnalysisConfig::SerializeInfoCache() {
 
   ss << thread_local_stream_;
 
+  ss << use_ipu_;
+  ss << ipu_device_num_;
+  ss << ipu_enable_pipelining_;
+  ss << ipu_batches_per_step_;
+  ss << ipu_batch_size_;
+  ss << ipu_need_avg_shard_;
+
   return ss.str();
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 2293b702468532e9782e2a9477c0cb9e5afa6d57..5d5719533e7a745e67949152ff2a83c1b06f2d06 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -110,6 +110,14 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
     // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
     std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
                 pt.data.length());
+  } else if (platform::is_ipu_place(place)) {
+#ifdef PADDLE_WITH_IPU
+    std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
+                pt.data.length());
+#else
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "Not compiled with WITH_IPU, should not reach here."));
+#endif
   } else if (platform::is_gpu_place(place)) {
     PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), false,
                       platform::errors::InvalidArgument(
@@ -294,6 +302,14 @@ bool AnalysisPredictor::CreateExecutor() {
           "engine), but Paddle was not compiled "
           "with LITE_WITH_NNADAPTER."));
     }
+  } else if (config_.use_ipu()) {
+#ifdef PADDLE_WITH_IPU
+    place_ = paddle::platform::IPUPlace();
+#else
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to use IPU forward propagation, but Paddle was not "
+        "compiled with WITH_IPU."));
+#endif
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -643,6 +659,13 @@ void AnalysisPredictor::PrepareArgument() {
     LOG(INFO) << "Lite subgraph engine is enabled";
   }
 
+  argument_.SetUseIpu(config_.use_ipu_);
+  argument_.SetIpuDeviceNum(config_.ipu_device_num());
+  argument_.SetIpuEnablePipelining(config_.ipu_enable_pipelining_);
+  argument_.SetIpuBatchesPerStep(config_.ipu_batches_per_step_);
+  argument_.SetIpuBatchSize(config_.ipu_batch_size_);
+  argument_.SetIpuNeedAvgShard(config_.ipu_need_avg_shard_);
+
   if (config_.use_mkldnn_) {
     LOG(INFO) << "MKLDNN is enabled";
     argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);
@@ -916,6 +939,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
   res->SetName(name);
   if (platform::is_cpu_place(place_)) {
     res->SetPlace(PaddlePlace::kCPU);
+  } else if (platform::is_ipu_place(place_)) {
+    // Currently, the tensor copy between CPU and IPU is handled by
+    // IpuBackend.
+    res->SetPlace(PaddlePlace::kCPU);
   } else if (platform::is_xpu_place(place_)) {
     if (config_.lite_engine_enabled()) {
       // Currently, Paddle-Lite's XPU user interface only supports the transfer
@@ -951,6 +978,10 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
   res->SetName(name);
   if (platform::is_cpu_place(place_)) {
     res->SetPlace(PaddlePlace::kCPU);
+  } else if (platform::is_ipu_place(place_)) {
+    // Currently, the tensor copy between CPU and IPU is handled by
+    // IpuBackend.
+    res->SetPlace(PaddlePlace::kCPU);
   } else if (platform::is_xpu_place(place_)) {
     if (config_.lite_engine_enabled()) {
       // Currently, Paddle-Lite's XPU user interface only supports the transfer
diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index abe197f656b6013ca8bf45530239a9d7934189e5..01d4dbccd50eaf2c288110562784bdea5a66080b 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -239,6 +239,14 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
     std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
 #else
     std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
+#endif
+  } else if (paddle::platform::is_ipu_place(t_place)) {
+#ifdef PADDLE_WITH_IPU
+    std::memcpy(static_cast<void *>(data), t_data, ele_num * sizeof(T));
+#else
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Cannot create a tensor with IPU place because Paddle is not "
+        "compiled with IPU."));
 #endif
   } else if (place_ == PlaceType::kGPU) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index f381b5fb23e4b81b09fa58ff038b2e90f9470c1f..77409f95b042eac630363e38bdb7994d5ba1096a 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -230,6 +230,24 @@ struct PD_INFER_DECL AnalysisConfig {
                  bool autotune = true, const std::string& autotune_file = "",
                  const std::string& precision = "int16",
                  bool adaptive_seqlen = false);
+
+  ///
+  /// \brief Turn on IPU.
+  ///
+  /// \param device_num The number of IPUs.
+  /// \param ipu_enable_pipelining Enable data pipelining between subgraphs,
+  /// where each subgraph is placed on one IPU. (This feature requires the
+  /// number of IPUs > 1.)
+  /// \param ipu_batches_per_step The number of micro-batches consumed per
+  /// run. (This feature requires pipelining to be enabled.)
+  /// \param ipu_batch_size The micro batch size, i.e., the batch size used
+  /// in the graph.
+  /// \param ipu_need_avg_shard Enable automatic graph sharding. (This
+  /// feature requires the number of IPUs > 1.)
+  ///
+  void EnableIpu(int device_num = 1, bool ipu_enable_pipelining = false,
+                 int ipu_batches_per_step = 1, int ipu_batch_size = 1,
+                 bool ipu_need_avg_shard = false);
   ///
   /// \brief Set XPU device id.
   ///
@@ -260,6 +278,11 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return bool Whether the NPU is turned on.
   ///
   bool use_npu() const { return use_npu_; }
+  /// \brief A boolean state telling whether the IPU is turned on.
+  ///
+  /// \return bool Whether the IPU is turned on.
+  ///
+  bool use_ipu() const { return use_ipu_; }
   ///
   /// \brief Get the GPU device id.
   ///
@@ -278,6 +301,11 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \return int The NPU device id.
   ///
   int npu_device_id() const { return npu_device_id_; }
+  /// \brief Get the number of IPU devices.
+  ///
+  /// \return int The number of IPU devices.
+  ///
+  int ipu_device_num() const { return ipu_device_num_; }
   ///
   /// \brief Get the initial size in MB of the GPU memory pool.
   ///
@@ -840,6 +868,15 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_mkldnn_bfloat16_{false};
   std::unordered_set<std::string> bfloat16_enabled_op_types_;
 
+  // ipu related.
+  bool use_ipu_{false};
+  int ipu_device_num_{1};
+
+  bool ipu_enable_pipelining_{false};
+  int ipu_batches_per_step_{1};
+  int ipu_batch_size_{1};
+  bool ipu_need_avg_shard_{false};
+
   // If the config is already used on a predictor, it becomes invalid.
   // Any config can only be used with one predictor.
   // Variables held by config can take up a lot of memory in some cases.
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index de2de112344eef1013c7cfef9a79811cd2acdc10..9023da40328e82ad94278d1c77e760fcd315442f 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -300,4 +300,8 @@ void CpuPassStrategy::EnableMkldnnBfloat16() {
 #endif
 }
 
+IpuPassStrategy::IpuPassStrategy() : PassStrategy({}) {
+  passes_.assign({"inference_process_pass"});
+}
+
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index f25060cd091b6a2209580565dd21f0bebc58fbfb..351cf71e5ca7493928dfd81d776d847463f3b7bf 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -148,6 +148,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in npu mode.
   bool use_npu() const { return use_npu_; }
 
+  /// \brief Check if we are using ipu.
+  /// \return A bool variable implying whether we are in ipu mode.
+  bool use_ipu() const { return use_ipu_; }
+
   /// \brief Default destructor.
   virtual ~PassStrategy() = default;
 
@@ -156,6 +160,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   bool use_xpu_{false};
   bool use_gpu_{false};
   bool use_npu_{false};
+  bool use_ipu_{false};
   bool use_mkldnn_{false};
   /// \endcond
 };
@@ -259,6 +264,22 @@ class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
   }
 };
 
+/// \class IpuPassStrategy
+/// \brief The IPU passes controller. It is used in AnalysisPredictor with
+/// IPU mode.
+class PD_INFER_DECL IpuPassStrategy final : public PassStrategy {
+ public:
+  /// \brief Default constructor of IpuPassStrategy.
+  IpuPassStrategy();
+
+  /// \brief Construct by copying another IpuPassStrategy object.
+  /// \param[in] other The IpuPassStrategy object we want to copy.
+  explicit IpuPassStrategy(const IpuPassStrategy &other)
+      : PassStrategy(other.AllPasses()) {
+    use_ipu_ = true;
+  }
+};
+
 /// \brief List of tensorRT subgraph passes.
 PD_INFER_DECL extern const std::vector<std::string> kTRTSubgraphPasses;
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index a28b0c172aff0e7bea822b8bef7beb3a87945581..9dafd0d17c7157c0e351b67d0a01fccccbdbc47a 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -757,3 +757,12 @@ endif()
 if(ON_INFER OR WITH_GPU)
   set_tests_properties(test_analyzer_transformer_profile PROPERTIES TIMEOUT 120)
 endif()
+
+# IPU
+if (WITH_IPU)
+  # resnet50
+  set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50")
+  inference_analysis_test(ipu_resnet50_test SRCS ipu_resnet50_test.cc
+    EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+    ARGS --infer_model=${RESNET50_MODEL_DIR} --warmup=true --repeat=1000)
+endif()
diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
index d6ff3e422368bd9427e4cd3412429baf571c3303..77ec8ee7bfeeb80a36252aeffb369c22f1ff7eb8 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
@@ -122,5 +122,51 @@ TEST(Analyzer_Ernie, compare_results) {
   }
 }
 
+#ifdef PADDLE_WITH_IPU
+// IPU: Compare Deterministic result
+TEST(Analyzer_Ernie_ipu, ipu_compare_determine) {
+  AnalysisConfig cfg;
+  SetIpuConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  LoadInputData(&input_slots_all);
+  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                       input_slots_all);
+}
+
+// IPU: Compare results
+TEST(Analyzer_Ernie_ipu, ipu_compare_results) {
+  AnalysisConfig cfg;
+  SetIpuConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  LoadInputData(&input_slots_all);
+
+  std::ifstream fin(FLAGS_refer_result);
+  std::string line;
+  std::vector<float> ref;
+
+  while (std::getline(fin, line)) {
+    Split(line, ' ', &ref);
+  }
+
+  auto predictor = CreateTestPredictor(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+      FLAGS_use_analysis);
+
+  std::vector<PaddleTensor> outputs;
+  for (size_t i = 0; i < input_slots_all.size(); i++) {
+    outputs.clear();
+    predictor->Run(input_slots_all[i], &outputs);
+    auto outputs_size = outputs.front().data.length() / (sizeof(float));
+    for (size_t j = 0; j < outputs_size; ++j) {
+      EXPECT_NEAR(ref[i * outputs_size + j],
+                  static_cast<float *>(outputs[0].data.data())[j],
+                  FLAGS_accuracy);
+    }
+  }
+}
+#endif
+
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
index dd3faac7592104ba47c7f7db54c8c0114c8cb1f1..2582a1cb09eef02272f441376cec73b196142f10 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
+++ b/paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
@@ -148,5 +148,11 @@ void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false,
   cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
 }
 
+void SetIpuConfig(AnalysisConfig *cfg, int batch_size = 1) {
+  cfg->SetModel(FLAGS_infer_model);
+  // num_ipu, enable_pipelining, batches_per_step, batch_size, need_avg_shard
+  cfg->EnableIpu(4, false, 1, batch_size, true);
+}
+
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ade4a911071ca1a176fc17d783326f2aefe89265
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc
@@ -0,0 +1,115 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "gflags/gflags.h"
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+
+static std::vector<float> truth_values = {
+    127.779f,  738.165f,  1013.22f,  -438.17f,  366.401f,  927.659f,  736.222f,
+    -633.684f, -329.927f, -430.155f, -633.062f, -146.548f, -1324.28f, -1349.36f,
+    -242.675f, 117.448f,  -801.723f, -391.514f, -404.818f, 454.16f,   515.48f,
+    -133.031f, 69.293f,   590.096f,  -1434.69f, -1070.89f, 307.074f,  400.525f,
+    -316.12f,  -587.125f, -161.056f, 800.363f,  -96.4708f, 748.706f,  868.174f,
+    -447.938f, 112.737f,  1127.2f,   47.4355f,  677.72f,   593.186f,  -336.4f,
+    551.362f,  397.823f,  78.3979f,  -715.398f, 405.969f,  404.256f,  246.019f,
+    -8.42969f, 131.365f,  -648.051f};
+
+// Compare results with 1 batch
+TEST(Analyzer_Resnet50_ipu, compare_results_1_batch) {
+  std::string model_dir = FLAGS_infer_model + "/" + "model";
+  AnalysisConfig config;
+  // num_ipu, enable_pipelining, batches_per_step, batch_size,
+  // need_avg_shard
+  config.EnableIpu(1, false);
+  config.SetModel(model_dir + "/model", model_dir + "/params");
+
+  std::vector<PaddleTensor> inputs;
+  auto predictor = CreatePaddlePredictor(config);
+  const int batch = 1;
+  const int channel = 3;
+  const int height = 318;
+  const int width = 318;
+  const int input_num = batch * channel * height * width;
+  std::vector<float> input(input_num, 1);
+
+  PaddleTensor in;
+  in.shape = {batch, channel, height, width};
+  in.data =
+      PaddleBuf(static_cast<void *>(input.data()), input_num * sizeof(float));
+  in.dtype = PaddleDType::FLOAT32;
+  inputs.emplace_back(in);
+
+  std::vector<PaddleTensor> outputs;
+
+  ASSERT_TRUE(predictor->Run(inputs, &outputs));
+
+  const size_t expected_size = 1;
+  EXPECT_EQ(outputs.size(), expected_size);
+  float* data_o = static_cast<float*>(outputs[0].data.data());
+
+  for (size_t j = 0; j < outputs[0].data.length() / sizeof(float); j += 10) {
+    EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0.,
+                12e-5);
+  }
+}
+
+// Compare results with 2 batch
+TEST(Analyzer_Resnet50_ipu, compare_results_2_batch) {
+  std::string model_dir = FLAGS_infer_model + "/" + "model";
+  AnalysisConfig config;
+  // num_ipu, enable_pipelining, batches_per_step, batch_size,
+  // need_avg_shard
+  config.EnableIpu(2, false, 1, 2, true);
+  config.SetModel(model_dir + "/model", model_dir + "/params");
+
+  std::vector<PaddleTensor> inputs;
+  auto predictor = CreatePaddlePredictor(config);
+  const int batch = 2;
+  const int channel = 3;
+  const int height = 318;
+  const int width = 318;
+  const int input_num = batch * channel * height * width;
+  std::vector<float> input(input_num, 1);
+
+  PaddleTensor in;
+  in.shape = {batch, channel, height, width};
+  in.data =
+      PaddleBuf(static_cast<void *>(input.data()), input_num * sizeof(float));
+  in.dtype = PaddleDType::FLOAT32;
+  inputs.emplace_back(in);
+
+  std::vector<PaddleTensor> outputs;
+
+  ASSERT_TRUE(predictor->Run(inputs, &outputs));
+
+  const size_t expected_size = 1;
+  EXPECT_EQ(outputs.size(), expected_size);
+  float* data_o = static_cast<float*>(outputs[0].data.data());
+
+  auto num_output_per_batch = outputs[0].data.length() / sizeof(float) / 2;
+  for (size_t j = 0; j < num_output_per_batch; j += 10) {
+    EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0.,
+                12e-5);
+    EXPECT_NEAR((data_o[j + num_output_per_batch] - truth_values[j / 10]) /
+                    truth_values[j / 10],
+                0., 12e-5);
+  }
+}
+
+}  // namespace inference
+}  // namespace paddle
\ No newline at end of file
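
Usage sketch (illustrative, not part of the patch): a minimal client program driving the IPU path added by this PR, mirroring the ResNet50 test above. It relies only on APIs exercised in this diff (EnableIpu, SetModel, CreatePaddlePredictor, PaddleTensor); the model and params paths are placeholders, and it assumes Paddle was built with -DWITH_IPU.

#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  // Run on a single IPU without pipelining; batches_per_step, batch_size,
  // and need_avg_shard keep their defaults (1, 1, false).
  config.EnableIpu(1, false);
  config.SetModel("/path/to/model", "/path/to/params");  // placeholder paths

  auto predictor = paddle::CreatePaddlePredictor(config);

  // Feed one all-ones image with the same shape as the test above.
  const int input_num = 1 * 3 * 318 * 318;
  std::vector<float> input(input_num, 1.0f);

  paddle::PaddleTensor in;
  in.shape = {1, 3, 318, 318};
  in.data = paddle::PaddleBuf(input.data(), input_num * sizeof(float));
  in.dtype = paddle::PaddleDType::FLOAT32;

  // Tensors are fed and fetched through host memory; the CPU<->IPU copies
  // happen inside IpuBackend, as noted in analysis_predictor.cc above.
  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({in}, &outputs);
  return outputs.empty() ? 1 : 0;
}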