diff --git a/paddle/fluid/memory/stats.h b/paddle/fluid/memory/stats.h index 0906567dbf6c17d993b53a128add88608708e12a..b4850a8e9e919b88a5284bfbcfd54631a14911ec 100644 --- a/paddle/fluid/memory/stats.h +++ b/paddle/fluid/memory/stats.h @@ -80,8 +80,8 @@ class Stat : public StatBase { while (prev_value < current_value && !peak_value_.compare_exchange_weak(prev_value, current_value)) { } - VLOG(8) << "Update peak_value, after update, peak_value = " << peak_value_ - << " , current value = " << current_value; + VLOG(8) << "Update peak_value, after update, peak_value = " + << peak_value_.load() << " , current value = " << current_value; } } diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc index 20214428fab36309ecd3c3bb35973f679140c946..5bf705864ef3c9f17f73322f9c15e28cce6d60c7 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc @@ -341,21 +341,26 @@ IpuStrategy::IpuStrategy() { return std::to_string(popart_options.partialsTypeMatMuls == "half"); }); - RegisterSetter( - container_options, "dot_checks", - [&](const std::pair<std::string, std::string>& p) { - std::uint64_t value = std::stoul(p.first); - popart_options.dotChecks.insert(static_cast<popart::DotCheck>(value)); - }); + RegisterSetter(container_options, "dot_checks", + [&](const std::pair<std::string, std::string>& p) { + std::vector<std::string> valid_dot{"Fwd0", "Fwd1", "Bwd0", + "PreAlias", "Final"}; + if (std::find(valid_dot.begin(), valid_dot.end(), p.first) == + valid_dot.end()) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unknown dot check: %s", p.first)); + } + popart_options.dotChecks.insert(p.first); + }); - RegisterGetter( - vector_options_getter, options_type, "dot_checks", "vector", [&]() { - std::vector<std::string> res; - for (auto x : popart_options.dotChecks) { - res.push_back(std::to_string(static_cast<uint32_t>(x))); - } - return res; - }); + RegisterGetter(vector_options_getter, options_type, "dot_checks", "vector", + [&]() { + std::vector<std::string> res; + for (auto x : 
popart_options.dotChecks) { + res.push_back(x); + } + return res; + }); RegisterSetter(container_options, "hardware_instrumentations", [&](const std::pair<std::string, std::string>& p) { @@ -516,6 +521,21 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor, } } +void IpuStrategy::SetReplicatedCollectivesSettings(const std::string& opt, + bool value) { + VLOG(10) << "Set Replica Setting " << opt << " to " << value; + if (opt == "prepare_schedule_for_merging_collectives") { + popart_options.replicatedCollectivesSettings + .prepareScheduleForMergingCollectives = value; + } else if (opt == "merge_all_reduce_collectives") { + popart_options.replicatedCollectivesSettings.mergeAllReduceCollectives = + value; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unknown option '%s' for replicated collectives settings", opt)); + } +} + void IpuStrategy::SetAccumulateOuterFragmentSettings( const std::uint64_t& schedule, const std::vector<int>& values) { VLOG(10) << "SetAccumulateOuterFragmentSettings schedule:" << schedule; diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h index fa57dcd676d81293f3b38eb3f219a57b5d8a08c3..da08c76fb90d1313085cc2587e668423c62f6831 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.h +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h @@ -118,6 +118,7 @@ class IpuStrategy { const std::string &value); void SetTensorLocation(const std::string &tensor, const std::string &option, std::uint64_t value); + void SetReplicatedCollectivesSettings(const std::string &opt, bool value); void SetAccumulateOuterFragmentSettings(const std::uint64_t &schedule, const std::vector<int> &values); void AddCustomOp(const std::string &paddle_op, const std::string &popart_op, diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 602a0345b04fe98b820ab6ab79fd6568c978cfed..b7ecf0985083831a5c732321f34c9c2324eba1ec 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc 
@@ -4394,6 +4394,12 @@ All parameter, weight, gradient are variables in Paddle. option_name, option.first.cast<std::string>(), option.second.cast<std::uint64_t>()); } + } else if (option_name == "replicated_collectives_settings") { + for (auto option : element.second.cast<py::dict>()) { + self.SetReplicatedCollectivesSettings( + option.first.cast<std::string>(), + option.second.cast<bool>()); + } } else if (option_name == "accumulate_outer_fragment") { for (auto option : element.second.cast<py::dict>()) { std::vector<int> values; diff --git a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py index 45f75f1b4df81ef883f8faba0e96bbf54d7c761a..21a6655406729e930e892bfb50b1e287ef3ed96e 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py @@ -27,12 +27,13 @@ class TestIpuStrategy(unittest.TestCase): ipu_strategy = paddle.static.IpuStrategy() all_option_names = ipu_strategy._ipu_strategy.get_all_option_names() skip_options = [] + skip_options.append( + 'mean_accumulation_and_replication_reduction_strategy') skip_options.append('random_seed') for option_name in all_option_names: if option_name in skip_options: continue - option = ipu_strategy._ipu_strategy.get_option(option_name) option_type = option['type'] option_value = option['value'] @@ -67,7 +68,7 @@ class TestIpuStrategy(unittest.TestCase): def test_set_other_options(self): ipu_strategy = paddle.static.IpuStrategy() options = {} - options['dot_checks'] = ['0', '1', '2', '3'] + options['dot_checks'] = ['Fwd0', 'Fwd1', 'Bwd0', 'PreAlias', "Final"] options['engine_options'] = { 'debug.allowOutOfMemory': 'true', 'autoReport.directory': 'path', @@ -76,7 +77,12 @@ class TestIpuStrategy(unittest.TestCase): options['random_seed'] = 1234 for k, v in options.items(): ipu_strategy.set_options({k: v}) - assert v == ipu_strategy.get_option(k), f"set {k} to {v} failed " + if (isinstance(v, list)): + assert sorted(v) == sorted( 
ipu_strategy.get_option(k)), f"set {k} to {v} failed " + else: + assert v == ipu_strategy.get_option( + k), f"set {k} to {v} failed " # The custom logger need 2 int as inputs logger = lambda progress, total: print(f"compile progrss: {progress}/{total}") diff --git a/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py index 792b88849faf32090bbea83ae55647703def88f5..884162d336f352dd967f8c023948a5912ed5404a 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py @@ -148,6 +148,36 @@ class TestReplicaInference(TestBase): } + +class TestReplicaCollectiveInference(TestBase): + def set_attrs(self): + self.ipu_options = { + "batches_per_step": 1, + "enable_pipelining": False, + "enable_gradient_accumulation": False, + "accumulation_factor": 1, + "enable_replicated_graphs": True, + "replicated_graph_count": 2, + "accumulate_outer_fragment": { + 0: [] + }, + "replicated_collectives_settings": { + "prepare_schedule_for_merging_collectives": True, + "merge_all_reduce_collectives": True + } + } + self.cpu_bs = 1 + self.ipu_bs = 1 + + def set_data_feed(self): + np_image = np.random.rand(1, 3, 10, 10).astype(np.float32) + self.feed_cpu = {"image": np_image} + self.feed_ipu = { + "image": + np.tile(np_image, + [self.ipu_options['replicated_graph_count'], 1, 1, 1]) + } + + class TestPipelineInference(TestBase): def set_attrs(self): self.ipu_options = { @@ -190,6 +220,36 @@ class TestTrainBase(TestBase): class TestReplicaTrain(TestTrainBase): + def set_attrs(self): + self.ipu_options = { + "batches_per_step": 1, + "enable_pipelining": False, + "enable_gradient_accumulation": False, + "accumulation_factor": 1, + "enable_replicated_graphs": True, + "replicated_graph_count": 2 + } + self.cpu_bs = 2 + self.ipu_bs = 1 + self.optimizer = 'sgd' + + def set_data_feed(self): + np_image = np.random.rand(1, 
3, 10, 10).astype(np.float32) + self.feed_cpu = {"image": np.tile(np_image, [self.cpu_bs, 1, 1, 1])} + self.feed_ipu = { + "image": + np.tile(np_image, + [self.ipu_options['replicated_graph_count'], 1, 1, 1]) + } + + def test(self): + cpu_outputs = self._test_base(False) + ipu_outputs = self._test_base(True)[::2] + + self.assertTrue(np.allclose(cpu_outputs, ipu_outputs, atol=self.atol)) + + +class TestReplicaCollectiveTrain(TestTrainBase): def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -198,6 +258,13 @@ class TestReplicaTrain(TestTrainBase): "accumulation_factor": 1, "enable_replicated_graphs": True, "replicated_graph_count": 2, + "accumulate_outer_fragment": { + 0: [] + }, + "replicated_collectives_settings": { + "prepare_schedule_for_merging_collectives": True, + "merge_all_reduce_collectives": True + } } self.cpu_bs = 2 self.ipu_bs = 1 diff --git a/tools/dockerfile/Dockerfile.ipu b/tools/dockerfile/Dockerfile.ipu index 08536ae401fe174f3630c259dce68b4fd038d8d3..d6c46245e501c4ea138756e5c6a0fd5ef07d4c9c 100644 --- a/tools/dockerfile/Dockerfile.ipu +++ b/tools/dockerfile/Dockerfile.ipu @@ -6,7 +6,7 @@ # run a container # docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:latest-dev-ipu bash -FROM graphcore/poplar:2.3.0 +FROM graphcore/poplar-extbaidu:2.5.0-ubuntu-18.04-20220407 MAINTAINER PaddlePaddle Authors # ENV variables @@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar xz-ut bison graphviz libjpeg-dev zlib1g zlib1g-dev automake locales swig net-tools libtool module-init-tools numactl libnuma-dev \ openssl libffi-dev pciutils libblas-dev gfortran libblas3 liblapack-dev liblapack3 default-jre screen tmux gdb lldb gcc g++ RUN apt-get update && apt-get install -y rdma-core librdmacm1 +RUN apt-get update && apt-get install -y libspdlog-dev # Downgrade gcc&&g++ WORKDIR /usr/bin