未验证提交 27acc6c3 · 作者:Allen Guo · 提交者:GitHub

[IPU] update to popart v2.5.0 (#42552)

* update to popart v2.5.0

* use a specific version of sdk2.5.0
上级 c4bed7e4
...@@ -80,8 +80,8 @@ class Stat : public StatBase { ...@@ -80,8 +80,8 @@ class Stat : public StatBase {
while (prev_value < current_value && while (prev_value < current_value &&
!peak_value_.compare_exchange_weak(prev_value, current_value)) { !peak_value_.compare_exchange_weak(prev_value, current_value)) {
} }
VLOG(8) << "Update peak_value, after update, peak_value = " << peak_value_ VLOG(8) << "Update peak_value, after update, peak_value = "
<< " , current value = " << current_value; << peak_value_.load() << " , current value = " << current_value;
} }
} }
......
...@@ -341,21 +341,26 @@ IpuStrategy::IpuStrategy() { ...@@ -341,21 +341,26 @@ IpuStrategy::IpuStrategy() {
return std::to_string(popart_options.partialsTypeMatMuls == "half"); return std::to_string(popart_options.partialsTypeMatMuls == "half");
}); });
RegisterSetter( RegisterSetter(container_options, "dot_checks",
container_options, "dot_checks", [&](const std::pair<std::string, std::string>& p) {
[&](const std::pair<std::string, std::string>& p) { std::vector<std::string> valid_dot{"Fwd0", "Fwd1", "Bwd0",
std::uint64_t value = std::stoul(p.first); "PreAlias", "Final"};
popart_options.dotChecks.insert(static_cast<popart::DotCheck>(value)); if (std::find(valid_dot.begin(), valid_dot.end(), p.first) ==
}); valid_dot.end()) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Unknown dot check: %s", p.first));
}
popart_options.dotChecks.insert(p.first);
});
RegisterGetter( RegisterGetter(vector_options_getter, options_type, "dot_checks", "vector",
vector_options_getter, options_type, "dot_checks", "vector", [&]() { [&]() {
std::vector<std::string> res; std::vector<std::string> res;
for (auto x : popart_options.dotChecks) { for (auto x : popart_options.dotChecks) {
res.push_back(std::to_string(static_cast<std::uint64_t>(x))); res.push_back(x);
} }
return res; return res;
}); });
RegisterSetter(container_options, "hardware_instrumentations", RegisterSetter(container_options, "hardware_instrumentations",
[&](const std::pair<std::string, std::string>& p) { [&](const std::pair<std::string, std::string>& p) {
...@@ -516,6 +521,21 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor, ...@@ -516,6 +521,21 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor,
} }
} }
// Set one of PopART's replicated-collectives options by name.
// `opt` selects the setting ("prepare_schedule_for_merging_collectives" or
// "merge_all_reduce_collectives"); `value` is the new flag value.
// Throws InvalidArgument for an unrecognized option name.
void IpuStrategy::SetReplicatedCollectivesSettings(const std::string& opt,
                                                   bool value) {
  VLOG(10) << "Set Replica Setting " << opt << " to " << value;
  if (opt == "prepare_schedule_for_merging_collectives") {
    popart_options.replicatedCollectivesSettings
        .prepareScheduleForMergingCollectives = value;
  } else if (opt == "merge_all_reduce_collectives") {
    popart_options.replicatedCollectivesSettings.mergeAllReduceCollectives =
        value;
  } else {
    // Fixed message: was "' %s'" with a stray space inside the quotes.
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Unknown option '%s' for replicated collectives settings", opt));
  }
}
void IpuStrategy::SetAccumulateOuterFragmentSettings( void IpuStrategy::SetAccumulateOuterFragmentSettings(
const std::uint64_t& schedule, const std::vector<int>& values) { const std::uint64_t& schedule, const std::vector<int>& values) {
VLOG(10) << "SetAccumulateOuterFragmentSettings schedule:" << schedule; VLOG(10) << "SetAccumulateOuterFragmentSettings schedule:" << schedule;
......
...@@ -118,6 +118,7 @@ class IpuStrategy { ...@@ -118,6 +118,7 @@ class IpuStrategy {
const std::string &value); const std::string &value);
void SetTensorLocation(const std::string &tensor, const std::string &option, void SetTensorLocation(const std::string &tensor, const std::string &option,
std::uint64_t value); std::uint64_t value);
void SetReplicatedCollectivesSettings(const std::string &opt, bool value);
void SetAccumulateOuterFragmentSettings(const std::uint64_t &schedule, void SetAccumulateOuterFragmentSettings(const std::uint64_t &schedule,
const std::vector<int> &values); const std::vector<int> &values);
void AddCustomOp(const std::string &paddle_op, const std::string &popart_op, void AddCustomOp(const std::string &paddle_op, const std::string &popart_op,
......
...@@ -4394,6 +4394,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -4394,6 +4394,12 @@ All parameter, weight, gradient are variables in Paddle.
option_name, option.first.cast<std::string>(), option_name, option.first.cast<std::string>(),
option.second.cast<std::uint64_t>()); option.second.cast<std::uint64_t>());
} }
} else if (option_name == "replicated_collectives_settings") {
for (auto option : element.second.cast<py::dict>()) {
self.SetReplicatedCollectivesSettings(
option.first.cast<std::string>(),
option.second.cast<bool>());
}
} else if (option_name == "accumulate_outer_fragment") { } else if (option_name == "accumulate_outer_fragment") {
for (auto option : element.second.cast<py::dict>()) { for (auto option : element.second.cast<py::dict>()) {
std::vector<int> values; std::vector<int> values;
......
...@@ -27,12 +27,13 @@ class TestIpuStrategy(unittest.TestCase): ...@@ -27,12 +27,13 @@ class TestIpuStrategy(unittest.TestCase):
ipu_strategy = paddle.static.IpuStrategy() ipu_strategy = paddle.static.IpuStrategy()
all_option_names = ipu_strategy._ipu_strategy.get_all_option_names() all_option_names = ipu_strategy._ipu_strategy.get_all_option_names()
skip_options = [] skip_options = []
skip_options.append(
'mean_accumulation_and_replication_reduction_strategy')
skip_options.append('random_seed') skip_options.append('random_seed')
for option_name in all_option_names: for option_name in all_option_names:
if option_name in skip_options: if option_name in skip_options:
continue continue
option = ipu_strategy._ipu_strategy.get_option(option_name) option = ipu_strategy._ipu_strategy.get_option(option_name)
option_type = option['type'] option_type = option['type']
option_value = option['value'] option_value = option['value']
...@@ -67,7 +68,7 @@ class TestIpuStrategy(unittest.TestCase): ...@@ -67,7 +68,7 @@ class TestIpuStrategy(unittest.TestCase):
def test_set_other_options(self): def test_set_other_options(self):
ipu_strategy = paddle.static.IpuStrategy() ipu_strategy = paddle.static.IpuStrategy()
options = {} options = {}
options['dot_checks'] = ['0', '1', '2', '3'] options['dot_checks'] = ['Fwd0', 'Fwd1', 'Bwd0', 'PreAlias', "Final"]
options['engine_options'] = { options['engine_options'] = {
'debug.allowOutOfMemory': 'true', 'debug.allowOutOfMemory': 'true',
'autoReport.directory': 'path', 'autoReport.directory': 'path',
...@@ -76,7 +77,12 @@ class TestIpuStrategy(unittest.TestCase): ...@@ -76,7 +77,12 @@ class TestIpuStrategy(unittest.TestCase):
options['random_seed'] = 1234 options['random_seed'] = 1234
for k, v in options.items(): for k, v in options.items():
ipu_strategy.set_options({k: v}) ipu_strategy.set_options({k: v})
assert v == ipu_strategy.get_option(k), f"set {k} to {v} failed " if (isinstance(v, list)):
assert v.sort() == ipu_strategy.get_option(k).sort(
), f"set {k} to {v} failed "
else:
assert v == ipu_strategy.get_option(
k), f"set {k} to {v} failed "
# The custom logger need 2 int as inputs # The custom logger need 2 int as inputs
logger = lambda progress, total: print(f"compile progrss: {progress}/{total}") logger = lambda progress, total: print(f"compile progrss: {progress}/{total}")
......
...@@ -148,6 +148,36 @@ class TestReplicaInference(TestBase): ...@@ -148,6 +148,36 @@ class TestReplicaInference(TestBase):
} }
class TestReplicaCollectiveInference(TestBase):
    """Inference with replicated graphs plus the merged-collective settings."""

    def set_attrs(self):
        # Two replicas, no pipelining/accumulation; turn on both collective
        # merging knobs added with the popart v2.5.0 update.
        self.ipu_options = {
            "batches_per_step": 1,
            "enable_pipelining": False,
            "enable_gradient_accumulation": False,
            "accumulation_factor": 1,
            "enable_replicated_graphs": True,
            "replicated_graph_count": 2,
            "accumulate_outer_fragment": {
                0: []
            },
            "replicated_collectives_settings": {
                "prepare_schedule_for_merging_collectives": True,
                "merge_all_reduce_collectives": True
            }
        }
        self.cpu_bs = 1
        self.ipu_bs = 1

    def set_data_feed(self):
        # Single sample for the CPU run; the IPU feed repeats it once per
        # replica so every replica sees the same input.
        image = np.random.rand(1, 3, 10, 10).astype(np.float32)
        self.feed_cpu = {"image": image}
        replicas = self.ipu_options['replicated_graph_count']
        self.feed_ipu = {"image": np.tile(image, [replicas, 1, 1, 1])}
class TestPipelineInference(TestBase): class TestPipelineInference(TestBase):
def set_attrs(self): def set_attrs(self):
self.ipu_options = { self.ipu_options = {
...@@ -190,6 +220,36 @@ class TestTrainBase(TestBase): ...@@ -190,6 +220,36 @@ class TestTrainBase(TestBase):
class TestReplicaTrain(TestTrainBase): class TestReplicaTrain(TestTrainBase):
def set_attrs(self):
    """Configure a two-way replicated training run (no pipelining)."""
    # Plain replication only: no pipelining, no gradient accumulation.
    replica_options = {
        "batches_per_step": 1,
        "enable_pipelining": False,
        "enable_gradient_accumulation": False,
        "accumulation_factor": 1,
        "enable_replicated_graphs": True,
        "replicated_graph_count": 2
    }
    self.ipu_options = replica_options
    # cpu_bs == 2 — presumably to match the two IPU replicas; confirm
    # against TestTrainBase's batching logic.
    self.cpu_bs = 2
    self.ipu_bs = 1
    self.optimizer = 'sgd'
def set_data_feed(self):
    """Build matching CPU and IPU feeds from one random sample."""
    # One base sample; tile it to the CPU batch size and, separately,
    # once per IPU replica.
    sample = np.random.rand(1, 3, 10, 10).astype(np.float32)
    self.feed_cpu = {"image": np.tile(sample, [self.cpu_bs, 1, 1, 1])}
    replica_count = self.ipu_options['replicated_graph_count']
    self.feed_ipu = {"image": np.tile(sample, [replica_count, 1, 1, 1])}
def test(self):
    """Compare CPU and replicated-IPU training outputs."""
    expected = self._test_base(False)
    # NOTE(review): the replicated run appears to emit one output per
    # replica; taking every second element presumably aligns it with the
    # CPU outputs — confirm against _test_base's output layout.
    actual = self._test_base(True)[::2]
    self.assertTrue(np.allclose(expected, actual, atol=self.atol))
class TestReplicaCollectiveTrain(TestTrainBase):
def set_attrs(self): def set_attrs(self):
self.ipu_options = { self.ipu_options = {
"batches_per_step": 1, "batches_per_step": 1,
...@@ -198,6 +258,13 @@ class TestReplicaTrain(TestTrainBase): ...@@ -198,6 +258,13 @@ class TestReplicaTrain(TestTrainBase):
"accumulation_factor": 1, "accumulation_factor": 1,
"enable_replicated_graphs": True, "enable_replicated_graphs": True,
"replicated_graph_count": 2, "replicated_graph_count": 2,
"accumulate_outer_fragment": {
0: []
},
"replicated_collectives_settings": {
"prepare_schedule_for_merging_collectives": True,
"merge_all_reduce_collectives": True
}
} }
self.cpu_bs = 2 self.cpu_bs = 2
self.ipu_bs = 1 self.ipu_bs = 1
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
# run a container # run a container
# docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:latest-dev-ipu bash # docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:latest-dev-ipu bash
FROM graphcore/poplar:2.3.0 FROM graphcore/poplar:poplar-extbaidu:2.5.0-ubuntu-18.04-20220407
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
# ENV variables # ENV variables
...@@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar xz-ut ...@@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar xz-ut
bison graphviz libjpeg-dev zlib1g zlib1g-dev automake locales swig net-tools libtool module-init-tools numactl libnuma-dev \ bison graphviz libjpeg-dev zlib1g zlib1g-dev automake locales swig net-tools libtool module-init-tools numactl libnuma-dev \
openssl libffi-dev pciutils libblas-dev gfortran libblas3 liblapack-dev liblapack3 default-jre screen tmux gdb lldb gcc g++ openssl libffi-dev pciutils libblas-dev gfortran libblas3 liblapack-dev liblapack3 default-jre screen tmux gdb lldb gcc g++
RUN apt-get update && apt-get install -y rdma-core librdmacm1 RUN apt-get update && apt-get install -y rdma-core librdmacm1
RUN apt-get update && apt-get install libspdlog-dev
# Downgrade gcc&&g++ # Downgrade gcc&&g++
WORKDIR /usr/bin WORKDIR /usr/bin
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请先注册。