diff --git a/paddle/fluid/memory/stats.h b/paddle/fluid/memory/stats.h index 0906567dbf6c17d993b53a128add88608708e12a..b4850a8e9e919b88a5284bfbcfd54631a14911ec 100644 --- a/paddle/fluid/memory/stats.h +++ b/paddle/fluid/memory/stats.h @@ -80,8 +80,8 @@ class Stat : public StatBase { while (prev_value < current_value && !peak_value_.compare_exchange_weak(prev_value, current_value)) { } - VLOG(8) << "Update peak_value, after update, peak_value = " << peak_value_ - << " , current value = " << current_value; + VLOG(8) << "Update peak_value, after update, peak_value = " + << peak_value_.load() << " , current value = " << current_value; } } diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc index 20214428fab36309ecd3c3bb35973f679140c946..5bf705864ef3c9f17f73322f9c15e28cce6d60c7 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc @@ -341,21 +341,26 @@ IpuStrategy::IpuStrategy() { return std::to_string(popart_options.partialsTypeMatMuls == "half"); }); - RegisterSetter( - container_options, "dot_checks", - [&](const std::pair<std::string, std::string>& p) { - std::uint64_t value = std::stoul(p.first); - popart_options.dotChecks.insert(static_cast<popart::DotCheck>(value)); - }); + RegisterSetter(container_options, "dot_checks", + [&](const std::pair<std::string, std::string>& p) { + std::vector<std::string> valid_dot{"Fwd0", "Fwd1", "Bwd0", + "PreAlias", "Final"}; + if (std::find(valid_dot.begin(), valid_dot.end(), p.first) == + valid_dot.end()) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unknown dot check: %s", p.first)); + } + popart_options.dotChecks.insert(p.first); + }); - RegisterGetter( - vector_options_getter, options_type, "dot_checks", "vector", [&]() { - std::vector<std::string> res; - for (auto x : popart_options.dotChecks) { - res.push_back(std::to_string(static_cast<uint32_t>(x))); - } - return res; - }); + RegisterGetter(vector_options_getter, options_type, "dot_checks", "vector", + [&]() { + std::vector<std::string> res; + for (auto x : 
popart_options.dotChecks) { + res.push_back(x); + } + return res; + }); RegisterSetter(container_options, "hardware_instrumentations", [&](const std::pair<std::string, std::string>& p) { @@ -516,6 +521,21 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor, } } +void IpuStrategy::SetReplicatedCollectivesSettings(const std::string& opt, + bool value) { + VLOG(10) << "Set Replica Setting " << opt << " to " << value; + if (opt == "prepare_schedule_for_merging_collectives") { + popart_options.replicatedCollectivesSettings + .prepareScheduleForMergingCollectives = value; + } else if (opt == "merge_all_reduce_collectives") { + popart_options.replicatedCollectivesSettings.mergeAllReduceCollectives = + value; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unknown option '%s' for replicated collectives settings", opt)); + } +} + void IpuStrategy::SetAccumulateOuterFragmentSettings( const std::uint64_t& schedule, const std::vector<int>& values) { VLOG(10) << "SetAccumulateOuterFragmentSettings schedule:" << schedule; diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h index fa57dcd676d81293f3b38eb3f219a57b5d8a08c3..da08c76fb90d1313085cc2587e668423c62f6831 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.h +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h @@ -118,6 +118,7 @@ class IpuStrategy { const std::string &value); void SetTensorLocation(const std::string &tensor, const std::string &option, std::uint64_t value); + void SetReplicatedCollectivesSettings(const std::string &opt, bool value); void SetAccumulateOuterFragmentSettings(const std::uint64_t &schedule, const std::vector<int> &values); void AddCustomOp(const std::string &paddle_op, const std::string &popart_op, diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 602a0345b04fe98b820ab6ab79fd6568c978cfed..b7ecf0985083831a5c732321f34c9c2324eba1ec 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc 
@@ -4394,6 +4394,12 @@ All parameter, weight, gradient are variables in Paddle. option_name, option.first.cast<std::string>(), option.second.cast<std::uint64_t>()); } + } else if (option_name == "replicated_collectives_settings") { + for (auto option : element.second.cast<py::dict>()) { + self.SetReplicatedCollectivesSettings( + option.first.cast<std::string>(), + option.second.cast<bool>()); + } } else if (option_name == "accumulate_outer_fragment") { for (auto option : element.second.cast<py::dict>()) { std::vector<int> values; diff --git a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py index 45f75f1b4df81ef883f8faba0e96bbf54d7c761a..21a6655406729e930e892bfb50b1e287ef3ed96e 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py @@ -27,12 +27,13 @@ class TestIpuStrategy(unittest.TestCase): ipu_strategy = paddle.static.IpuStrategy() all_option_names = ipu_strategy._ipu_strategy.get_all_option_names() skip_options = [] + skip_options.append( + 'mean_accumulation_and_replication_reduction_strategy') skip_options.append('random_seed') for option_name in all_option_names: if option_name in skip_options: continue - option = ipu_strategy._ipu_strategy.get_option(option_name) option_type = option['type'] option_value = option['value'] @@ -67,7 +68,7 @@ class TestIpuStrategy(unittest.TestCase): def test_set_other_options(self): ipu_strategy = paddle.static.IpuStrategy() options = {} - options['dot_checks'] = ['0', '1', '2', '3'] + options['dot_checks'] = ['Fwd0', 'Fwd1', 'Bwd0', 'PreAlias', "Final"] options['engine_options'] = { 'debug.allowOutOfMemory': 'true', 'autoReport.directory': 'path', @@ -76,7 +77,12 @@ class TestIpuStrategy(unittest.TestCase): options['random_seed'] = 1234 for k, v in options.items(): ipu_strategy.set_options({k: v}) - assert v == ipu_strategy.get_option(k), f"set {k} to {v} failed " + if (isinstance(v, list)): + assert sorted(v) == sorted( 
ipu_strategy.get_option(k)), f"set {k} to {v} failed " + else: + assert v == ipu_strategy.get_option( + k), f"set {k} to {v} failed " # The custom logger need 2 int as inputs logger = lambda progress, total: print(f"compile progrss: {progress}/{total}") diff --git a/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py index 792b88849faf32090bbea83ae55647703def88f5..884162d336f352dd967f8c023948a5912ed5404a 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_model_parallel_ipu.py @@ -148,6 +148,36 @@ class TestReplicaInference(TestBase): } + +class TestReplicaCollectiveInference(TestBase): + def set_attrs(self): + self.ipu_options = { + "batches_per_step": 1, + "enable_pipelining": False, + "enable_gradient_accumulation": False, + "accumulation_factor": 1, + "enable_replicated_graphs": True, + "replicated_graph_count": 2, + "accumulate_outer_fragment": { + 0: [] + }, + "replicated_collectives_settings": { + "prepare_schedule_for_merging_collectives": True, + "merge_all_reduce_collectives": True + } + } + self.cpu_bs = 1 + self.ipu_bs = 1 + + def set_data_feed(self): + np_image = np.random.rand(1, 3, 10, 10).astype(np.float32) + self.feed_cpu = {"image": np_image} + self.feed_ipu = { + "image": + np.tile(np_image, + [self.ipu_options['replicated_graph_count'], 1, 1, 1]) + } + + class TestPipelineInference(TestBase): def set_attrs(self): self.ipu_options = { @@ -190,6 +220,36 @@ class TestTrainBase(TestBase): class TestReplicaTrain(TestTrainBase): + def set_attrs(self): + self.ipu_options = { + "batches_per_step": 1, + "enable_pipelining": False, + "enable_gradient_accumulation": False, + "accumulation_factor": 1, + "enable_replicated_graphs": True, + "replicated_graph_count": 2 + } + self.cpu_bs = 2 + self.ipu_bs = 1 + self.optimizer = 'sgd' + + def set_data_feed(self): + np_image = np.random.rand(1, 
3, 10, 10).astype(np.float32) + self.feed_cpu = {"image": np.tile(np_image, [self.cpu_bs, 1, 1, 1])} + self.feed_ipu = { + "image": + np.tile(np_image, + [self.ipu_options['replicated_graph_count'], 1, 1, 1]) + } + + def test(self): + cpu_outputs = self._test_base(False) + ipu_outputs = self._test_base(True)[::2] + + self.assertTrue(np.allclose(cpu_outputs, ipu_outputs, atol=self.atol)) + + +class TestReplicaCollectiveTrain(TestTrainBase): def set_attrs(self): self.ipu_options = { "batches_per_step": 1, @@ -198,6 +258,13 @@ class TestReplicaTrain(TestTrainBase): "accumulation_factor": 1, "enable_replicated_graphs": True, "replicated_graph_count": 2, + "accumulate_outer_fragment": { + 0: [] + }, + "replicated_collectives_settings": { + "prepare_schedule_for_merging_collectives": True, + "merge_all_reduce_collectives": True + } } self.cpu_bs = 2 self.ipu_bs = 1 diff --git a/tools/dockerfile/Dockerfile.ipu b/tools/dockerfile/Dockerfile.ipu index 08536ae401fe174f3630c259dce68b4fd038d8d3..d6c46245e501c4ea138756e5c6a0fd5ef07d4c9c 100644 --- a/tools/dockerfile/Dockerfile.ipu +++ b/tools/dockerfile/Dockerfile.ipu @@ -6,7 +6,7 @@ # run a container # docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:latest-dev-ipu bash -FROM graphcore/poplar:2.3.0 +FROM graphcore/poplar-extbaidu:2.5.0-ubuntu-18.04-20220407 MAINTAINER PaddlePaddle Authors # ENV variables @@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar xz-ut bison graphviz libjpeg-dev zlib1g zlib1g-dev automake locales swig net-tools libtool module-init-tools numactl libnuma-dev \ openssl libffi-dev pciutils libblas-dev gfortran libblas3 liblapack-dev liblapack3 default-jre screen tmux gdb lldb gcc g++ RUN apt-get update && apt-get install -y rdma-core librdmacm1 +RUN apt-get update && apt-get install -y libspdlog-dev # Downgrade gcc&&g++ WORKDIR /usr/bin