未验证提交 27acc6c3 · 作者:Allen Guo · 提交者:GitHub

[IPU] update to popart v2.5.0 (#42552)

* update to popart v2.5.0

* use a specific version of sdk2.5.0
上级 c4bed7e4
...@@ -80,8 +80,8 @@ class Stat : public StatBase { ...@@ -80,8 +80,8 @@ class Stat : public StatBase {
while (prev_value < current_value && while (prev_value < current_value &&
!peak_value_.compare_exchange_weak(prev_value, current_value)) { !peak_value_.compare_exchange_weak(prev_value, current_value)) {
} }
VLOG(8) << "Update peak_value, after update, peak_value = " << peak_value_ VLOG(8) << "Update peak_value, after update, peak_value = "
<< " , current value = " << current_value; << peak_value_.load() << " , current value = " << current_value;
} }
} }
......
...@@ -341,21 +341,26 @@ IpuStrategy::IpuStrategy() { ...@@ -341,21 +341,26 @@ IpuStrategy::IpuStrategy() {
return std::to_string(popart_options.partialsTypeMatMuls == "half"); return std::to_string(popart_options.partialsTypeMatMuls == "half");
}); });
RegisterSetter( RegisterSetter(container_options, "dot_checks",
container_options, "dot_checks", [&](const std::pair<std::string, std::string>& p) {
[&](const std::pair<std::string, std::string>& p) { std::vector<std::string> valid_dot{"Fwd0", "Fwd1", "Bwd0",
std::uint64_t value = std::stoul(p.first); "PreAlias", "Final"};
popart_options.dotChecks.insert(static_cast<popart::DotCheck>(value)); if (std::find(valid_dot.begin(), valid_dot.end(), p.first) ==
}); valid_dot.end()) {
PADDLE_THROW(platform::errors::InvalidArgument(
"Unknown dot check: %s", p.first));
}
popart_options.dotChecks.insert(p.first);
});
RegisterGetter( RegisterGetter(vector_options_getter, options_type, "dot_checks", "vector",
vector_options_getter, options_type, "dot_checks", "vector", [&]() { [&]() {
std::vector<std::string> res; std::vector<std::string> res;
for (auto x : popart_options.dotChecks) { for (auto x : popart_options.dotChecks) {
res.push_back(std::to_string(static_cast<std::uint64_t>(x))); res.push_back(x);
} }
return res; return res;
}); });
RegisterSetter(container_options, "hardware_instrumentations", RegisterSetter(container_options, "hardware_instrumentations",
[&](const std::pair<std::string, std::string>& p) { [&](const std::pair<std::string, std::string>& p) {
...@@ -516,6 +521,21 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor, ...@@ -516,6 +521,21 @@ void IpuStrategy::SetTensorLocation(const std::string& tensor,
} }
} }
// Set one of PopART's replicated-collectives options by name.
// `opt` selects the setting ("prepare_schedule_for_merging_collectives" or
// "merge_all_reduce_collectives"); `value` is the new flag value.
// Throws InvalidArgument for an unrecognized option name.
void IpuStrategy::SetReplicatedCollectivesSettings(const std::string& opt,
                                                   bool value) {
  VLOG(10) << "Set Replica Setting " << opt << " to " << value;
  if (opt == "prepare_schedule_for_merging_collectives") {
    popart_options.replicatedCollectivesSettings
        .prepareScheduleForMergingCollectives = value;
  } else if (opt == "merge_all_reduce_collectives") {
    popart_options.replicatedCollectivesSettings.mergeAllReduceCollectives =
        value;
  } else {
    // Fixed message: was "' %s'" with a stray space inside the quotes.
    PADDLE_THROW(platform::errors::InvalidArgument(
        "Unknown option '%s' for replicated collectives settings", opt));
  }
}
void IpuStrategy::SetAccumulateOuterFragmentSettings( void IpuStrategy::SetAccumulateOuterFragmentSettings(
const std::uint64_t& schedule, const std::vector<int>& values) { const std::uint64_t& schedule, const std::vector<int>& values) {
VLOG(10) << "SetAccumulateOuterFragmentSettings schedule:" << schedule; VLOG(10) << "SetAccumulateOuterFragmentSettings schedule:" << schedule;
......
...@@ -118,6 +118,7 @@ class IpuStrategy { ...@@ -118,6 +118,7 @@ class IpuStrategy {
const std::string &value); const std::string &value);
void SetTensorLocation(const std::string &tensor, const std::string &option, void SetTensorLocation(const std::string &tensor, const std::string &option,
std::uint64_t value); std::uint64_t value);
void SetReplicatedCollectivesSettings(const std::string &opt, bool value);
void SetAccumulateOuterFragmentSettings(const std::uint64_t &schedule, void SetAccumulateOuterFragmentSettings(const std::uint64_t &schedule,
const std::vector<int> &values); const std::vector<int> &values);
void AddCustomOp(const std::string &paddle_op, const std::string &popart_op, void AddCustomOp(const std::string &paddle_op, const std::string &popart_op,
......
...@@ -4394,6 +4394,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -4394,6 +4394,12 @@ All parameter, weight, gradient are variables in Paddle.
option_name, option.first.cast<std::string>(), option_name, option.first.cast<std::string>(),
option.second.cast<std::uint64_t>()); option.second.cast<std::uint64_t>());
} }
} else if (option_name == "replicated_collectives_settings") {
for (auto option : element.second.cast<py::dict>()) {
self.SetReplicatedCollectivesSettings(
option.first.cast<std::string>(),
option.second.cast<bool>());
}
} else if (option_name == "accumulate_outer_fragment") { } else if (option_name == "accumulate_outer_fragment") {
for (auto option : element.second.cast<py::dict>()) { for (auto option : element.second.cast<py::dict>()) {
std::vector<int> values; std::vector<int> values;
......
...@@ -27,12 +27,13 @@ class TestIpuStrategy(unittest.TestCase): ...@@ -27,12 +27,13 @@ class TestIpuStrategy(unittest.TestCase):
ipu_strategy = paddle.static.IpuStrategy() ipu_strategy = paddle.static.IpuStrategy()
all_option_names = ipu_strategy._ipu_strategy.get_all_option_names() all_option_names = ipu_strategy._ipu_strategy.get_all_option_names()
skip_options = [] skip_options = []
skip_options.append(
'mean_accumulation_and_replication_reduction_strategy')
skip_options.append('random_seed') skip_options.append('random_seed')
for option_name in all_option_names: for option_name in all_option_names:
if option_name in skip_options: if option_name in skip_options:
continue continue
option = ipu_strategy._ipu_strategy.get_option(option_name) option = ipu_strategy._ipu_strategy.get_option(option_name)
option_type = option['type'] option_type = option['type']
option_value = option['value'] option_value = option['value']
...@@ -67,7 +68,7 @@ class TestIpuStrategy(unittest.TestCase): ...@@ -67,7 +68,7 @@ class TestIpuStrategy(unittest.TestCase):
def test_set_other_options(self): def test_set_other_options(self):
ipu_strategy = paddle.static.IpuStrategy() ipu_strategy = paddle.static.IpuStrategy()
options = {} options = {}
options['dot_checks'] = ['0', '1', '2', '3'] options['dot_checks'] = ['Fwd0', 'Fwd1', 'Bwd0', 'PreAlias', "Final"]
options['engine_options'] = { options['engine_options'] = {
'debug.allowOutOfMemory': 'true', 'debug.allowOutOfMemory': 'true',
'autoReport.directory': 'path', 'autoReport.directory': 'path',
...@@ -76,7 +77,12 @@ class TestIpuStrategy(unittest.TestCase): ...@@ -76,7 +77,12 @@ class TestIpuStrategy(unittest.TestCase):
options['random_seed'] = 1234 options['random_seed'] = 1234
for k, v in options.items(): for k, v in options.items():
ipu_strategy.set_options({k: v}) ipu_strategy.set_options({k: v})
assert v == ipu_strategy.get_option(k), f"set {k} to {v} failed " if (isinstance(v, list)):
assert v.sort() == ipu_strategy.get_option(k).sort(
), f"set {k} to {v} failed "
else:
assert v == ipu_strategy.get_option(
k), f"set {k} to {v} failed "
# The custom logger need 2 int as inputs # The custom logger need 2 int as inputs
logger = lambda progress, total: print(f"compile progrss: {progress}/{total}") logger = lambda progress, total: print(f"compile progrss: {progress}/{total}")
......
...@@ -148,6 +148,36 @@ class TestReplicaInference(TestBase): ...@@ -148,6 +148,36 @@ class TestReplicaInference(TestBase):
} }
class TestReplicaCollectiveInference(TestBase):
    """Inference with replicated graphs plus the merged-collective settings."""

    def set_attrs(self):
        # Two replicas, no pipelining/accumulation; turn on both collective
        # merging knobs added with the popart v2.5.0 update.
        self.ipu_options = {
            "batches_per_step": 1,
            "enable_pipelining": False,
            "enable_gradient_accumulation": False,
            "accumulation_factor": 1,
            "enable_replicated_graphs": True,
            "replicated_graph_count": 2,
            "accumulate_outer_fragment": {
                0: []
            },
            "replicated_collectives_settings": {
                "prepare_schedule_for_merging_collectives": True,
                "merge_all_reduce_collectives": True
            }
        }
        self.cpu_bs = 1
        self.ipu_bs = 1

    def set_data_feed(self):
        # Single sample for the CPU run; the IPU feed repeats it once per
        # replica so every replica sees the same input.
        image = np.random.rand(1, 3, 10, 10).astype(np.float32)
        self.feed_cpu = {"image": image}
        replicas = self.ipu_options['replicated_graph_count']
        self.feed_ipu = {"image": np.tile(image, [replicas, 1, 1, 1])}
class TestPipelineInference(TestBase): class TestPipelineInference(TestBase):
def set_attrs(self): def set_attrs(self):
self.ipu_options = { self.ipu_options = {
...@@ -190,6 +220,36 @@ class TestTrainBase(TestBase): ...@@ -190,6 +220,36 @@ class TestTrainBase(TestBase):
class TestReplicaTrain(TestTrainBase): class TestReplicaTrain(TestTrainBase):
def set_attrs(self):
    """Configure a two-way replicated training run (no pipelining)."""
    # Plain replication only: no pipelining, no gradient accumulation.
    replica_options = {
        "batches_per_step": 1,
        "enable_pipelining": False,
        "enable_gradient_accumulation": False,
        "accumulation_factor": 1,
        "enable_replicated_graphs": True,
        "replicated_graph_count": 2
    }
    self.ipu_options = replica_options
    # cpu_bs == 2 — presumably to match the two IPU replicas; confirm
    # against TestTrainBase's batching logic.
    self.cpu_bs = 2
    self.ipu_bs = 1
    self.optimizer = 'sgd'
def set_data_feed(self):
    """Build matching CPU and IPU feeds from one random sample."""
    # One base sample; tile it to the CPU batch size and, separately,
    # once per IPU replica.
    sample = np.random.rand(1, 3, 10, 10).astype(np.float32)
    self.feed_cpu = {"image": np.tile(sample, [self.cpu_bs, 1, 1, 1])}
    replica_count = self.ipu_options['replicated_graph_count']
    self.feed_ipu = {"image": np.tile(sample, [replica_count, 1, 1, 1])}
def test(self):
    """Compare CPU and replicated-IPU training outputs."""
    expected = self._test_base(False)
    # NOTE(review): the replicated run appears to emit one output per
    # replica; taking every second element presumably aligns it with the
    # CPU outputs — confirm against _test_base's output layout.
    actual = self._test_base(True)[::2]
    self.assertTrue(np.allclose(expected, actual, atol=self.atol))
class TestReplicaCollectiveTrain(TestTrainBase):
def set_attrs(self): def set_attrs(self):
self.ipu_options = { self.ipu_options = {
"batches_per_step": 1, "batches_per_step": 1,
...@@ -198,6 +258,13 @@ class TestReplicaTrain(TestTrainBase): ...@@ -198,6 +258,13 @@ class TestReplicaTrain(TestTrainBase):
"accumulation_factor": 1, "accumulation_factor": 1,
"enable_replicated_graphs": True, "enable_replicated_graphs": True,
"replicated_graph_count": 2, "replicated_graph_count": 2,
"accumulate_outer_fragment": {
0: []
},
"replicated_collectives_settings": {
"prepare_schedule_for_merging_collectives": True,
"merge_all_reduce_collectives": True
}
} }
self.cpu_bs = 2 self.cpu_bs = 2
self.ipu_bs = 1 self.ipu_bs = 1
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
# run a container # run a container
# docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:latest-dev-ipu bash # docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:latest-dev-ipu bash
FROM graphcore/poplar:2.3.0 FROM graphcore/poplar:poplar-extbaidu:2.5.0-ubuntu-18.04-20220407
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
# ENV variables # ENV variables
...@@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar xz-ut ...@@ -25,6 +25,7 @@ RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar xz-ut
bison graphviz libjpeg-dev zlib1g zlib1g-dev automake locales swig net-tools libtool module-init-tools numactl libnuma-dev \ bison graphviz libjpeg-dev zlib1g zlib1g-dev automake locales swig net-tools libtool module-init-tools numactl libnuma-dev \
openssl libffi-dev pciutils libblas-dev gfortran libblas3 liblapack-dev liblapack3 default-jre screen tmux gdb lldb gcc g++ openssl libffi-dev pciutils libblas-dev gfortran libblas3 liblapack-dev liblapack3 default-jre screen tmux gdb lldb gcc g++
RUN apt-get update && apt-get install -y rdma-core librdmacm1 RUN apt-get update && apt-get install -y rdma-core librdmacm1
RUN apt-get update && apt-get install libspdlog-dev
# Downgrade gcc&&g++ # Downgrade gcc&&g++
WORKDIR /usr/bin WORKDIR /usr/bin
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请先注册。