From a01bc6b31ddd6fd65ffde553bb3f057b09f253f9 Mon Sep 17 00:00:00 2001 From: danleifeng <52735331+danleifeng@users.noreply.github.com> Date: Wed, 30 Sep 2020 15:13:04 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90paddle.fleet=E3=80=91fleet=20support?= =?UTF-8?q?=20non=5Fdistributed=20training=20in=20dygraph=20mode=20(#27714?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fleet support non_distributed training in dygraph mode; test=develop --- .../distributed/fleet/base/fleet_base.py | 2 + .../fluid/tests/unittests/test_fleet_base.py | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 3fdd6e92483..7eb3a565965 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -187,6 +187,8 @@ class Fleet(object): self.strategy_compiler = StrategyCompiler() if paddle.fluid.framework.in_dygraph_mode(): + if self.worker_num() == 1: + return if parallel_helper._is_parallel_ctx_initialized(): warnings.warn( "The dygraph parallel environment has been initialized.") diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 4945c158025..3d4b2e218f7 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -18,6 +18,7 @@ import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import os import paddle.fluid as fluid +import paddle.nn as nn import numpy as np @@ -170,6 +171,44 @@ class TestFleetDygraph(unittest.TestCase): final_strategy = fleet._final_strategy() +class LinearNet(nn.Layer): + def __init__(self): + super(LinearNet, self).__init__() + self._linear1 = nn.Linear(10, 10) + self._linear2 = nn.Linear(10, 1) + + def forward(self, x): + return self._linear2(self._linear1(x)) + + +class TestFleetDygraphSingle(unittest.TestCase): + def setUp(self): + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213" + os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" + os.environ["PADDLE_TRAINERS_NUM"] = "1" + os.environ["PADDLE_TRAINER_ID"] = "0" + + def test_dygraph_single(self): + paddle.disable_static() + fleet.init(is_collective=True) + + layer = LinearNet() + loss_fn = nn.MSELoss() + adam = paddle.optimizer.Adam( + learning_rate=0.001, parameters=layer.parameters()) + + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + for step in range(2): + inputs = paddle.randn([10, 10], 'float32') + outputs = dp_layer(inputs) + labels = paddle.randn([10, 1], 'float32') + loss = loss_fn(outputs, labels) + loss.backward() + adam.step() + adam.clear_grad() + + class TestFleetBaseSingleRunCollective(unittest.TestCase): def setUp(self): os.environ.pop("PADDLE_TRAINER_ENDPOINTS") -- GitLab