From 311b3b44fc7d51d4d66d90ab8a3fc0d42231afda Mon Sep 17 00:00:00 2001
From: ShenLiang
Date: Wed, 9 Dec 2020 10:37:20 +0800
Subject: [PATCH] =?UTF-8?q?Fix=20the=20bug=20where=20embedding=20can?=
 =?UTF-8?q?=E2=80=98t=20be=20processed=20correctly=20in=20reducer=20(#2948?=
 =?UTF-8?q?5)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix the bug of reducer in embedding

* add comment
---
 python/paddle/fluid/dygraph/parallel.py       | 10 ++-
 .../parallel_dygraph_sparse_embedding_fp64.py | 88 ++++++++++++++++---
 2 files changed, 85 insertions(+), 13 deletions(-)

diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py
index d7576ddc70a..77a0308a533 100644
--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@@ -24,8 +24,8 @@ from paddle.fluid.dygraph import layers
 from paddle.fluid.dygraph import parallel_helper
 from paddle.fluid.dygraph import to_variable, no_grad
 from paddle.utils import deprecated
-from paddle.fluid.dygraph import nn
 import warnings
+import paddle
 
 __all__ = ["prepare_context", "ParallelEnv", "DataParallel"]
 
@@ -419,9 +419,13 @@ class DataParallel(layers.Layer):
         # NOTE(shenliang03): Here we can only use the attributes to judge whether
         # parameter is sparse(or SelectedRows). The reason is that the sparse message
         # can't be obtained when bp hasn't happened yet. So if layer supports sparse parameter,
-        # we should add the layer here like "nn.Embedding".
+        # we should add the layer here like "paddle.nn.layer.common.Embedding".
         def check_layer_sparse(sublayer):
-            if isinstance(sublayer, nn.Embedding):
+            if isinstance(sublayer, paddle.nn.layer.common.Embedding):
+                return sublayer._sparse
+            # NOTE(shenliang03):This is for compatibility. If paddle.fluid.dygraph.Embedding
+            # is removed in the future, the judgment will also be removed here.
+            if isinstance(sublayer, paddle.fluid.dygraph.Embedding):
                 return sublayer._is_sparse
             return False
 
diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py
index e7b4e605253..47050b7bfc7 100644
--- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py
+++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding_fp64.py
@@ -15,14 +15,60 @@
 from __future__ import print_function
 
 import numpy as np
-
 import paddle
-import paddle.fluid as fluid
-from paddle.fluid.dygraph.nn import Embedding
-from paddle.fluid.dygraph.base import to_variable
 
 from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
-from parallel_dygraph_sparse_embedding import SimpleNet, fake_sample_reader, TestSparseEmbedding
+from paddle.nn import Layer, Embedding
+paddle.set_default_dtype("float64")
+
+
+class SimpleNet(Layer):
+    def __init__(self,
+                 hidden_size,
+                 vocab_size,
+                 num_steps=20,
+                 init_scale=0.1,
+                 is_sparse=False,
+                 dtype="float64"):
+        super(SimpleNet, self).__init__()
+        self.hidden_size = hidden_size
+        self.vocab_size = vocab_size
+        self.init_scale = init_scale
+        self.num_steps = num_steps
+        self.embedding = Embedding(
+            self.vocab_size,
+            self.hidden_size,
+            sparse=True,
+            weight_attr=paddle.ParamAttr(
+                name='embedding_param',
+                initializer=paddle.nn.initializer.Uniform(
+                    low=-init_scale, high=init_scale)))
+        self.softmax_weight = self.create_parameter(
+            attr=paddle.ParamAttr(),
+            shape=[self.hidden_size, self.vocab_size],
+            dtype=dtype,
+            default_initializer=paddle.nn.initializer.Uniform(
+                low=-self.init_scale, high=self.init_scale))
+        self.softmax_bias = self.create_parameter(
+            attr=paddle.ParamAttr(),
+            shape=[self.vocab_size],
+            dtype=dtype,
+            default_initializer=paddle.nn.initializer.Uniform(
+                low=-self.init_scale, high=self.init_scale))
+
+    def forward(self, input, label):
+        x_emb = self.embedding(input)
+        fc = paddle.matmul(x_emb, self.softmax_weight)
+        fc = paddle.add(fc, self.softmax_bias)
+        projection = paddle.reshape(fc, shape=[-1, self.vocab_size])
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=projection, label=label, soft_label=False)
+        loss = paddle.reshape(loss, shape=[-1, self.num_steps])
+        loss = paddle.mean(loss, axis=[0])
+        loss = paddle.sum(loss)
+
+        return loss
+
 
 # global configs
 batch_size = 4
@@ -33,24 +79,46 @@ num_steps = 3
 init_scale = 0.1
 
 
-class TestSparseEmbeddingFP64(TestSparseEmbedding):
+def fake_sample_reader():
+    def __reader__():
+        for i in range(batch_num):
+            x_data = np.arange(num_steps).astype('int64')
+            y_data = np.arange(1, 1 + num_steps).astype('int64')
+            yield x_data, y_data
+
+    return __reader__
+
+
+class TestSparseEmbeddingFP64(TestParallelDyGraphRunnerBase):
     def get_model(self):
         model = SimpleNet(
             hidden_size=hidden_size,
             vocab_size=vocab_size,
             num_steps=num_steps,
             init_scale=init_scale,
-            is_sparse=True,
-            dtype="float64")
+            is_sparse=True)
 
         train_reader = paddle.batch(
             fake_sample_reader(), batch_size=batch_size, drop_last=True)
 
-        optimizer = fluid.optimizer.SGD(learning_rate=0.001,
-                                        parameter_list=model.parameters())
+        optimizer = paddle.optimizer.SGD(learning_rate=0.001,
+                                         parameters=model.parameters())
 
         return model, train_reader, optimizer
 
+    def run_one_loop(self, model, optimizer, batch):
+        x_data = np.array([x[0].reshape(3) for x in batch]).astype('int64')
+        y_data = np.array([x[1].reshape(3) for x in batch]).astype('int64')
+        x_data = x_data.reshape((-1, num_steps, 1))
+        y_data = y_data.reshape((-1, 1))
+
+        x = paddle.to_tensor(x_data)
+        y = paddle.to_tensor(y_data)
+
+        dy_loss = model(x, y)
+
+        return dy_loss
+
 
 if __name__ == "__main__":
     runtime_main(TestSparseEmbeddingFP64)
-- 
GitLab