未验证 提交 311b3b44 编写于 作者: S ShenLiang 提交者: GitHub

Fix the bug where embedding can't be processed correctly in reducer (#29485)

* fix the bug of reducer in embedding

* add comment
上级 560b4323
...@@ -24,8 +24,8 @@ from paddle.fluid.dygraph import layers ...@@ -24,8 +24,8 @@ from paddle.fluid.dygraph import layers
from paddle.fluid.dygraph import parallel_helper from paddle.fluid.dygraph import parallel_helper
from paddle.fluid.dygraph import to_variable, no_grad from paddle.fluid.dygraph import to_variable, no_grad
from paddle.utils import deprecated from paddle.utils import deprecated
from paddle.fluid.dygraph import nn
import warnings import warnings
import paddle
__all__ = ["prepare_context", "ParallelEnv", "DataParallel"] __all__ = ["prepare_context", "ParallelEnv", "DataParallel"]
...@@ -419,9 +419,13 @@ class DataParallel(layers.Layer): ...@@ -419,9 +419,13 @@ class DataParallel(layers.Layer):
# NOTE(shenliang03): Here we can only use the layer's attributes to judge whether a
# parameter is sparse (i.e. SelectedRows). The reason is that the sparse information
# can't be obtained before backpropagation has happened. So if a layer supports sparse
# parameters, we should add the layer here, like "paddle.nn.layer.common.Embedding".
def check_layer_sparse(sublayer):
    """Return the sparse flag stored on ``sublayer`` if it is an embedding layer.

    Only the two embedding classes checked below are recognised; every
    other layer type yields ``False``.
    """
    if isinstance(sublayer, paddle.nn.layer.common.Embedding):
        sparse_flag = sublayer._sparse
    elif isinstance(sublayer, paddle.fluid.dygraph.Embedding):
        # NOTE(shenliang03): This branch exists for compatibility. If
        # paddle.fluid.dygraph.Embedding is removed in the future, this
        # check should be removed as well.
        sparse_flag = sublayer._is_sparse
    else:
        sparse_flag = False
    return sparse_flag
......
...@@ -15,14 +15,60 @@ ...@@ -15,14 +15,60 @@
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Embedding
from paddle.fluid.dygraph.base import to_variable
from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
from parallel_dygraph_sparse_embedding import SimpleNet, fake_sample_reader, TestSparseEmbedding from paddle.nn import Layer, Embedding
paddle.set_default_dtype("float64")
class SimpleNet(Layer):
    """Small embedding + projection network used by the sparse-embedding test.

    The model looks up embeddings for the input ids, projects them onto the
    vocabulary with a weight matrix and a bias, and reduces the per-token
    softmax cross-entropy to a single loss value.

    Args:
        hidden_size: Width of each embedding vector (and of the projection
            input).
        vocab_size: Number of rows in the embedding table and number of
            output classes of the projection.
        num_steps: Sequence length used when reshaping the per-token loss.
        init_scale: Half-width of the symmetric uniform initializer applied
            to the embedding table, projection weight and bias.
        is_sparse: Forwarded to ``Embedding(sparse=...)``.
        dtype: Data type of the projection weight and bias. It is not passed
            to the embedding layer.
    """

    def __init__(self,
                 hidden_size,
                 vocab_size,
                 num_steps=20,
                 init_scale=0.1,
                 is_sparse=False,
                 dtype="float64"):
        super(SimpleNet, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.init_scale = init_scale
        self.num_steps = num_steps
        # Respect the caller's ``is_sparse`` choice; previously the argument
        # was accepted but the embedding was unconditionally sparse.
        self.embedding = Embedding(
            self.vocab_size,
            self.hidden_size,
            sparse=is_sparse,
            weight_attr=paddle.ParamAttr(
                name='embedding_param',
                initializer=paddle.nn.initializer.Uniform(
                    low=-init_scale, high=init_scale)))
        self.softmax_weight = self.create_parameter(
            attr=paddle.ParamAttr(),
            shape=[self.hidden_size, self.vocab_size],
            dtype=dtype,
            default_initializer=paddle.nn.initializer.Uniform(
                low=-self.init_scale, high=self.init_scale))
        self.softmax_bias = self.create_parameter(
            attr=paddle.ParamAttr(),
            shape=[self.vocab_size],
            dtype=dtype,
            default_initializer=paddle.nn.initializer.Uniform(
                low=-self.init_scale, high=self.init_scale))

    def forward(self, input, label):
        """Compute the scalar training loss for a batch.

        Args:
            input: Token ids fed to the embedding layer.
            label: Hard (index) labels for the cross-entropy.

        Returns:
            The per-token loss reshaped to ``[-1, num_steps]``, averaged over
            axis 0 and then summed.
        """
        x_emb = self.embedding(input)
        fc = paddle.matmul(x_emb, self.softmax_weight)
        fc = paddle.add(fc, self.softmax_bias)
        # Flatten to one row of logits per token.
        projection = paddle.reshape(fc, shape=[-1, self.vocab_size])
        loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        loss = paddle.reshape(loss, shape=[-1, self.num_steps])
        loss = paddle.mean(loss, axis=[0])
        loss = paddle.sum(loss)
        return loss
# global configs # global configs
batch_size = 4 batch_size = 4
...@@ -33,24 +79,46 @@ num_steps = 3 ...@@ -33,24 +79,46 @@ num_steps = 3
init_scale = 0.1 init_scale = 0.1
def fake_sample_reader():
    """Create a reader that yields synthetic (input ids, label ids) samples.

    The returned generator function produces ``batch_num`` samples. Each
    sample is a pair of int64 arrays of length ``num_steps``: the ids
    ``0..num_steps-1`` and the same ids shifted up by one.
    """

    def __reader__():
        for _ in range(batch_num):
            token_ids = np.arange(num_steps).astype('int64')
            next_token_ids = np.arange(1, 1 + num_steps).astype('int64')
            yield token_ids, next_token_ids

    return __reader__
class TestSparseEmbeddingFP64(TestParallelDyGraphRunnerBase):
    """Parallel dygraph runner for ``SimpleNet`` with a sparse embedding."""

    def get_model(self):
        """Build the model, the batched data reader and the optimizer.

        Returns:
            A ``(model, train_reader, optimizer)`` tuple.
        """
        model = SimpleNet(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_steps=num_steps,
            init_scale=init_scale,
            is_sparse=True)

        train_reader = paddle.batch(
            fake_sample_reader(), batch_size=batch_size, drop_last=True)

        optimizer = paddle.optimizer.SGD(learning_rate=0.001,
                                         parameters=model.parameters())

        return model, train_reader, optimizer

    def run_one_loop(self, model, optimizer, batch):
        """Run one forward pass over ``batch`` and return the loss.

        Args:
            model: The network returned by ``get_model``.
            optimizer: Unused here; part of the runner's method signature.
            batch: Sequence of ``(x, y)`` samples from the reader.

        Returns:
            The loss produced by ``model(x, y)``.
        """
        # Use ``num_steps`` rather than a literal so the per-sample reshape
        # always agrees with the reader and the reshapes below.
        x_data = np.array(
            [x[0].reshape(num_steps) for x in batch]).astype('int64')
        y_data = np.array(
            [x[1].reshape(num_steps) for x in batch]).astype('int64')
        x_data = x_data.reshape((-1, num_steps, 1))
        y_data = y_data.reshape((-1, 1))

        x = paddle.to_tensor(x_data)
        y = paddle.to_tensor(y_data)

        dy_loss = model(x, y)

        return dy_loss
if __name__ == "__main__": if __name__ == "__main__":
runtime_main(TestSparseEmbeddingFP64) runtime_main(TestSparseEmbeddingFP64)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册