未验证 提交 1ce0a09e 编写于 作者: J Jiabin Yang 提交者: GitHub

Fix conv2d transpose bias by creating and initializing it in build_once (#18968)

* fix conv2d transpose bias by creating and initializing it in build_once

* fix API spec

* test=develop, invoke ci

* fix the error where bias_attr and act have no effect on LayerNorm, Conv2DTranspose, BilinearTensorProduct, and SequenceConv. Fix the error where original_mode is not used on GRUUnit. Fix the error where sample_weight is not set on NCE. Add unit tests for all of these layers

* test=develop, change success standard for conv2dTranspose

* test=develop, fix test_layers to invoke some error branch

* test=develop, fix sample code

* test=develop, fix BilinearTensorProduct failed in dygraph mode

* test=develop, fix test_layers segmentation fault error
上级 4ef6b845
......@@ -676,7 +676,7 @@ paddle.fluid.dygraph.LayerNorm.state_dict (ArgSpec(args=['self', 'destination',
paddle.fluid.dygraph.LayerNorm.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.LayerNorm.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.NCE ('paddle.fluid.dygraph.nn.NCE', ('document', '47eb439a5568468fad70235f1e61ead9'))
paddle.fluid.dygraph.NCE.__init__ (ArgSpec(args=['self', 'name_scope', 'num_total_classes', 'param_attr', 'bias_attr', 'num_neg_samples', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, 'uniform', None, 0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.NCE.__init__ (ArgSpec(args=['self', 'name_scope', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, 'uniform', None, 0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.NCE.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1'))
paddle.fluid.dygraph.NCE.add_sublayer (ArgSpec(args=['self', 'name', 'sublayer'], varargs=None, keywords=None, defaults=None), ('document', '839ff3c0534677ba6ad8735c3fd4e995'))
paddle.fluid.dygraph.NCE.backward (ArgSpec(args=['self'], varargs='inputs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
......@@ -23,6 +23,7 @@ from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
from ..param_attr import ParamAttr
from ..initializer import Normal, Constant, NumpyArrayInitializer
import numpy as np
import logging
__all__ = [
'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit',
......@@ -1374,6 +1375,10 @@ class LayerNorm(layers.Layer):
shape=param_shape,
dtype=self._dtype,
default_initializer=Constant(1.0))
else:
if self._param_attr:
logging.warn("param_attr are only avaliable with scale is True")
if self._shift:
assert self._bias_attr is not False
self._bias_w = self.create_parameter(
......@@ -1381,6 +1386,9 @@ class LayerNorm(layers.Layer):
shape=param_shape,
dtype=self._dtype,
is_bias=True)
else:
if self._bias_attr:
logging.warn("bias_attr are only avaliable with shift is True")
def forward(self, input):
inputs = dict()
......@@ -1410,7 +1418,7 @@ class LayerNorm(layers.Layer):
"begin_norm_axis": self._begin_norm_axis
})
return self._helper.append_activation(layer_norm_out)
return self._helper.append_activation(layer_norm_out, act=self._act)
class GRUUnit(layers.Layer):
......@@ -1648,6 +1656,7 @@ class NCE(layers.Layer):
def __init__(self,
name_scope,
num_total_classes,
sample_weight=None,
param_attr=None,
bias_attr=None,
num_neg_samples=None,
......@@ -1661,7 +1670,7 @@ class NCE(layers.Layer):
self._num_total_classes = num_total_classes
self._inputs = dict()
self._inputs['SampleWeight'] = sample_weight if sample_weight is not None else []
if sampler == "uniform":
sampler = 0
elif sampler == "log_uniform":
......@@ -1939,17 +1948,17 @@ class BilinearTensorProduct(layers.Layer):
dtype=self._dtype,
is_bias=False)
if self._bias_attr:
bias_size = [1, self._size]
bias = self.create_parameter(
attr=self._bias_attr,
shape=bias_size,
dtype=self._dtype,
is_bias=True)
self._inputs["Bias"] = bias
bias_size = [1, self._size]
self._bias_param = self.create_parameter(
attr=self._bias_attr,
shape=bias_size,
dtype=self._dtype,
is_bias=True)
def forward(self, x, y):
self._inputs = {"X": x, "Y": y, "Weight": self._w}
if self._bias_param:
self._inputs["Bias"] = self._bias_param
if self._name is not None:
out = self._helper.create_variable(
name=".".join([self.full_name(), self._name]),
......@@ -1964,7 +1973,7 @@ class BilinearTensorProduct(layers.Layer):
outputs={"Out": out})
# add activation
return self._helper.append_activation(out)
return self._helper.append_activation(out, act=self._act)
class Conv2DTranspose(layers.Layer):
......@@ -2099,6 +2108,7 @@ class Conv2DTranspose(layers.Layer):
assert param_attr is not False, "param_attr should not be False in conv2d_transpose."
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self._groups = groups
self._num_filters = num_filters
self._use_cudnn = use_cudnn
......@@ -2162,6 +2172,12 @@ class Conv2DTranspose(layers.Layer):
self._img_filter = self.create_parameter(
dtype=input.dtype, shape=filter_shape, attr=self._param_attr)
self._bias_param = self.create_parameter(
attr=self._bias_attr,
shape=[self._num_filters],
dtype=self._dtype,
is_bias=True)
def forward(self, input):
pre_bias = self._helper.create_variable_for_type_inference(
dtype=input.dtype)
......@@ -2179,8 +2195,19 @@ class Conv2DTranspose(layers.Layer):
'use_cudnn': self._use_cudnn
})
pre_act = self._helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
out = self._helper.append_activation(pre_act)
if self._bias_param is not None:
pre_act = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias],
'Y': [self._bias_param]},
outputs={'Out': [pre_act]},
attrs={'axis': 1})
else:
pre_act = pre_bias
out = self._helper.append_activation(pre_act, act=self._act)
return out
......@@ -2230,6 +2257,7 @@ class SequenceConv(layers.Layer):
self._padding = padding
self._bias_attr = bias_attr
self._param_attr = param_attr
self._act = act
def _build_once(self, input):
self._dtype = self._helper.input_dtype(input)
......@@ -2237,6 +2265,12 @@ class SequenceConv(layers.Layer):
self._filter_param = self.create_parameter(
attr=self._param_attr, shape=filter_shape, dtype=self._dtype)
self._bias_param = self.create_parameter(
attr=self._bias_attr,
shape=[self._num_filters],
dtype=self._dtype,
is_bias=True)
def forward(self, input):
pre_bias = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
......@@ -2251,8 +2285,20 @@ class SequenceConv(layers.Layer):
'contextStart': -int(self._filter_size // 2),
'contextLength': self._filter_size
})
pre_act = self._helper.append_bias_op(pre_bias)
return self._helper.append_activation(pre_act)
if self._bias_param is not None:
pre_act = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias],
'Y': [self._bias_param]},
outputs={'Out': [pre_act]},
attrs={'axis': 1})
else:
pre_act = pre_bias
return self._helper.append_activation(pre_act, act=self._act)
class RowConv(layers.Layer):
......@@ -2614,6 +2660,7 @@ class TreeConv(layers.Layer):
out = self.create_variable(
name=self._name, dtype=self._dtype, persistable=False)
else:
out = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
......
......@@ -124,7 +124,10 @@ class TestLayer(LayerTest):
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
ret = layers.layer_norm(t)
ret = layers.layer_norm(
t,
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
static_ret = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret])[0]
with self.static_graph():
......@@ -133,16 +136,34 @@ class TestLayer(LayerTest):
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
lm = nn.LayerNorm('layer_norm')
lm = nn.LayerNorm(
'layer_norm',
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
ret = lm(t)
static_ret2 = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret])[0]
with self.dynamic_graph():
lm = nn.LayerNorm('layer_norm')
lm = nn.LayerNorm(
'layer_norm',
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
dy_ret = lm(base.to_variable(inp))
with self.dynamic_graph():
lm = nn.LayerNorm(
'layer_norm',
shift=False,
scale=False,
param_attr=fluid.initializer.ConstantInitializer(value=1),
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
lm(base.to_variable(inp))
self.assertFalse(hasattr(lm, "_scale_w"))
self.assertFalse(hasattr(lm, "_bias_w"))
self.assertTrue(np.allclose(static_ret, static_ret2))
self.assertTrue(np.allclose(dy_ret.numpy(), static_ret2))
self.assertTrue(np.array_equal(static_ret, static_ret2))
self.assertTrue(np.array_equal(dy_ret.numpy(), static_ret2))
def test_relu(self):
with self.static_graph():
......@@ -313,7 +334,7 @@ class TestLayer(LayerTest):
dtype='float32',
lod_level=1,
append_batch_size=False)
out = layers.sequence_conv(seq, 2)
out = layers.sequence_conv(seq, 2, act='sigmoid')
static_rlt = self.get_static_graph_result(
feed={
"seq_in": fluid.create_lod_tensor(
......@@ -331,7 +352,7 @@ class TestLayer(LayerTest):
dtype='float32',
lod_level=1,
append_batch_size=False)
seq_conv = nn.SequenceConv('seq_conv', num_filters=2)
seq_conv = nn.SequenceConv('seq_conv', num_filters=2, act='sigmoid')
out = seq_conv(seq)
static_rlt2 = self.get_static_graph_result(
feed={
......@@ -343,29 +364,41 @@ class TestLayer(LayerTest):
fetch_list=[out],
with_lod=True)[0]
self.assertTrue(
np.allclose(np.array(static_rlt), np.array(static_rlt2)))
np.array_equal(np.array(static_rlt), np.array(static_rlt2)))
def test_conv2d_transpose(self):
inp_np = np.arange(0, 24).reshape([2, 3, 2, 2]).astype('float32')
with self.static_graph():
img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32')
out = layers.conv2d_transpose(
input=img, num_filters=10, output_size=28)
input=img,
num_filters=10,
output_size=28,
act='sigmoid',
bias_attr=fluid.initializer.ConstantInitializer(value=1))
static_rlt = self.get_static_graph_result(
feed={'pixel': inp_np}, fetch_list=[out])[0]
with self.static_graph():
img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32')
conv2d_transpose = nn.Conv2DTranspose(
'conv2d_transpose', num_filters=10, output_size=28)
'conv2d_transpose',
num_filters=10,
output_size=28,
act='sigmoid',
bias_attr=fluid.initializer.ConstantInitializer(value=1))
out = conv2d_transpose(img)
static_rlt2 = self.get_static_graph_result(
feed={'pixel': inp_np}, fetch_list=[out])[0]
with self.dynamic_graph():
conv2d_transpose = nn.Conv2DTranspose(
'conv2d_transpose', num_filters=10, output_size=28)
'conv2d_transpose',
num_filters=10,
output_size=28,
act='sigmoid',
bias_attr=fluid.initializer.ConstantInitializer(value=1))
dy_rlt = conv2d_transpose(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt2))
def test_bilinear_tensor_product(self):
inp_np_x = np.array([[1, 2, 3]]).astype('float32')
......@@ -382,11 +415,17 @@ class TestLayer(LayerTest):
shape=[1, 3],
dtype="float32",
append_batch_size=False)
out = layers.bilinear_tensor_product(data_x, data_y, 6)
out = layers.bilinear_tensor_product(
data_x,
data_y,
6,
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
static_rlt = self.get_static_graph_result(
feed={'x': inp_np_x,
'y': inp_np_y}, fetch_list=[out])[0]
with self.static_graph():
data_x = layers.data(
name='x',
......@@ -398,17 +437,49 @@ class TestLayer(LayerTest):
shape=[1, 3],
dtype="float32",
append_batch_size=False)
btp = nn.BilinearTensorProduct('btp', 6)
btp = nn.BilinearTensorProduct(
'btp',
6,
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
out = btp(data_x, data_y)
static_rlt2 = self.get_static_graph_result(
feed={'x': inp_np_x,
'y': inp_np_y}, fetch_list=[out])[0]
with self.dynamic_graph():
btp = nn.BilinearTensorProduct('btp', 6)
btp = nn.BilinearTensorProduct(
'btp',
6,
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt))
with self.dynamic_graph():
btp2 = nn.BilinearTensorProduct('btp', 6, act='sigmoid')
dy_rlt2 = btp2(
base.to_variable(inp_np_x), base.to_variable(inp_np_y))
with self.static_graph():
data_x2 = layers.data(
name='x',
shape=[1, 3],
dtype="float32",
append_batch_size=False)
data_y2 = layers.data(
name='y',
shape=[1, 3],
dtype="float32",
append_batch_size=False)
out2 = layers.bilinear_tensor_product(
data_x2, data_y2, 6, act='sigmoid')
static_rlt3 = self.get_static_graph_result(
feed={'x': inp_np_x,
'y': inp_np_y}, fetch_list=[out2])[0]
self.assertTrue(np.array_equal(dy_rlt2.numpy(), static_rlt3))
self.assertTrue(np.array_equal(static_rlt2, static_rlt))
self.assertTrue(np.array_equal(dy_rlt.numpy(), static_rlt))
def test_prelu(self):
inp_np = np.ones([5, 200, 100, 100]).astype('float32')
......@@ -497,7 +568,8 @@ class TestLayer(LayerTest):
words.append(
layers.data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
sample_weights = layers.fill_constant(
shape=[5, 1], dtype='float32', value=1)
embs = []
for i in range(window_size):
if i == label_word:
......@@ -519,7 +591,8 @@ class TestLayer(LayerTest):
custom_dist=nid_freq_arr.tolist(),
seed=seed,
param_attr='nce.w',
bias_attr='nce.b')
bias_attr='nce.b',
sample_weight=sample_weights)
feed_dict = dict()
for i in range(window_size):
feed_dict['word_{0}'.format(i)] = inp_word[i]
......@@ -531,7 +604,8 @@ class TestLayer(LayerTest):
words.append(
layers.data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
sample_weights = layers.fill_constant(
shape=[5, 1], dtype='float32', value=1)
emb = nn.Embedding(
'embedding',
size=[dict_size, 32],
......@@ -554,7 +628,8 @@ class TestLayer(LayerTest):
custom_dist=nid_freq_arr.tolist(),
seed=seed,
param_attr='nce.w',
bias_attr='nce.b')
bias_attr='nce.b',
sample_weight=sample_weights)
nce_loss2 = nce(embs2, words[label_word])
feed_dict = dict()
......@@ -568,7 +643,8 @@ class TestLayer(LayerTest):
words = []
for i in range(window_size):
words.append(base.to_variable(inp_word[i]))
sample_weights = layers.fill_constant(
shape=[5, 1], dtype='float32', value=1)
emb = nn.Embedding(
'embedding',
size=[dict_size, 32],
......@@ -591,7 +667,8 @@ class TestLayer(LayerTest):
custom_dist=nid_freq_arr.tolist(),
seed=seed,
param_attr='nce.w',
bias_attr='nce.b')
bias_attr='nce.b',
sample_weight=sample_weights)
nce_loss3 = nce(embs3, words[label_word])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册