Commit cf475f95 authored by zhongpu, committed by hong

Remove FC in dygraph, modify FC to Linear in sample code (#22082)

* modify fc to linear in sample code, test=develop

* remove FC, test=develop

* remove warnings, test=develop

* drop fluid/imperative/README.md , test=develop

* change fc to linear, test=develop

* polish code style, test=develop
Parent 64a40442
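For reference, a minimal sketch of the replacement this PR applies to the sample code (illustrative only; it assumes a paddle.fluid 1.x build that already ships fluid.dygraph.Linear, and the shapes are taken from the docstring examples below):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable

data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
    # Old (removed): fc = FC("fc", 64, num_flatten_dims=2); x = fc(data)
    # New: Linear takes an explicit (input_dim, output_dim) pair and a single Tensor.
    linear = Linear(32, 64)
    x = linear(to_variable(data))
    print(x.numpy().shape)  # (30, 10, 64)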
......@@ -340,14 +340,14 @@ void BindImperative(py::module *m_ptr) {
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph import Linear
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
linear = Linear(32, 64)
data = to_variable(data)
x = fc(data)
x = linear(data)
print(x.numpy())
)DOC")
......@@ -374,14 +374,14 @@ void BindImperative(py::module *m_ptr) {
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph import Linear
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
linear = Linear(32, 64)
data = to_variable(data)
x = fc(data)
x = linear(data)
y = x.detach()
)DOC")
......
......@@ -84,12 +84,12 @@ def _no_grad_(func):
@fluid.dygraph.no_grad
def test_layer():
with fluid.dygraph.guard():
inp = np.ones([3, 32, 32], dtype='float32')
inp = np.ones([3, 1024], dtype='float32')
t = fluid.dygraph.base.to_variable(inp)
fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = fluid.FC('fc2', size=4)
ret = fc1(t)
dy_ret = fc2(ret)
linear1 = fluid.Linear(1024, 4, bias_attr=False)
linear2 = fluid.Linear(4, 4)
ret = linear1(t)
dy_ret = linear2(ret)
test_layer()
......@@ -127,12 +127,12 @@ def guard(place=None):
import paddle.fluid as fluid
with fluid.dygraph.guard():
inp = np.ones([3, 32, 32], dtype='float32')
inp = np.ones([3, 1024], dtype='float32')
t = fluid.dygraph.base.to_variable(inp)
fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = fluid.FC('fc2', size=4)
ret = fc1(t)
dy_ret = fc2(ret)
linear1 = fluid.Linear(1024, 4, bias_attr=False)
linear2 = fluid.Linear(4, 4)
ret = linear1(t)
dy_ret = linear2(ret)
"""
train = framework.Program()
......
......@@ -29,10 +29,9 @@ import numbers
import logging
__all__ = [
'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'Linear', 'BatchNorm', 'Embedding',
'GRUUnit', 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct',
'Conv2DTranspose', 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm',
'TreeConv'
'Conv2D', 'Conv3D', 'Pool2D', 'Linear', 'BatchNorm', 'Embedding', 'GRUUnit',
'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose',
'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'TreeConv'
]
......@@ -865,7 +864,7 @@ class Linear(layers.Layer):
where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively.
Different from FC layer, Linear layer takes only one ``Tensor`` input.
Linear layer takes only one ``Tensor`` input.
The Linear layer multiplies input tensor with weight matrix and
produces an output Tensor of shape [N, *, `output_dim`],
where N is batch size and `*` means any number of additional dimensions.
......@@ -959,221 +958,6 @@ class Linear(layers.Layer):
return self._helper.append_activation(pre_activation, act=self._act)
class FC(layers.Layer):
"""
This interface is used to construct a callable object of the ``FC`` class.
For more details, refer to code examples.
It creates a fully connected layer in the network. It can take
one or multiple ``Tensor`` as its inputs. It creates a Variable called weights for each input tensor,
which represents a fully connected weight matrix from each input unit to
each output unit. The fully connected layer multiplies each input tensor
with its corresponding weight to produce an output Tensor with shape [N, `size`],
where N is batch size. If multiple input tensors are given, the results of
multiple output tensors with shape [N, `size`] will be summed up. If ``bias_attr``
is not None, a bias variable will be created and added to the output.
Finally, if ``act`` is not None, it will be applied to the output as well.
When the input is single ``Tensor`` :
.. math::
Out = Act({XW + b})
When the input are multiple ``Tensor`` :
.. math::
Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation:
* :math:`N`: Number of the input. N equals to len(input) if input is list of ``Tensor`` .
* :math:`X_i`: The i-th input ``Tensor`` .
* :math:`W_i`: The i-th weights matrix corresponding i-th input tensor.
* :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Act`: The activation function.
* :math:`Out`: The output ``Tensor`` .
See below for an example.
.. code-block:: text
Given:
data_1.data = [[[0.1, 0.2]]]
data_1.shape = (1, 1, 2) # 1 is batch_size
data_2.data = [[[0.1, 0.2, 0.3]]]
data_2.shape = (1, 1, 3) # 1 is batch_size
fc = FC("fc", 2, num_flatten_dims=2)
out = fc(input=[data_1, data_2])
Then:
out.data = [[[0.182996 -0.474117]]]
out.shape = (1, 1, 2)
Parameters:
name_scope(str): The name of this class.
size(int): The number of output units in this layer.
num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multi-dimension tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
dimensions will be flatten to form the first dimension of the final matrix (height of
the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1
param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable
weights(Parameter) of this layer. Default: None.
bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act (str, optional): Activation to be applied to the output of this layer. Default: None.
is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False.
dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32".
Attribute:
**weight** (list of Parameter): the learnable weights of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Returns:
None
Examples:
.. code-block:: python
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
from paddle.fluid.dygraph import FC
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
data = to_variable(data)
conv = fc(data)
"""
def __init__(self,
name_scope,
size,
num_flatten_dims=1,
param_attr=None,
bias_attr=None,
act=None,
is_test=False,
dtype="float32"):
super(FC, self).__init__(name_scope, dtype)
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self.__w = list()
def _build_once(self, input):
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
input_shape = inp.shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:],
1)
] + [self._size]
self.__w.append(
self.add_parameter(
'_w%d' % i,
self.create_parameter(
attr=param,
shape=param_shape,
dtype=self._dtype,
is_bias=False)))
i += 1
size = list([self._size])
self._b = self.create_parameter(
attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True)
# TODO(songyouwei): We should remove _w property
@property
def _w(self, i=0):
return self.__w[i]
@_w.setter
def _w(self, value, i=0):
assert isinstance(self.__w[i], Variable)
self.__w[i].set_value(value)
@property
def weight(self):
if len(self.__w) > 1:
return self.__w
else:
return self.__w[0]
@weight.setter
def weight(self, value):
if len(self.__w) == 1:
self.__w[0] = value
@property
def bias(self):
return self._b
@bias.setter
def bias(self, value):
self._b = value
def forward(self, input):
mul_results = list()
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
tmp = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": inp,
"Y": self.__w[i]},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": self._num_flatten_dims,
"y_num_col_dims": 1
})
i += 1
mul_results.append(tmp)
if len(mul_results) == 1:
pre_bias = mul_results[0]
else:
pre_bias = self._helper.create_variable_for_type_inference(
self._dtype)
self._helper.append_op(
type="sum",
inputs={"X": mul_results},
outputs={"Out": pre_bias},
attrs={"use_mkldnn": False})
if self._b:
pre_activation = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias],
'Y': [self._b]},
outputs={'Out': [pre_activation]},
attrs={'axis': self._num_flatten_dims})
else:
pre_activation = pre_bias
# Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(pre_activation, act=self._act)
class BatchNorm(layers.Layer):
"""
This interface is used to construct a callable object of the ``BatchNorm`` class.
......
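The FC class removed above flattened its input according to num_flatten_dims before the matmul, while Linear treats every leading dimension as a batch dimension and multiplies along the last axis only. A hedged sketch of the two replacement patterns used throughout this PR (paddle.fluid 1.x dygraph assumed; shapes are illustrative, not taken from any one test):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

with fluid.dygraph.guard():
    # Pattern 1: FC(size, num_flatten_dims=rank-1) maps directly to Linear,
    # because Linear already broadcasts over the leading dimensions:
    # [30, 10, 32] x [32, 64] -> [30, 10, 64]
    x = fluid.dygraph.to_variable(
        np.random.uniform(-1, 1, [30, 10, 32]).astype('float32'))
    linear_a = Linear(32, 64)
    out_a = linear_a(x)

    # Pattern 2: FC's default num_flatten_dims=1 collapsed everything after the
    # batch dimension; with Linear that collapse becomes an explicit reshape,
    # as the MNIST test later in this diff does:
    y = fluid.dygraph.to_variable(
        np.random.uniform(-1, 1, [8, 50, 4, 4]).astype('float32'))
    y = fluid.layers.reshape(y, shape=[-1, 50 * 4 * 4])  # [8, 800]
    linear_b = Linear(50 * 4 * 4, 10)
    out_b = linear_b(y)  # [8, 10]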
......@@ -97,7 +97,7 @@ class DataParallel(layers.Layer):
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import FC
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable
place = fluid.CUDAPlace(0)
......@@ -106,28 +106,28 @@ class DataParallel(layers.Layer):
# prepare the data parallel context
strategy=dygraph.parallel.prepare_context()
fc_layer = FC("FC", 10, act="softmax")
linear = Linear(1, 10, act="softmax")
adam = fluid.optimizer.AdamOptimizer()
# make the module become the data parallelism module
fc_layer = dygraph.parallel.DataParallel(fc_layer, strategy)
linear = dygraph.parallel.DataParallel(linear, strategy)
x_data = np.random.random(size=[10, 1]).astype(np.float32)
data = to_variable(x_data)
hidden = fc_layer(data)
hidden = linear(data)
avg_loss = fluid.layers.mean(hidden)
# scale the loss according to the number of trainers.
avg_loss = fc_layer.scale_loss(avg_loss)
avg_loss = linear.scale_loss(avg_loss)
avg_loss.backward()
# collect the gradients of trainers.
fc_layer.apply_collective_grads()
linear.apply_collective_grads()
adam.minimize(avg_loss)
fc_layer.clear_gradients()
linear.clear_gradients()
Args:
layers(Layer): The module that should be executed by data parallel.
......
......@@ -39,17 +39,17 @@ def monkey_patch_varbase():
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph import Linear
import numpy as np
data = np.ones([3, 32, 32], dtype='float32')
data = np.ones([3, 1024], dtype='float32')
with fluid.dygraph.guard():
fc = fluid.dygraph.FC("fc", 4)
linear = fluid.dygraph.Linear(1024, 4)
t = to_variable(data)
fc(t) # call with default weight
linear(t) # call with default weight
custom_weight = np.random.randn(1024, 4).astype("float32")
fc.weight.set_value(custom_weight) # change existing weight
out = fc(t) # call with different weight
linear.weight.set_value(custom_weight) # change existing weight
out = linear(t) # call with different weight
"""
assert isinstance(value, (np.ndarray, core.VarBase)), \
......
......@@ -65,7 +65,7 @@ class GradClipByValue(GradClipBase):
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
......@@ -77,9 +77,9 @@ class GradClipByValue(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10)
linear = Linear( 10, 10)
out = fc( to_variable(init_value) )
out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out )
......@@ -144,7 +144,7 @@ class GradClipByNorm(GradClipBase):
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
......@@ -156,9 +156,9 @@ class GradClipByNorm(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10)
linear = Linear( 10, 10)
out = fc( to_variable(init_value) )
out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out )
......@@ -222,7 +222,7 @@ class GradClipByGlobalNorm(GradClipBase):
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
......@@ -234,9 +234,9 @@ class GradClipByGlobalNorm(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10)
linear = Linear( 10, 10)
out = fc( to_variable(init_value) )
out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out )
......
......@@ -959,14 +959,14 @@ class Variable(object):
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph import Linear
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
linear = Linear(32, 64)
data = to_variable(data)
x = fc(data)
x = linear(data)
y = x.detach()
"""
......@@ -991,14 +991,14 @@ class Variable(object):
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph import Linear
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
linear = Linear(32, 64)
data = to_variable(data)
x = fc(data)
x = linear(data)
print(x.numpy())
"""
......@@ -1020,17 +1020,17 @@ class Variable(object):
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph import Linear
import numpy as np
data = np.ones([3, 32, 32], dtype='float32')
data = np.ones([3, 1024], dtype='float32')
with fluid.dygraph.guard():
fc = fluid.dygraph.FC("fc", 4)
linear = fluid.dygraph.Linear(1024, 4)
t = to_variable(data)
fc(t) # call with default weight
linear(t) # call with default weight
custom_weight = np.random.randn(1024, 4).astype("float32")
fc.weight.set_value(custom_weight) # change existing weight
out = fc(t) # call with different weight
linear.weight.set_value(custom_weight) # change existing weight
out = linear(t) # call with different weight
"""
pass
......@@ -1223,18 +1223,18 @@ class Variable(object):
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
linear = fluid.Linear(13, 5, dtype="float32")
linear2 = fluid.Linear(3, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
out2 = fc2(b)
out1 = linear(a)
out2 = linear2(b)
out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1)
out.backward()
assert (fc._w.gradient() == 0).all()
assert (linear.weight.gradient() == 0).all()
assert (out1.gradient() == 0).all()
"""
if in_dygraph_mode():
......
......@@ -30,14 +30,15 @@ __all__ = ['run_check']
class SimpleLayer(Layer):
def __init__(self, name_scope):
super(SimpleLayer, self).__init__(name_scope)
self._fc1 = nn.FC(self.full_name(),
def __init__(self, input_size):
super(SimpleLayer, self).__init__()
self._linear1 = nn.Linear(
input_size,
3,
param_attr=ParamAttr(initializer=Constant(value=0.1)))
def forward(self, inputs):
x = self._fc1(inputs)
x = self._linear1(inputs)
x = layers.reduce_sum(x)
return x
......@@ -79,7 +80,7 @@ def run_check():
build_strategy = compiler.BuildStrategy()
build_strategy.enable_inplace = True
inp = layers.data(name="inp", shape=[2, 2])
simple_layer = SimpleLayer("simple_layer")
simple_layer = SimpleLayer(input_size=2)
out = simple_layer(inp)
exe = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and
......@@ -108,10 +109,11 @@ def run_check():
with unique_name.guard():
inp0 = layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
simple_layer0 = SimpleLayer("simple_layer")
simple_layer0 = SimpleLayer(input_size=2)
out0 = simple_layer0(inp0)
param_grads = backward.append_backward(
out0, parameter_list=[simple_layer0._fc1._w.name])[0]
out0,
parameter_list=[simple_layer0._linear1.weight.name])[0]
exe0 = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and
(core.get_cuda_device_count() > 0) else core.CPUPlace())
......
......@@ -3002,7 +3002,7 @@ def layer_norm(input,
print(output)
"""
assert in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!"
) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!"
helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype()
......
......@@ -17,8 +17,7 @@ from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import FC
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable
import unittest
......@@ -33,37 +32,37 @@ class Test_Detach(unittest.TestCase):
def no_detach_multi(self):
data = self.generate_Data()
with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr(
linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr(
linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0))
fc = FC("fc",
linear = Linear(
4,
10,
num_flatten_dims=1,
param_attr=fc_w_param_attrs,
bias_attr=fc_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr(
param_attr=linear_w_param_attrs,
bias_attr=linear_b_param_attrs)
linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr(
linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc",
linear1 = Linear(
10,
1,
num_flatten_dims=1,
param_attr=fc1_w_param_attrs,
bias_attr=fc1_b_param_attrs)
fc2_w_param_attrs = fluid.ParamAttr(
param_attr=linear1_w_param_attrs,
bias_attr=linear1_b_param_attrs)
linear2_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(9.0))
fc2_b_param_attrs = fluid.ParamAttr(
linear2_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(10.0))
fc2 = FC("fc",
linear2 = Linear(
10,
1,
num_flatten_dims=1,
param_attr=fc2_w_param_attrs,
bias_attr=fc2_b_param_attrs)
param_attr=linear2_w_param_attrs,
bias_attr=linear2_b_param_attrs)
data = to_variable(data)
x = fc(data)
x1 = fc1(x)
x2 = fc2(x)
x = linear(data)
x1 = linear1(x)
x2 = linear2(x)
loss = x1 + x2
# print(loss, loss.shape)
loss.backward()
......@@ -72,27 +71,27 @@ class Test_Detach(unittest.TestCase):
def no_detach_single(self):
data = self.generate_Data()
with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr(
linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr(
linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0))
fc = FC("fc",
linear = Linear(
4,
10,
num_flatten_dims=1,
param_attr=fc_w_param_attrs,
bias_attr=fc_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr(
param_attr=linear_w_param_attrs,
bias_attr=linear_b_param_attrs)
linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr(
linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc",
linear1 = Linear(
10,
1,
num_flatten_dims=1,
param_attr=fc1_w_param_attrs,
bias_attr=fc1_b_param_attrs)
param_attr=linear1_w_param_attrs,
bias_attr=linear1_b_param_attrs)
data = to_variable(data)
x = fc(data)
x1 = fc1(x)
x = linear(data)
x1 = linear1(x)
loss = x1
# print(loss, loss.shape)
loss.backward()
......@@ -101,38 +100,38 @@ class Test_Detach(unittest.TestCase):
def detach_multi(self):
data = self.generate_Data()
with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr(
linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr(
linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0))
fc = FC("fc",
linear = Linear(
4,
10,
num_flatten_dims=1,
param_attr=fc_w_param_attrs,
bias_attr=fc_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr(
param_attr=linear_w_param_attrs,
bias_attr=linear_b_param_attrs)
linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr(
linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc",
linear1 = Linear(
10,
1,
num_flatten_dims=1,
param_attr=fc1_w_param_attrs,
bias_attr=fc1_b_param_attrs)
fc2_w_param_attrs = fluid.ParamAttr(
param_attr=linear1_w_param_attrs,
bias_attr=linear1_b_param_attrs)
linear2_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(9.0))
fc2_b_param_attrs = fluid.ParamAttr(
linear2_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(10.0))
fc2 = FC("fc",
linear2 = Linear(
10,
1,
num_flatten_dims=1,
param_attr=fc2_w_param_attrs,
bias_attr=fc2_b_param_attrs)
param_attr=linear2_w_param_attrs,
bias_attr=linear2_b_param_attrs)
data = to_variable(data)
x = fc(data)
x = linear(data)
x_detach = x.detach()
x1 = fc1(x)
x2 = fc2(x_detach)
x1 = linear1(x)
x2 = linear2(x_detach)
loss = x1 + x2
# print(loss, loss.shape)
loss.backward()
......
......@@ -18,7 +18,7 @@ import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
class SimpleImgConvPool(fluid.dygraph.Layer):
......@@ -71,8 +71,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
class MNIST(fluid.dygraph.Layer):
def __init__(self, name_scope, dtype="float32"):
super(MNIST, self).__init__(name_scope)
def __init__(self, dtype="float32"):
super(MNIST, self).__init__()
self._simple_img_conv_pool_1 = SimpleImgConvPool(
num_channels=3,
......@@ -94,10 +94,11 @@ class MNIST(fluid.dygraph.Layer):
dtype=dtype,
use_cudnn=True)
pool_2_shape = 50 * 4 * 4
self.pool_2_shape = 50 * 53 * 53
SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
self._fc = FC(self.full_name(),
scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
self._linear = Linear(
self.pool_2_shape,
10,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
......@@ -108,7 +109,8 @@ class MNIST(fluid.dygraph.Layer):
def forward(self, inputs, label):
x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x)
cost = self._fc(x)
x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
cost = self._linear(x)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
return avg_loss
......@@ -123,7 +125,7 @@ class TestMnist(unittest.TestCase):
x = np.random.randn(1, 3, 224, 224).astype("float16")
y = np.random.randn(1, 1).astype("int64")
with fluid.dygraph.guard(fluid.CUDAPlace(0)):
model = MNIST("mnist", dtype="float16")
model = MNIST(dtype="float16")
x = fluid.dygraph.to_variable(x)
y = fluid.dygraph.to_variable(y)
loss = model(x, y)
......
......@@ -18,44 +18,44 @@ import numpy as np
class AutoPruneLayer0(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer0, self).__init__(name_scope)
self.fc1 = fluid.dygraph.FC(
"FC_1",
def __init__(self, input_size):
super(AutoPruneLayer0, self).__init__()
self.linear1 = fluid.dygraph.Linear(
input_size,
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
self.fc2 = fluid.dygraph.FC(
"FC_2",
self.linear2 = fluid.dygraph.Linear(
5,
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
def forward(self, x, y):
a = self.fc1(x)
b = self.fc2(y)
a = self.linear1(x)
b = self.linear2(y)
c = fluid.layers.mul(a, b)
d = fluid.layers.reduce_mean(c)
return d
class AutoPruneLayer1(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer1, self).__init__(name_scope)
self.fc1 = fluid.dygraph.FC(
"FC_1",
def __init__(self, input_size):
super(AutoPruneLayer1, self).__init__()
self.linear1 = fluid.dygraph.Linear(
input_size,
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
self.fc2 = fluid.dygraph.FC(
"FC_2",
self.linear2 = fluid.dygraph.Linear(
5,
5,
param_attr=fluid.initializer.ConstantInitializer(value=2),
bias_attr=False)
def forward(self, x, y):
a = self.fc1(x)
b = self.fc2(y)
a = self.linear1(x)
b = self.linear2(y)
b.stop_gradient = True
c = fluid.layers.mul(a, b)
d = fluid.layers.reduce_mean(c)
......@@ -63,14 +63,14 @@ class AutoPruneLayer1(fluid.Layer):
class AutoPruneLayer2(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer2, self).__init__(name_scope)
self.fc = fluid.dygraph.FC("FC1", size=10, act=None)
self.fc2 = fluid.dygraph.FC("FC2", size=1, act=None)
def __init__(self, input_size):
super(AutoPruneLayer2, self).__init__()
self.linear = fluid.dygraph.Linear(input_size, 10, act=None)
self.linear2 = fluid.dygraph.Linear(1, 1, act=None)
def forward(self, x, label):
feature = self.fc(x)
label = self.fc2(label)
feature = self.linear(x)
label = self.linear2(label)
label = fluid.layers.cast(label, dtype="float32")
label = fluid.layers.cast(label, dtype='int64')
# Note that the label is not persistable in fluid.layers.cross_entropy.
......@@ -80,12 +80,12 @@ class AutoPruneLayer2(fluid.Layer):
class AutoPruneLayer3(fluid.Layer):
def __init__(self, name_scope):
super(AutoPruneLayer3, self).__init__(name_scope)
self.fc = fluid.dygraph.FC("FC1", size=20, act=None)
def __init__(self, input_size):
super(AutoPruneLayer3, self).__init__()
self.linear = fluid.dygraph.Linear(input_size, 20, act=None)
def forward(self, x, label, test_num):
feature = self.fc(x)
feature = self.linear(x)
part1, part2 = fluid.layers.split(
feature, num_or_sections=[10, 10], dim=1)
# Note that: part2 is not used.
......@@ -98,67 +98,68 @@ class AutoPruneLayer3(fluid.Layer):
class MyLayer(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer, self).__init__(name_scope, dtype)
def __init__(self, input_size, vocab_size, size, dtype="float32"):
super(MyLayer, self).__init__(dtype=dtype)
self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.linear_0 = fluid.Linear(input_size, size, dtype=dtype)
self.linear_1 = fluid.Linear(input_size, size, dtype=dtype)
def forward(self, x):
# this method involves only the fc layers
loss = fluid.layers.reduce_mean(self.fc0(x) + self.fc1(x))
# this method involves only the linear layers
loss = fluid.layers.reduce_mean(self.linear_0(x) + self.linear_1(x))
return loss
def linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(x))
loss = fluid.layers.reduce_mean(self.linear_0(x))
return loss
def embed_linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x)))
loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x)))
return loss
class MyLayer2(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer2, self).__init__(name_scope, dtype)
def __init__(self, input_size, vocab_size, size, dtype="float32"):
super(MyLayer2, self).__init__(dtype=dtype)
self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.linear_0 = fluid.Linear(input_size, size, dtype=dtype)
self.linear_1 = fluid.Linear(input_size, size, dtype=dtype)
def forward(self, indices):
# mind the difference with MyLayer
# In this example, the forward method involves all params
loss = fluid.layers.reduce_mean(
self.fc0(self.embed0(indices)) + self.fc1(self.embed1(indices)))
self.linear_0(self.embed0(indices)) + self.linear_1(
self.embed1(indices)))
return loss
def linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(x))
loss = fluid.layers.reduce_mean(self.linear_0(x))
return loss
def embed_linear0(self, x):
loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x)))
loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x)))
return loss
class TestImperativeAutoPrune(unittest.TestCase):
def test_auto_prune(self):
with fluid.dygraph.guard():
case1 = AutoPruneLayer0("l1")
case1 = AutoPruneLayer0(input_size=5)
value1 = np.arange(25).reshape(5, 5).astype("float32")
value2 = np.arange(25).reshape(5, 5).astype("float32")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss = case1(v1, v2)
loss.backward()
self.assertTrue(case1.fc2.weight._grad_ivar() is not None)
self.assertTrue(case1.fc1.weight._grad_ivar() is not None)
self.assertTrue(case1.linear2.weight._grad_ivar() is not None)
self.assertTrue(case1.linear1.weight._grad_ivar() is not None)
def test_auto_prune2(self):
with fluid.dygraph.guard():
case2 = AutoPruneLayer1("l1")
case2 = AutoPruneLayer1(input_size=5)
value1 = np.arange(25).reshape(5, 5).astype("float32")
value2 = np.arange(25).reshape(5, 5).astype("float32")
v1 = fluid.dygraph.to_variable(value1)
......@@ -166,43 +167,43 @@ class TestImperativeAutoPrune(unittest.TestCase):
loss = case2(v1, v2)
loss.backward()
self.assertTrue(case2.fc2.weight._grad_ivar() is None)
self.assertTrue(case2.fc1.weight._grad_ivar() is not None)
self.assertTrue(case2.linear2.weight._grad_ivar() is None)
self.assertTrue(case2.linear1.weight._grad_ivar() is not None)
def test_auto_prune3(self):
with fluid.dygraph.guard():
case3 = AutoPruneLayer3("l3")
case3 = AutoPruneLayer3(input_size=784)
value1 = np.arange(784).reshape(1, 784).astype("float32")
value2 = np.arange(1).reshape(1, 1).astype("int64")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss, part2 = case3(v1, v2, 1)
loss.backward()
self.assertTrue(case3.fc.weight._grad_ivar() is not None)
self.assertTrue(case3.linear.weight._grad_ivar() is not None)
self.assertTrue((part2.gradient() == 0).all())
def test_auto_prune4(self):
with fluid.dygraph.guard():
case4 = AutoPruneLayer3("l3")
case4 = AutoPruneLayer3(input_size=784)
value1 = np.arange(784).reshape(1, 784).astype("float32")
value2 = np.arange(1).reshape(1, 1).astype("int64")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss, part2 = case4(v1, v2, 1)
part2.backward()
self.assertTrue(case4.fc.weight._grad_ivar() is not None)
self.assertTrue(case4.linear.weight._grad_ivar() is not None)
self.assertTrue((part2.gradient() == 1).all())
def test_auto_prune5(self):
with fluid.dygraph.guard():
case4 = AutoPruneLayer3("l3")
case4 = AutoPruneLayer3(input_size=784)
value1 = np.arange(784).reshape(1, 784).astype("float32")
value2 = np.arange(1).reshape(1, 1).astype("int64")
v1 = fluid.dygraph.to_variable(value1)
v2 = fluid.dygraph.to_variable(value2)
loss, part1, part2 = case4(v1, v2, 2)
part1.backward()
self.assertTrue(case4.fc.weight._grad_ivar() is not None)
self.assertTrue(case4.linear.weight._grad_ivar() is not None)
self.assertTrue((part2.gradient() == 0).all())
def test_auto_prune6(self):
......@@ -210,17 +211,17 @@ class TestImperativeAutoPrune(unittest.TestCase):
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
linear = fluid.Linear(13, 5, dtype="float32")
linear2 = fluid.Linear(3, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
out2 = fc2(b)
out1 = linear(a)
out2 = linear2(b)
out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1)
out.backward()
self.assertTrue((fc.weight.gradient() == 0).all())
self.assertTrue((linear.weight.gradient() == 0).all())
self.assertTrue((out1.gradient() == 0).all())
def test_auto_prune7(self):
......@@ -228,18 +229,18 @@ class TestImperativeAutoPrune(unittest.TestCase):
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
linear = fluid.Linear(13, 5, dtype="float32")
linear2 = fluid.Linear(3, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
out2 = fc2(b)
out1 = linear(a)
out2 = linear2(b)
out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1)
backward_strategy = fluid.dygraph.BackwardStrategy()
out.backward(backward_strategy)
self.assertTrue((fc.weight.gradient() == 0).all())
self.assertTrue((linear.weight.gradient() == 0).all())
self.assertTrue((out1.gradient() == 0).all())
def test_auto_prune8(self):
......@@ -247,48 +248,52 @@ class TestImperativeAutoPrune(unittest.TestCase):
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
linear = fluid.Linear(13, 5, dtype="float32")
linear2 = fluid.Linear(5, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
fc_origin = fc.weight.numpy()
out2 = fc2(out1)
fc2_origin = fc2.weight.numpy()
fc2.weight.stop_gradient = True
out1 = linear(a)
linear_origin = linear.weight.numpy()
out2 = linear2(out1)
linear2_origin = linear2.weight.numpy()
linear2.weight.stop_gradient = True
out2.backward()
optimizer = fluid.optimizer.SGD(
learning_rate=0.003,
parameter_list=(fc.parameters() + fc2.parameters()))
parameter_list=(linear.parameters() + linear2.parameters()))
optimizer.minimize(out2)
self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy()))
self.assertFalse(np.array_equal(fc_origin, fc.weight.numpy()))
self.assertTrue(
np.array_equal(linear2_origin, linear2.weight.numpy()))
self.assertFalse(
np.array_equal(linear_origin, linear.weight.numpy()))
def test_auto_prune9(self):
with fluid.dygraph.guard():
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
linear = fluid.Linear(13, 5, dtype="float32")
linear2 = fluid.Linear(5, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
fc_origin = fc.weight.numpy()
out2 = fc2(out1)
fc2_origin = fc2.weight.numpy()
out1 = linear(a)
linear_origin = linear.weight.numpy()
out2 = linear2(out1)
linear2_origin = linear2.weight.numpy()
out2.stop_gradient = True
out2.backward()
optimizer = fluid.optimizer.SGD(
learning_rate=0.003,
parameter_list=(fc.parameters() + fc2.parameters()))
parameter_list=(linear.parameters() + linear2.parameters()))
optimizer.minimize(out2)
self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy()))
self.assertTrue(np.array_equal(fc_origin, fc.weight.numpy()))
self.assertTrue(
np.array_equal(linear2_origin, linear2.weight.numpy()))
self.assertTrue(
np.array_equal(linear_origin, linear.weight.numpy()))
try:
fc2.weight.gradient()
linear2.weight.gradient()
except ValueError as e:
assert type(e) == ValueError
......@@ -297,19 +302,19 @@ class TestImperativeAutoPrune(unittest.TestCase):
value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32")
linear = fluid.Linear(13, 5, dtype="float32")
linear2 = fluid.Linear(3, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2)
out1 = fc(a)
out2 = fc2(b)
out1 = linear(a)
out2 = linear2(b)
out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1)
backward_strategy = fluid.dygraph.BackwardStrategy()
backward_strategy.sort_sum_gradient = True
out.backward(backward_strategy)
self.assertTrue((fc.weight.gradient() == 0).all())
self.assertTrue((linear.weight.gradient() == 0).all())
self.assertTrue((out1.gradient() == 0).all())
def test_auto_prune_with_optimizer(self):
......@@ -323,13 +328,13 @@ class TestImperativeAutoPrune(unittest.TestCase):
place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
model = MyLayer("mylayer", vocab_size, size)
model = MyLayer(size, vocab_size, size)
optimizer = fluid.optimizer.AdamOptimizer(
0.001, parameter_list=model.parameters())
grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001)
indices = fluid.dygraph.to_variable(indices)
emebd = fluid.dygraph.to_variable(embed)
embed = fluid.dygraph.to_variable(embed)
dummy_loss = model(embed)
loss = model.embed_linear0(indices)
......@@ -337,12 +342,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
_, params_grads = optimizer.minimize(loss, grad_clip=grad_clip)
for items in params_grads:
assert items[0].name is not model.embed1.weight.name
assert items[0].name is not model.fc1.weight.name
assert items[0].name is not model.linear_1.weight.name
assert model.embed1.weight._grad_ivar() is None
assert model.fc1.weight._grad_ivar() is None
assert model.linear_1.weight._grad_ivar() is None
with fluid.dygraph.guard(place):
model = MyLayer2("mylayer", vocab_size, size)
model = MyLayer2(size, vocab_size, size)
optimizer = fluid.optimizer.AdamOptimizer(
0.001, parameter_list=model.parameters())
grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001)
......@@ -356,9 +361,9 @@ class TestImperativeAutoPrune(unittest.TestCase):
optimizer.minimize(loss, grad_clip=grad_clip)
for items in params_grads:
assert items[0].name is not model.embed1.weight.name
assert items[0].name is not model.fc1.weight.name
assert items[0].name is not model.linear_1.weight.name
assert model.embed1.weight._grad_ivar() is None
assert model.fc1.weight._grad_ivar() is None
assert model.linear_1.weight._grad_ivar() is None
def test_case2_prune_no_grad_branch(self):
with fluid.dygraph.guard():
......@@ -366,11 +371,11 @@ class TestImperativeAutoPrune(unittest.TestCase):
value2 = np.arange(1).reshape(1, 1)
v1 = fluid.dygraph.to_variable(value1).astype("float32")
v2 = fluid.dygraph.to_variable(value2).astype("float32")
case3 = AutoPruneLayer2("l2")
case3 = AutoPruneLayer2(input_size=784)
loss = case3(v1, v2)
loss.backward()
self.assertTrue(case3.fc2.weight._grad_ivar() is None)
self.assertTrue(case3.fc.weight._grad_ivar() is not None)
self.assertTrue(case3.linear2.weight._grad_ivar() is None)
self.assertTrue(case3.linear.weight._grad_ivar() is not None)
def test_case2_prune_no_grad_branch(self):
with fluid.dygraph.guard():
......@@ -378,24 +383,24 @@ class TestImperativeAutoPrune(unittest.TestCase):
value2 = np.arange(1).reshape(1, 1)
v1 = fluid.dygraph.to_variable(value1).astype("float32")
v2 = fluid.dygraph.to_variable(value2).astype("float32")
case3 = AutoPruneLayer2("l2")
case3 = AutoPruneLayer2(input_size=784)
loss = case3(v1, v2)
loss.backward()
self.assertTrue(case3.fc2.weight._grad_ivar() is None)
self.assertTrue(case3.fc.weight._grad_ivar() is not None)
self.assertTrue(case3.linear2.weight._grad_ivar() is None)
self.assertTrue(case3.linear.weight._grad_ivar() is not None)
def test_case3_prune_no_grad_branch2(self):
with fluid.dygraph.guard():
value1 = np.arange(1).reshape(1, 1)
fc = fluid.dygraph.FC("FC1", size=1, act=None)
linear = fluid.dygraph.Linear(1, 1, act=None)
label = fluid.dygraph.to_variable(value1).astype("float32")
label = fc(label)
label = linear(label)
label = fluid.layers.cast(label, dtype="float32")
label = fluid.layers.cast(label, dtype='int64')
out = fluid.layers.one_hot(input=label, depth=100)
loss = fluid.layers.mean(out)
loss.backward()
self.assertTrue(fc.weight._grad_ivar() is None)
self.assertTrue(linear.weight._grad_ivar() is None)
def test_case4_with_no_grad_op_maker(self):
with fluid.dygraph.guard():
......
......@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid import FC
from paddle.fluid import Linear
from test_imperative_base import new_program_scope
......@@ -35,15 +35,17 @@ class MyLayer(fluid.Layer):
class MLP(fluid.Layer):
def __init__(self, name_scope):
super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(),
def __init__(self, input_size):
super(MLP, self).__init__()
self._linear1 = Linear(
input_size,
3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = FC(self.full_name(),
self._linear2 = Linear(
3,
4,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)),
......@@ -51,8 +53,8 @@ class MLP(fluid.Layer):
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs):
x = self._fc1(inputs)
x = self._fc2(x)
x = self._linear1(inputs)
x = self._linear2(x)
x = fluid.layers.reduce_sum(x)
return x
......@@ -338,29 +340,29 @@ class TestImperative(unittest.TestCase):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.dygraph.guard():
var_inp = fluid.dygraph.base.to_variable(np_inp)
mlp = MLP("mlp")
mlp = MLP(input_size=2)
out = mlp(var_inp)
dy_out = out.numpy()
out.backward()
dy_grad = mlp._fc1.weight.gradient()
dy_grad = mlp._linear1.weight.gradient()
with fluid.dygraph.guard():
var_inp2 = fluid.dygraph.base.to_variable(np_inp)
mlp2 = MLP("mlp")
mlp2 = MLP(input_size=2)
out2 = mlp2(var_inp2)
dy_out2 = out2.numpy()
backward_strategy = fluid.dygraph.BackwardStrategy()
backward_strategy.sort_sum_gradient = True
out2.backward(backward_strategy)
dy_grad2 = mlp2._fc1.weight.gradient()
dy_grad2 = mlp2._linear1.weight.gradient()
with new_program_scope():
inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False)
mlp = MLP("mlp")
mlp = MLP(input_size=2)
out = mlp(inp)
param_grads = fluid.backward.append_backward(
out, parameter_list=[mlp._fc1.weight.name])[0]
out, parameter_list=[mlp._linear1.weight.name])[0]
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
......@@ -375,15 +377,15 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.allclose(dy_grad2, static_grad))
params = mlp.parameters(True)
self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name)
self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name)
self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name)
self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name)
self.assertEqual("linear_0.w_0", params[0].name)
self.assertEqual("linear_0.b_0", params[1].name)
self.assertEqual("linear_1.w_0", params[2].name)
self.assertEqual("linear_1.b_0", params[3].name)
self.assertEqual(len(params), 4)
sublayers = mlp.sublayers(True)
self.assertEqual(mlp._fc1, sublayers[0])
self.assertEqual(mlp._fc2, sublayers[1])
self.assertEqual(mlp._linear1, sublayers[0])
self.assertEqual(mlp._linear2, sublayers[1])
self.assertEqual(len(sublayers), 2)
def test_dygraph_vs_static(self):
......
......@@ -20,17 +20,17 @@ import numpy as np
class MLP(fluid.Layer):
def __init__(self, name_scope):
super(MLP, self).__init__(name_scope)
self._fc1 = fluid.dygraph.FC(
self.full_name(),
def __init__(self, input_size):
super(MLP, self).__init__()
self._linear1 = fluid.dygraph.Linear(
input_size,
3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = fluid.dygraph.FC(
self.full_name(),
self._linear2 = fluid.dygraph.Linear(
3,
4,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)),
......@@ -38,8 +38,8 @@ class MLP(fluid.Layer):
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs):
x = self._fc1(inputs)
x = self._fc2(x)
x = self._linear1(inputs)
x = self._linear2(x)
x = fluid.layers.reduce_sum(x)
return x
......@@ -51,7 +51,7 @@ class TestDygraphDebugString(unittest.TestCase):
trace_var = 0
alive_var = 0
with fluid.dygraph.guard():
mlp = MLP("mlp")
mlp = MLP(input_size=2)
for i in range(10):
var_inp = fluid.dygraph.base.to_variable(np_inp)
out = mlp(var_inp)
......
......@@ -21,17 +21,17 @@ from test_imperative_base import new_program_scope
class MLP(fluid.Layer):
def __init__(self, name_scope):
super(MLP, self).__init__(name_scope)
self._fc1 = fluid.dygraph.FC(
self.full_name(),
def __init__(self, input_size):
super(MLP, self).__init__()
self._linear1 = fluid.dygraph.Linear(
input_size,
3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = fluid.dygraph.FC(
self.full_name(),
self._linear2 = fluid.dygraph.Linear(
3,
4,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)),
......@@ -39,8 +39,8 @@ class MLP(fluid.Layer):
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs):
x = self._fc1(inputs)
x = self._fc2(x)
x = self._linear1(inputs)
x = self._linear2(x)
x = fluid.layers.reduce_sum(x)
return x
......@@ -48,7 +48,7 @@ class MLP(fluid.Layer):
class TestDygraphFramework(unittest.TestCase):
def test_dygraph_backward(self):
with new_program_scope():
mlp = MLP("mlp")
mlp = MLP(input_size=2)
var_inp = fluid.layers.data(
"input", shape=[2, 2], dtype="float32", append_batch_size=False)
out = mlp(var_inp)
......
......@@ -24,30 +24,30 @@ class TestImperativePartitialBackward(unittest.TestCase):
with fluid.dygraph.guard():
x = np.random.randn(2, 4, 5).astype("float32")
x = fluid.dygraph.to_variable(x)
fc1 = fluid.dygraph.FC("fc1", 10, num_flatten_dims=2)
fc2 = fluid.dygraph.FC("fc2", 10, num_flatten_dims=2)
linear1 = fluid.dygraph.Linear(5, 10)
linear2 = fluid.dygraph.Linear(5, 10)
y = fc1(x[:, :2])
z = fc2(x[:, 2:])
y = linear1(x[:, :2])
z = linear2(x[:, 2:])
loss = fluid.layers.reduce_mean(y)
loss.backward()
for param in fc1.parameters():
for param in linear1.parameters():
self.assertIsNotNone(param._grad_ivar())
for param in fc2.parameters():
for param in linear2.parameters():
self.assertIsNone(param._grad_ivar())
optimizer = fluid.optimizer.AdamOptimizer(parameter_list=(
fc1.parameters() + fc2.parameters()))
linear1.parameters() + linear2.parameters()))
_, params_grads = optimizer.minimize(loss)
self.assertListEqual(
sorted([p.name for p in fc1.parameters()]),
sorted([p.name for p in linear1.parameters()]),
sorted([p_g[0].name for p_g in params_grads]))
fc1.clear_gradients()
fc2.clear_gradients()
linear1.clear_gradients()
linear2.clear_gradients()
if __name__ == '__main__':
......
......@@ -23,18 +23,18 @@ import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
import paddle.fluid.dygraph.nn as nn
from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope
class Policy(fluid.dygraph.Layer):
def __init__(self, name_scope):
super(Policy, self).__init__(name_scope)
def __init__(self, input_size):
super(Policy, self).__init__()
self.affine1 = nn.FC(self.full_name(), size=128)
self.affine2 = nn.FC(self.full_name(), size=2)
self.affine1 = nn.Linear(input_size, 128)
self.affine2 = nn.Linear(128, 2)
self.dropout_ratio = 0.6
self.saved_log_probs = []
......@@ -67,7 +67,7 @@ class TestImperativeMnist(unittest.TestCase):
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
policy = Policy("PolicyModel")
policy = Policy(input_size=4)
dy_state = fluid.dygraph.base.to_variable(state)
dy_state.stop_gradient = True
......@@ -111,7 +111,7 @@ class TestImperativeMnist(unittest.TestCase):
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
policy = Policy("PolicyModel")
policy = Policy(input_size=4)
st_sgd = SGDOptimizer(learning_rate=1e-3)
......
......@@ -131,14 +131,13 @@ class SimpleLSTMRNN(fluid.Layer):
class PtbModel(fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
vocab_size,
num_layers=2,
num_steps=20,
init_scale=0.1,
dropout=None):
super(PtbModel, self).__init__(name_scope)
super(PtbModel, self).__init__()
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.init_scale = init_scale
......@@ -160,7 +159,18 @@ class PtbModel(fluid.Layer):
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale)))
self.out_project = Linear(self.hidden_size, self.vocab_size)
self.softmax_weight = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
self.softmax_bias = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
def forward(self, input, label, init_hidden, init_cell):
init_h = fluid.layers.reshape(
......@@ -182,7 +192,8 @@ class PtbModel(fluid.Layer):
rnn_out = fluid.layers.reshape(
rnn_out, shape=[-1, self.num_steps, self.hidden_size])
projection = self.out_project(rnn_out)
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
projection = fluid.layers.reshape(
projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
......@@ -210,7 +221,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
......@@ -294,7 +304,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
......@@ -400,7 +409,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
......@@ -505,7 +513,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
......@@ -614,7 +621,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
......@@ -694,7 +700,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
......@@ -786,7 +791,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to
ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size,
vocab_size=vocab_size,
num_layers=num_layers,
......
......@@ -85,30 +85,25 @@ class LayerTest(unittest.TestCase):
class TestLayer(LayerTest):
def test_custom_layer_with_kwargs(self):
class CustomLayer(fluid.Layer):
def __init__(self, name_scope, fc1_size=4):
super(CustomLayer, self).__init__(name_scope)
self.fc1 = nn.FC('fc1',
size=fc1_size,
bias_attr=False,
num_flatten_dims=1)
self.fc2 = nn.FC('fc2',
size=1,
bias_attr=False,
num_flatten_dims=1)
def forward(self, x, do_fc2=False):
ret = self.fc1(x)
if do_fc2:
ret = self.fc2(ret)
def __init__(self, input_size, linear1_size=4):
super(CustomLayer, self).__init__()
self.linear1 = nn.Linear(
input_size, linear1_size, bias_attr=False)
self.linear2 = nn.Linear(linear1_size, 1, bias_attr=False)
def forward(self, x, do_linear2=False):
ret = self.linear1(x)
if do_linear2:
ret = self.linear2(ret)
return ret
with self.dynamic_graph():
inp = np.ones([3, 3], dtype='float32')
x = base.to_variable(inp)
custom = CustomLayer('custom', fc1_size=2)
ret = custom(x, do_fc2=False)
custom = CustomLayer(input_size=3, linear1_size=2)
ret = custom(x, do_linear2=False)
self.assertTrue(np.array_equal(ret.numpy().shape, [3, 2]))
ret = custom(x, do_fc2=True)
ret = custom(x, do_linear2=True)
self.assertTrue(np.array_equal(ret.numpy().shape, [3, 1]))
def test_linear(self):
......@@ -133,112 +128,6 @@ class TestLayer(LayerTest):
self.assertTrue(np.array_equal(static_ret, dy_ret_value))
inp = np.ones([3, 32], dtype='float32')
with self.dynamic_graph():
t = base.to_variable(inp)
linear = nn.Linear(32, 4, bias_attr=False)
dy_ret = linear(t)
dy_ret_value = dy_ret.numpy()
with self.dynamic_graph():
t = base.to_variable(inp)
fc = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
dy_ret2 = fc(t)
dy_ret_value2 = dy_ret2.numpy()
self.assertTrue(np.array_equal(dy_ret_value, dy_ret_value2))
def test_fc(self):
inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
ret = layers.fc(t, size=4, bias_attr=False, num_flatten_dims=1)
ret2 = layers.fc(ret, size=4)
static_ret = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret2])[0]
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = nn.FC('fc2', size=4)
ret = fc1(t)
ret2 = fc2(ret)
static_ret2 = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret2])[0]
with self.dynamic_graph():
t = base.to_variable(inp)
fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = nn.FC('fc2', size=4)
ret = fc1(t)
dy_ret = fc2(ret)
dy_ret_value = dy_ret.numpy()
self.assertTrue(np.array_equal(static_ret, static_ret2))
self.assertTrue(np.array_equal(static_ret, dy_ret_value))
with self.dynamic_graph():
custom_weight = np.random.randn(1024, 4).astype("float32")
weight_attr1 = fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
custom_weight))
fc1 = fluid.dygraph.FC("fc1",
4,
num_flatten_dims=1,
param_attr=weight_attr1)
out1 = fc1(base.to_variable(inp))
loss1 = fluid.layers.reduce_mean(out1)
fc1_weight_init = fc1.weight.detach()
fc1_bias_init = fc1.bias.detach()
loss1.backward()
optimizer1 = fluid.optimizer.SGD(learning_rate=0.1,
parameter_list=fc1.parameters())
optimizer1.minimize(loss1)
fc1_weight_updated = fc1.weight.detach()
with self.dynamic_graph():
weight_attr2 = fluid.ParamAttr(
initializer=fluid.initializer.Uniform())
fc2 = fluid.dygraph.FC("fc2",
4,
num_flatten_dims=1,
param_attr=weight_attr2)
out2 = fc2(base.to_variable(inp))
self.assertFalse(
np.array_equal(fc1_weight_init.numpy(), fc2.weight.numpy()))
self.assertFalse(np.array_equal(out1.numpy(), out2.numpy()))
mismatched_weight = np.random.randn(4, 4).astype("float32")
with self.assertRaises(AssertionError):
fc2.weight.set_value(mismatched_weight)
fc2.weight.set_value(fc1_weight_init)
fc2.bias.set_value(fc1_bias_init)
out2 = fc2(base.to_variable(inp))
loss2 = fluid.layers.reduce_mean(out2)
loss2.backward()
optimizer2 = fluid.optimizer.SGD(learning_rate=0.1,
parameter_list=fc2.parameters())
optimizer2.minimize(loss2)
self.assertTrue(
np.array_equal(fc2.weight.numpy(), fc1_weight_updated.numpy()))
self.assertTrue(np.array_equal(out1.numpy(), out2.numpy()))
fc2.weight = fc1.weight
fc2.bias = fc1.bias
self.assertTrue(
np.array_equal(fc2.weight.numpy(), fc1.weight.numpy()))
self.assertTrue(np.array_equal(fc2.bias.numpy(), fc1.bias.numpy()))
def test_layer_norm(self):
inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph():
......