Commit cf475f95 authored by zhongpu, committed by hong

Remove FC in dygraph, modify FC to Linear in sample code (#22082)

* modify fc to linear in sample code, test=develop

* remove FC, test=develop

* remove warnings, test=develop

* drop fluid/imperative/README.md , test=develop

* change fc to linear, test=develop

* polish code style, test=develop
Parent 64a40442
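The diff below migrates all dygraph sample code from the removed FC layer to Linear. A minimal sketch of the migration pattern, assembled from the snippets changed in this commit (shapes taken from those examples):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable

data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
    # Previously: fc = FC("fc", 64, num_flatten_dims=2)  -- only the output size was given
    linear = Linear(32, 64)           # Linear takes explicit (input_dim, output_dim)
    x = linear(to_variable(data))     # output shape: [30, 10, 64]
    print(x.numpy().shape)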
...@@ -340,14 +340,14 @@ void BindImperative(py::module *m_ptr) { ...@@ -340,14 +340,14 @@ void BindImperative(py::module *m_ptr) {
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
print(x.numpy()) print(x.numpy())
)DOC") )DOC")
...@@ -374,14 +374,14 @@ void BindImperative(py::module *m_ptr) { ...@@ -374,14 +374,14 @@ void BindImperative(py::module *m_ptr) {
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
y = x.detach() y = x.detach()
)DOC") )DOC")
......
...@@ -84,12 +84,12 @@ def _no_grad_(func): ...@@ -84,12 +84,12 @@ def _no_grad_(func):
@fluid.dygraph.no_grad @fluid.dygraph.no_grad
def test_layer(): def test_layer():
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inp = np.ones([3, 32, 32], dtype='float32') inp = np.ones([3, 1024], dtype='float32')
t = fluid.dygraph.base.to_variable(inp) t = fluid.dygraph.base.to_variable(inp)
fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) linear1 = fluid.Linear(1024, 4, bias_attr=False)
fc2 = fluid.FC('fc2', size=4) linear2 = fluid.Linear(4, 4)
ret = fc1(t) ret = linear1(t)
dy_ret = fc2(ret) dy_ret = linear2(ret)
test_layer() test_layer()
...@@ -127,12 +127,12 @@ def guard(place=None): ...@@ -127,12 +127,12 @@ def guard(place=None):
import paddle.fluid as fluid import paddle.fluid as fluid
with fluid.dygraph.guard(): with fluid.dygraph.guard():
inp = np.ones([3, 32, 32], dtype='float32') inp = np.ones([3, 1024], dtype='float32')
t = fluid.dygraph.base.to_variable(inp) t = fluid.dygraph.base.to_variable(inp)
fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) linear1 = fluid.Linear(1024, 4, bias_attr=False)
fc2 = fluid.FC('fc2', size=4) linear2 = fluid.Linear(4, 4)
ret = fc1(t) ret = linear1(t)
dy_ret = fc2(ret) dy_ret = linear2(ret)
""" """
train = framework.Program() train = framework.Program()
......
...@@ -29,10 +29,9 @@ import numbers ...@@ -29,10 +29,9 @@ import numbers
import logging import logging
__all__ = [ __all__ = [
'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'Linear', 'BatchNorm', 'Embedding', 'Conv2D', 'Conv3D', 'Pool2D', 'Linear', 'BatchNorm', 'Embedding', 'GRUUnit',
'GRUUnit', 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose',
'Conv2DTranspose', 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'TreeConv'
'TreeConv'
] ]
...@@ -865,7 +864,7 @@ class Linear(layers.Layer): ...@@ -865,7 +864,7 @@ class Linear(layers.Layer):
where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively. where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively.
Different from FC layer, Linear layer takes only one ``Tensor`` input. Linear layer takes only one ``Tensor`` input.
The Linear layer multiplies input tensor with weight matrix and The Linear layer multiplies input tensor with weight matrix and
produces an output Tensor of shape [N, *, `output_dim`], produces an output Tensor of shape [N, *, `output_dim`],
where N is batch size and `*` means any number of additional dimensions. where N is batch size and `*` means any number of additional dimensions.
...@@ -959,221 +958,6 @@ class Linear(layers.Layer): ...@@ -959,221 +958,6 @@ class Linear(layers.Layer):
return self._helper.append_activation(pre_activation, act=self._act) return self._helper.append_activation(pre_activation, act=self._act)
class FC(layers.Layer):
"""
This interface is used to construct a callable object of the ``FC`` class.
For more details, refer to code examples.
It creates a fully connected layer in the network. It can take
one or multiple ``Tensor`` as its inputs. It creates a Variable called weights for each input tensor,
which represents a fully connected weight matrix from each input unit to
each output unit. The fully connected layer multiplies each input tensor
with its corresponding weight to produce an output Tensor with shape [N, `size`],
where N is batch size. If multiple input tensors are given, the results of
multiple output tensors with shape [N, `size`] will be summed up. If ``bias_attr``
is not None, a bias variable will be created and added to the output.
Finally, if ``act`` is not None, it will be applied to the output as well.
When the input is single ``Tensor`` :
.. math::
Out = Act({XW + b})
When the input are multiple ``Tensor`` :
.. math::
Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation:
* :math:`N`: Number of the input. N equals to len(input) if input is list of ``Tensor`` .
* :math:`X_i`: The i-th input ``Tensor`` .
* :math:`W_i`: The i-th weights matrix corresponding i-th input tensor.
* :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Act`: The activation function.
* :math:`Out`: The output ``Tensor`` .
See below for an example.
.. code-block:: text
Given:
data_1.data = [[[0.1, 0.2]]]
data_1.shape = (1, 1, 2) # 1 is batch_size
data_2.data = [[[0.1, 0.2, 0.3]]]
data_2.shape = (1, 1, 3) # 1 is batch_size
fc = FC("fc", 2, num_flatten_dims=2)
out = fc(input=[data_1, data_2])
Then:
out.data = [[[0.182996 -0.474117]]]
out.shape = (1, 1, 2)
Parameters:
name_scope(str): The name of this class.
size(int): The number of output units in this layer.
num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multi-dimension tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
dimensions will be flatten to form the first dimension of the final matrix (height of
the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1
param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable
weights(Parameter) of this layer. Default: None.
bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act (str, optional): Activation to be applied to the output of this layer. Default: None.
is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False.
dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32".
Attribute:
**weight** (list of Parameter): the learnable weights of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Returns:
None
Examples:
.. code-block:: python
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
from paddle.fluid.dygraph import FC
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
data = to_variable(data)
conv = fc(data)
"""
def __init__(self,
name_scope,
size,
num_flatten_dims=1,
param_attr=None,
bias_attr=None,
act=None,
is_test=False,
dtype="float32"):
super(FC, self).__init__(name_scope, dtype)
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self.__w = list()
def _build_once(self, input):
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
input_shape = inp.shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:],
1)
] + [self._size]
self.__w.append(
self.add_parameter(
'_w%d' % i,
self.create_parameter(
attr=param,
shape=param_shape,
dtype=self._dtype,
is_bias=False)))
i += 1
size = list([self._size])
self._b = self.create_parameter(
attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True)
# TODO(songyouwei): We should remove _w property
@property
def _w(self, i=0):
return self.__w[i]
@_w.setter
def _w(self, value, i=0):
assert isinstance(self.__w[i], Variable)
self.__w[i].set_value(value)
@property
def weight(self):
if len(self.__w) > 1:
return self.__w
else:
return self.__w[0]
@weight.setter
def weight(self, value):
if len(self.__w) == 1:
self.__w[0] = value
@property
def bias(self):
return self._b
@bias.setter
def bias(self, value):
self._b = value
def forward(self, input):
mul_results = list()
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
tmp = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": inp,
"Y": self.__w[i]},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": self._num_flatten_dims,
"y_num_col_dims": 1
})
i += 1
mul_results.append(tmp)
if len(mul_results) == 1:
pre_bias = mul_results[0]
else:
pre_bias = self._helper.create_variable_for_type_inference(
self._dtype)
self._helper.append_op(
type="sum",
inputs={"X": mul_results},
outputs={"Out": pre_bias},
attrs={"use_mkldnn": False})
if self._b:
pre_activation = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias],
'Y': [self._b]},
outputs={'Out': [pre_activation]},
attrs={'axis': self._num_flatten_dims})
else:
pre_activation = pre_bias
# Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(pre_activation, act=self._act)
class BatchNorm(layers.Layer): class BatchNorm(layers.Layer):
""" """
This interface is used to construct a callable object of the ``BatchNorm`` class. This interface is used to construct a callable object of the ``BatchNorm`` class.
......
...@@ -97,7 +97,7 @@ class DataParallel(layers.Layer): ...@@ -97,7 +97,7 @@ class DataParallel(layers.Layer):
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph import paddle.fluid.dygraph as dygraph
from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
place = fluid.CUDAPlace(0) place = fluid.CUDAPlace(0)
...@@ -106,28 +106,28 @@ class DataParallel(layers.Layer): ...@@ -106,28 +106,28 @@ class DataParallel(layers.Layer):
# prepare the data parallel context # prepare the data parallel context
strategy=dygraph.parallel.prepare_context() strategy=dygraph.parallel.prepare_context()
fc_layer = FC("FC", 10, act="softmax") linear = Linear(1, 10, act="softmax")
adam = fluid.optimizer.AdamOptimizer() adam = fluid.optimizer.AdamOptimizer()
# make the module become the data parallelism module # make the module become the data parallelism module
fc_layer = dygraph.parallel.DataParallel(fc_layer, strategy) linear = dygraph.parallel.DataParallel(linear, strategy)
x_data = np.random.random(size=[10, 1]).astype(np.float32) x_data = np.random.random(size=[10, 1]).astype(np.float32)
data = to_variable(x_data) data = to_variable(x_data)
hidden = fc_layer(data) hidden = linear(data)
avg_loss = fluid.layers.mean(hidden) avg_loss = fluid.layers.mean(hidden)
# scale the loss according to the number of trainers. # scale the loss according to the number of trainers.
avg_loss = fc_layer.scale_loss(avg_loss) avg_loss = linear.scale_loss(avg_loss)
avg_loss.backward() avg_loss.backward()
# collect the gradients of trainers. # collect the gradients of trainers.
fc_layer.apply_collective_grads() linear.apply_collective_grads()
adam.minimize(avg_loss) adam.minimize(avg_loss)
fc_layer.clear_gradients() linear.clear_gradients()
Args: Args:
layers(Layer): The module that should be executed by data parallel. layers(Layer): The module that should be executed by data parallel.
......
...@@ -39,17 +39,17 @@ def monkey_patch_varbase(): ...@@ -39,17 +39,17 @@ def monkey_patch_varbase():
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.ones([3, 32, 32], dtype='float32') data = np.ones([3, 1024], dtype='float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = fluid.dygraph.FC("fc", 4) linear = fluid.dygraph.Linear(1024, 4)
t = to_variable(data) t = to_variable(data)
fc(t) # call with default weight linear(t) # call with default weight
custom_weight = np.random.randn(1024, 4).astype("float32") custom_weight = np.random.randn(1024, 4).astype("float32")
fc.weight.set_value(custom_weight) # change existing weight linear.weight.set_value(custom_weight) # change existing weight
out = fc(t) # call with different weight out = linear(t) # call with different weight
""" """
assert isinstance(value, (np.ndarray, core.VarBase)), \ assert isinstance(value, (np.ndarray, core.VarBase)), \
......
...@@ -65,7 +65,7 @@ class GradClipByValue(GradClipBase): ...@@ -65,7 +65,7 @@ class GradClipByValue(GradClipBase):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
...@@ -77,9 +77,9 @@ class GradClipByValue(GradClipBase): ...@@ -77,9 +77,9 @@ class GradClipByValue(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10) linear = Linear( 10, 10)
out = fc( to_variable(init_value) ) out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out ) loss = fluid.layers.reduce_mean( out )
...@@ -144,7 +144,7 @@ class GradClipByNorm(GradClipBase): ...@@ -144,7 +144,7 @@ class GradClipByNorm(GradClipBase):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
...@@ -156,9 +156,9 @@ class GradClipByNorm(GradClipBase): ...@@ -156,9 +156,9 @@ class GradClipByNorm(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10) linear = Linear( 10, 10)
out = fc( to_variable(init_value) ) out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out ) loss = fluid.layers.reduce_mean( out )
...@@ -222,7 +222,7 @@ class GradClipByGlobalNorm(GradClipBase): ...@@ -222,7 +222,7 @@ class GradClipByGlobalNorm(GradClipBase):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm
...@@ -234,9 +234,9 @@ class GradClipByGlobalNorm(GradClipBase): ...@@ -234,9 +234,9 @@ class GradClipByGlobalNorm(GradClipBase):
init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32')
fc = FC( "fc", 10) linear = Linear( 10, 10)
out = fc( to_variable(init_value) ) out = linear( to_variable(init_value) )
loss = fluid.layers.reduce_mean( out ) loss = fluid.layers.reduce_mean( out )
......
...@@ -959,14 +959,14 @@ class Variable(object): ...@@ -959,14 +959,14 @@ class Variable(object):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
y = x.detach() y = x.detach()
""" """
...@@ -991,14 +991,14 @@ class Variable(object): ...@@ -991,14 +991,14 @@ class Variable(object):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2) linear = Linear(32, 64)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
print(x.numpy()) print(x.numpy())
""" """
...@@ -1020,17 +1020,17 @@ class Variable(object): ...@@ -1020,17 +1020,17 @@ class Variable(object):
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph import FC from paddle.fluid.dygraph import Linear
import numpy as np import numpy as np
data = np.ones([3, 32, 32], dtype='float32') data = np.ones([3, 1024], dtype='float32')
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc = fluid.dygraph.FC("fc", 4) linear = fluid.dygraph.Linear(1024, 4)
t = to_variable(data) t = to_variable(data)
fc(t) # call with default weight linear(t) # call with default weight
custom_weight = np.random.randn(1024, 4).astype("float32") custom_weight = np.random.randn(1024, 4).astype("float32")
fc.weight.set_value(custom_weight) # change existing weight linear.weight.set_value(custom_weight) # change existing weight
out = fc(t) # call with different weight out = linear(t) # call with different weight
""" """
pass pass
...@@ -1223,18 +1223,18 @@ class Variable(object): ...@@ -1223,18 +1223,18 @@ class Variable(object):
value0 = np.arange(26).reshape(2, 13).astype("float32") value0 = np.arange(26).reshape(2, 13).astype("float32")
value1 = np.arange(6).reshape(2, 3).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32")
value2 = np.arange(10).reshape(2, 5).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32")
fc = fluid.FC("fc1", size=5, dtype="float32") linear = fluid.Linear(13, 5, dtype="float32")
fc2 = fluid.FC("fc2", size=3, dtype="float32") linear2 = fluid.Linear(3, 3, dtype="float32")
a = fluid.dygraph.to_variable(value0) a = fluid.dygraph.to_variable(value0)
b = fluid.dygraph.to_variable(value1) b = fluid.dygraph.to_variable(value1)
c = fluid.dygraph.to_variable(value2) c = fluid.dygraph.to_variable(value2)
out1 = fc(a) out1 = linear(a)
out2 = fc2(b) out2 = linear2(b)
out1.stop_gradient = True out1.stop_gradient = True
out = fluid.layers.concat(input=[out1, out2, c], axis=1) out = fluid.layers.concat(input=[out1, out2, c], axis=1)
out.backward() out.backward()
assert (fc._w.gradient() == 0).all() assert (linear.weight.gradient() == 0).all()
assert (out1.gradient() == 0).all() assert (out1.gradient() == 0).all()
""" """
if in_dygraph_mode(): if in_dygraph_mode():
......
...@@ -30,14 +30,15 @@ __all__ = ['run_check'] ...@@ -30,14 +30,15 @@ __all__ = ['run_check']
class SimpleLayer(Layer): class SimpleLayer(Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(SimpleLayer, self).__init__(name_scope) super(SimpleLayer, self).__init__()
self._fc1 = nn.FC(self.full_name(), self._linear1 = nn.Linear(
3, input_size,
param_attr=ParamAttr(initializer=Constant(value=0.1))) 3,
param_attr=ParamAttr(initializer=Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = layers.reduce_sum(x) x = layers.reduce_sum(x)
return x return x
...@@ -79,7 +80,7 @@ def run_check(): ...@@ -79,7 +80,7 @@ def run_check():
build_strategy = compiler.BuildStrategy() build_strategy = compiler.BuildStrategy()
build_strategy.enable_inplace = True build_strategy.enable_inplace = True
inp = layers.data(name="inp", shape=[2, 2]) inp = layers.data(name="inp", shape=[2, 2])
simple_layer = SimpleLayer("simple_layer") simple_layer = SimpleLayer(input_size=2)
out = simple_layer(inp) out = simple_layer(inp)
exe = executor.Executor( exe = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and core.CUDAPlace(0) if core.is_compiled_with_cuda() and
...@@ -108,10 +109,11 @@ def run_check(): ...@@ -108,10 +109,11 @@ def run_check():
with unique_name.guard(): with unique_name.guard():
inp0 = layers.data( inp0 = layers.data(
name="inp", shape=[2, 2], append_batch_size=False) name="inp", shape=[2, 2], append_batch_size=False)
simple_layer0 = SimpleLayer("simple_layer") simple_layer0 = SimpleLayer(input_size=2)
out0 = simple_layer0(inp0) out0 = simple_layer0(inp0)
param_grads = backward.append_backward( param_grads = backward.append_backward(
out0, parameter_list=[simple_layer0._fc1._w.name])[0] out0,
parameter_list=[simple_layer0._linear1.weight.name])[0]
exe0 = executor.Executor( exe0 = executor.Executor(
core.CUDAPlace(0) if core.is_compiled_with_cuda() and core.CUDAPlace(0) if core.is_compiled_with_cuda() and
(core.get_cuda_device_count() > 0) else core.CPUPlace()) (core.get_cuda_device_count() > 0) else core.CPUPlace())
......
...@@ -3002,7 +3002,7 @@ def layer_norm(input, ...@@ -3002,7 +3002,7 @@ def layer_norm(input,
print(output) print(output)
""" """
assert in_dygraph_mode( assert in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!" ) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!"
helper = LayerHelper('layer_norm', **locals()) helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
......
...@@ -17,8 +17,7 @@ from __future__ import print_function ...@@ -17,8 +17,7 @@ from __future__ import print_function
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import FC from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import FC
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
import unittest import unittest
...@@ -33,37 +32,37 @@ class Test_Detach(unittest.TestCase): ...@@ -33,37 +32,37 @@ class Test_Detach(unittest.TestCase):
def no_detach_multi(self): def no_detach_multi(self):
data = self.generate_Data() data = self.generate_Data()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr( linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0)) initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr( linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0)) initializer=fluid.initializer.Constant(6.0))
fc = FC("fc", linear = Linear(
10, 4,
num_flatten_dims=1, 10,
param_attr=fc_w_param_attrs, param_attr=linear_w_param_attrs,
bias_attr=fc_b_param_attrs) bias_attr=linear_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr( linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0)) initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr( linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0)) initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc", linear1 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc1_w_param_attrs, param_attr=linear1_w_param_attrs,
bias_attr=fc1_b_param_attrs) bias_attr=linear1_b_param_attrs)
fc2_w_param_attrs = fluid.ParamAttr( linear2_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(9.0)) initializer=fluid.initializer.Constant(9.0))
fc2_b_param_attrs = fluid.ParamAttr( linear2_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(10.0)) initializer=fluid.initializer.Constant(10.0))
fc2 = FC("fc", linear2 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc2_w_param_attrs, param_attr=linear2_w_param_attrs,
bias_attr=fc2_b_param_attrs) bias_attr=linear2_b_param_attrs)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
x1 = fc1(x) x1 = linear1(x)
x2 = fc2(x) x2 = linear2(x)
loss = x1 + x2 loss = x1 + x2
# print(loss, loss.shape) # print(loss, loss.shape)
loss.backward() loss.backward()
...@@ -72,27 +71,27 @@ class Test_Detach(unittest.TestCase): ...@@ -72,27 +71,27 @@ class Test_Detach(unittest.TestCase):
def no_detach_single(self): def no_detach_single(self):
data = self.generate_Data() data = self.generate_Data()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr( linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0)) initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr( linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0)) initializer=fluid.initializer.Constant(6.0))
fc = FC("fc", linear = Linear(
10, 4,
num_flatten_dims=1, 10,
param_attr=fc_w_param_attrs, param_attr=linear_w_param_attrs,
bias_attr=fc_b_param_attrs) bias_attr=linear_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr( linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0)) initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr( linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0)) initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc", linear1 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc1_w_param_attrs, param_attr=linear1_w_param_attrs,
bias_attr=fc1_b_param_attrs) bias_attr=linear1_b_param_attrs)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
x1 = fc1(x) x1 = linear1(x)
loss = x1 loss = x1
# print(loss, loss.shape) # print(loss, loss.shape)
loss.backward() loss.backward()
...@@ -101,38 +100,38 @@ class Test_Detach(unittest.TestCase): ...@@ -101,38 +100,38 @@ class Test_Detach(unittest.TestCase):
def detach_multi(self): def detach_multi(self):
data = self.generate_Data() data = self.generate_Data()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
fc_w_param_attrs = fluid.ParamAttr( linear_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(5.0)) initializer=fluid.initializer.Constant(5.0))
fc_b_param_attrs = fluid.ParamAttr( linear_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(6.0)) initializer=fluid.initializer.Constant(6.0))
fc = FC("fc", linear = Linear(
10, 4,
num_flatten_dims=1, 10,
param_attr=fc_w_param_attrs, param_attr=linear_w_param_attrs,
bias_attr=fc_b_param_attrs) bias_attr=linear_b_param_attrs)
fc1_w_param_attrs = fluid.ParamAttr( linear1_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(7.0)) initializer=fluid.initializer.Constant(7.0))
fc1_b_param_attrs = fluid.ParamAttr( linear1_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(8.0)) initializer=fluid.initializer.Constant(8.0))
fc1 = FC("fc", linear1 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc1_w_param_attrs, param_attr=linear1_w_param_attrs,
bias_attr=fc1_b_param_attrs) bias_attr=linear1_b_param_attrs)
fc2_w_param_attrs = fluid.ParamAttr( linear2_w_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(9.0)) initializer=fluid.initializer.Constant(9.0))
fc2_b_param_attrs = fluid.ParamAttr( linear2_b_param_attrs = fluid.ParamAttr(
initializer=fluid.initializer.Constant(10.0)) initializer=fluid.initializer.Constant(10.0))
fc2 = FC("fc", linear2 = Linear(
1, 10,
num_flatten_dims=1, 1,
param_attr=fc2_w_param_attrs, param_attr=linear2_w_param_attrs,
bias_attr=fc2_b_param_attrs) bias_attr=linear2_b_param_attrs)
data = to_variable(data) data = to_variable(data)
x = fc(data) x = linear(data)
x_detach = x.detach() x_detach = x.detach()
x1 = fc1(x) x1 = linear1(x)
x2 = fc2(x_detach) x2 = linear2(x_detach)
loss = x1 + x2 loss = x1 + x2
# print(loss, loss.shape) # print(loss, loss.shape)
loss.backward() loss.backward()
......
...@@ -18,7 +18,7 @@ import unittest ...@@ -18,7 +18,7 @@ import unittest
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
...@@ -71,8 +71,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -71,8 +71,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
class MNIST(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer):
def __init__(self, name_scope, dtype="float32"): def __init__(self, dtype="float32"):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__()
self._simple_img_conv_pool_1 = SimpleImgConvPool( self._simple_img_conv_pool_1 = SimpleImgConvPool(
num_channels=3, num_channels=3,
...@@ -94,21 +94,23 @@ class MNIST(fluid.dygraph.Layer): ...@@ -94,21 +94,23 @@ class MNIST(fluid.dygraph.Layer):
dtype=dtype, dtype=dtype,
use_cudnn=True) use_cudnn=True)
pool_2_shape = 50 * 4 * 4 self.pool_2_shape = 50 * 53 * 53
SIZE = 10 SIZE = 10
scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
self._fc = FC(self.full_name(), self._linear = Linear(
10, self.pool_2_shape,
param_attr=fluid.param_attr.ParamAttr( 10,
initializer=fluid.initializer.NormalInitializer( param_attr=fluid.param_attr.ParamAttr(
loc=0.0, scale=scale)), initializer=fluid.initializer.NormalInitializer(
act="softmax", loc=0.0, scale=scale)),
dtype=dtype) act="softmax",
dtype=dtype)
def forward(self, inputs, label): def forward(self, inputs, label):
x = self._simple_img_conv_pool_1(inputs) x = self._simple_img_conv_pool_1(inputs)
x = self._simple_img_conv_pool_2(x) x = self._simple_img_conv_pool_2(x)
cost = self._fc(x) x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
cost = self._linear(x)
loss = fluid.layers.cross_entropy(cost, label) loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss) avg_loss = fluid.layers.mean(loss)
return avg_loss return avg_loss
...@@ -123,7 +125,7 @@ class TestMnist(unittest.TestCase): ...@@ -123,7 +125,7 @@ class TestMnist(unittest.TestCase):
x = np.random.randn(1, 3, 224, 224).astype("float16") x = np.random.randn(1, 3, 224, 224).astype("float16")
y = np.random.randn(1, 1).astype("int64") y = np.random.randn(1, 1).astype("int64")
with fluid.dygraph.guard(fluid.CUDAPlace(0)): with fluid.dygraph.guard(fluid.CUDAPlace(0)):
model = MNIST("mnist", dtype="float16") model = MNIST(dtype="float16")
x = fluid.dygraph.to_variable(x) x = fluid.dygraph.to_variable(x)
y = fluid.dygraph.to_variable(y) y = fluid.dygraph.to_variable(y)
loss = model(x, y) loss = model(x, y)
......
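Unlike the removed FC layer, Linear does not flatten a multi-dimensional input, which is why the MNIST model above now reshapes the pooled feature map before the projection. A minimal sketch of that pattern (the feature-map shape here is illustrative, not taken from the test):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

with fluid.dygraph.guard():
    feat = fluid.dygraph.to_variable(
        np.random.randn(1, 50, 4, 4).astype("float32"))    # a pooled feature map
    flat_dim = 50 * 4 * 4
    x = fluid.layers.reshape(feat, shape=[-1, flat_dim])    # explicit flatten; FC used to do this internally
    linear = Linear(flat_dim, 10, act="softmax")
    out = linear(x)
    print(out.numpy().shape)    # (1, 10)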
...@@ -18,7 +18,7 @@ import numpy as np ...@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid import FC from paddle.fluid import Linear
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
...@@ -35,24 +35,26 @@ class MyLayer(fluid.Layer): ...@@ -35,24 +35,26 @@ class MyLayer(fluid.Layer):
class MLP(fluid.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__()
self._fc1 = FC(self.full_name(), self._linear1 = Linear(
3, input_size,
param_attr=fluid.ParamAttr( 3,
initializer=fluid.initializer.Constant(value=0.1)), param_attr=fluid.ParamAttr(
bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)),
initializer=fluid.initializer.Constant(value=0.1))) bias_attr=fluid.ParamAttr(
self._fc2 = FC(self.full_name(), initializer=fluid.initializer.Constant(value=0.1)))
4, self._linear2 = Linear(
param_attr=fluid.ParamAttr( 3,
initializer=fluid.initializer.Constant(value=0.1)), 4,
bias_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = self._fc2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = fluid.layers.reduce_sum(x)
return x return x
...@@ -338,29 +340,29 @@ class TestImperative(unittest.TestCase): ...@@ -338,29 +340,29 @@ class TestImperative(unittest.TestCase):
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
mlp = MLP("mlp") mlp = MLP(input_size=2)
out = mlp(var_inp) out = mlp(var_inp)
dy_out = out.numpy() dy_out = out.numpy()
out.backward() out.backward()
dy_grad = mlp._fc1.weight.gradient() dy_grad = mlp._linear1.weight.gradient()
with fluid.dygraph.guard(): with fluid.dygraph.guard():
var_inp2 = fluid.dygraph.base.to_variable(np_inp) var_inp2 = fluid.dygraph.base.to_variable(np_inp)
mlp2 = MLP("mlp") mlp2 = MLP(input_size=2)
out2 = mlp2(var_inp2) out2 = mlp2(var_inp2)
dy_out2 = out2.numpy() dy_out2 = out2.numpy()
backward_strategy = fluid.dygraph.BackwardStrategy() backward_strategy = fluid.dygraph.BackwardStrategy()
backward_strategy.sort_sum_gradient = True backward_strategy.sort_sum_gradient = True
out2.backward(backward_strategy) out2.backward(backward_strategy)
dy_grad2 = mlp2._fc1.weight.gradient() dy_grad2 = mlp2._linear1.weight.gradient()
with new_program_scope(): with new_program_scope():
inp = fluid.layers.data( inp = fluid.layers.data(
name="inp", shape=[2, 2], append_batch_size=False) name="inp", shape=[2, 2], append_batch_size=False)
mlp = MLP("mlp") mlp = MLP(input_size=2)
out = mlp(inp) out = mlp(inp)
param_grads = fluid.backward.append_backward( param_grads = fluid.backward.append_backward(
out, parameter_list=[mlp._fc1.weight.name])[0] out, parameter_list=[mlp._linear1.weight.name])[0]
exe = fluid.Executor(fluid.CPUPlace( exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
...@@ -375,15 +377,15 @@ class TestImperative(unittest.TestCase): ...@@ -375,15 +377,15 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.allclose(dy_grad2, static_grad)) self.assertTrue(np.allclose(dy_grad2, static_grad))
params = mlp.parameters(True) params = mlp.parameters(True)
self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name) self.assertEqual("linear_0.w_0", params[0].name)
self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name) self.assertEqual("linear_0.b_0", params[1].name)
self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name) self.assertEqual("linear_1.w_0", params[2].name)
self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name) self.assertEqual("linear_1.b_0", params[3].name)
self.assertEqual(len(params), 4) self.assertEqual(len(params), 4)
sublayers = mlp.sublayers(True) sublayers = mlp.sublayers(True)
self.assertEqual(mlp._fc1, sublayers[0]) self.assertEqual(mlp._linear1, sublayers[0])
self.assertEqual(mlp._fc2, sublayers[1]) self.assertEqual(mlp._linear2, sublayers[1])
self.assertEqual(len(sublayers), 2) self.assertEqual(len(sublayers), 2)
def test_dygraph_vs_static(self): def test_dygraph_vs_static(self):
......
...@@ -20,17 +20,17 @@ import numpy as np ...@@ -20,17 +20,17 @@ import numpy as np
class MLP(fluid.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__()
self._fc1 = fluid.dygraph.FC( self._linear1 = fluid.dygraph.Linear(
self.full_name(), input_size,
3, 3,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = fluid.dygraph.FC( self._linear2 = fluid.dygraph.Linear(
self.full_name(), 3,
4, 4,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
...@@ -38,8 +38,8 @@ class MLP(fluid.Layer): ...@@ -38,8 +38,8 @@ class MLP(fluid.Layer):
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = self._fc2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = fluid.layers.reduce_sum(x)
return x return x
...@@ -51,7 +51,7 @@ class TestDygraphDebugString(unittest.TestCase): ...@@ -51,7 +51,7 @@ class TestDygraphDebugString(unittest.TestCase):
trace_var = 0 trace_var = 0
alive_var = 0 alive_var = 0
with fluid.dygraph.guard(): with fluid.dygraph.guard():
mlp = MLP("mlp") mlp = MLP(input_size=2)
for i in range(10): for i in range(10):
var_inp = fluid.dygraph.base.to_variable(np_inp) var_inp = fluid.dygraph.base.to_variable(np_inp)
out = mlp(var_inp) out = mlp(var_inp)
......
...@@ -21,17 +21,17 @@ from test_imperative_base import new_program_scope ...@@ -21,17 +21,17 @@ from test_imperative_base import new_program_scope
class MLP(fluid.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__()
self._fc1 = fluid.dygraph.FC( self._linear1 = fluid.dygraph.Linear(
self.full_name(), input_size,
3, 3,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = fluid.dygraph.FC( self._linear2 = fluid.dygraph.Linear(
self.full_name(), 3,
4, 4,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)), initializer=fluid.initializer.Constant(value=0.1)),
...@@ -39,8 +39,8 @@ class MLP(fluid.Layer): ...@@ -39,8 +39,8 @@ class MLP(fluid.Layer):
initializer=fluid.initializer.Constant(value=0.1))) initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs): def forward(self, inputs):
x = self._fc1(inputs) x = self._linear1(inputs)
x = self._fc2(x) x = self._linear2(x)
x = fluid.layers.reduce_sum(x) x = fluid.layers.reduce_sum(x)
return x return x
...@@ -48,7 +48,7 @@ class MLP(fluid.Layer): ...@@ -48,7 +48,7 @@ class MLP(fluid.Layer):
class TestDygraphFramework(unittest.TestCase): class TestDygraphFramework(unittest.TestCase):
def test_dygraph_backward(self): def test_dygraph_backward(self):
with new_program_scope(): with new_program_scope():
mlp = MLP("mlp") mlp = MLP(input_size=2)
var_inp = fluid.layers.data( var_inp = fluid.layers.data(
"input", shape=[2, 2], dtype="float32", append_batch_size=False) "input", shape=[2, 2], dtype="float32", append_batch_size=False)
out = mlp(var_inp) out = mlp(var_inp)
......
...@@ -24,30 +24,30 @@ class TestImperativePartitialBackward(unittest.TestCase): ...@@ -24,30 +24,30 @@ class TestImperativePartitialBackward(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
x = np.random.randn(2, 4, 5).astype("float32") x = np.random.randn(2, 4, 5).astype("float32")
x = fluid.dygraph.to_variable(x) x = fluid.dygraph.to_variable(x)
fc1 = fluid.dygraph.FC("fc1", 10, num_flatten_dims=2) linear1 = fluid.dygraph.Linear(5, 10)
fc2 = fluid.dygraph.FC("fc2", 10, num_flatten_dims=2) linear2 = fluid.dygraph.Linear(5, 10)
y = fc1(x[:, :2]) y = linear1(x[:, :2])
z = fc2(x[:, 2:]) z = linear2(x[:, 2:])
loss = fluid.layers.reduce_mean(y) loss = fluid.layers.reduce_mean(y)
loss.backward() loss.backward()
for param in fc1.parameters(): for param in linear1.parameters():
self.assertIsNotNone(param._grad_ivar()) self.assertIsNotNone(param._grad_ivar())
for param in fc2.parameters(): for param in linear2.parameters():
self.assertIsNone(param._grad_ivar()) self.assertIsNone(param._grad_ivar())
optimizer = fluid.optimizer.AdamOptimizer(parameter_list=( optimizer = fluid.optimizer.AdamOptimizer(parameter_list=(
fc1.parameters() + fc2.parameters())) linear1.parameters() + linear2.parameters()))
_, params_grads = optimizer.minimize(loss) _, params_grads = optimizer.minimize(loss)
self.assertListEqual( self.assertListEqual(
sorted([p.name for p in fc1.parameters()]), sorted([p.name for p in linear1.parameters()]),
sorted([p_g[0].name for p_g in params_grads])) sorted([p_g[0].name for p_g in params_grads]))
fc1.clear_gradients() linear1.clear_gradients()
fc2.clear_gradients() linear2.clear_gradients()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -23,18 +23,18 @@ import paddle ...@@ -23,18 +23,18 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
import paddle.fluid.dygraph.nn as nn import paddle.fluid.dygraph.nn as nn
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class Policy(fluid.dygraph.Layer): class Policy(fluid.dygraph.Layer):
def __init__(self, name_scope): def __init__(self, input_size):
super(Policy, self).__init__(name_scope) super(Policy, self).__init__()
self.affine1 = nn.FC(self.full_name(), size=128) self.affine1 = nn.Linear(input_size, 128)
self.affine2 = nn.FC(self.full_name(), size=2) self.affine2 = nn.Linear(128, 2)
self.dropout_ratio = 0.6 self.dropout_ratio = 0.6
self.saved_log_probs = [] self.saved_log_probs = []
...@@ -67,7 +67,7 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -67,7 +67,7 @@ class TestImperativeMnist(unittest.TestCase):
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
policy = Policy("PolicyModel") policy = Policy(input_size=4)
dy_state = fluid.dygraph.base.to_variable(state) dy_state = fluid.dygraph.base.to_variable(state)
dy_state.stop_gradient = True dy_state.stop_gradient = True
...@@ -111,7 +111,7 @@ class TestImperativeMnist(unittest.TestCase): ...@@ -111,7 +111,7 @@ class TestImperativeMnist(unittest.TestCase):
exe = fluid.Executor(fluid.CPUPlace( exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
policy = Policy("PolicyModel") policy = Policy(input_size=4)
st_sgd = SGDOptimizer(learning_rate=1e-3) st_sgd = SGDOptimizer(learning_rate=1e-3)
......
...@@ -131,14 +131,13 @@ class SimpleLSTMRNN(fluid.Layer): ...@@ -131,14 +131,13 @@ class SimpleLSTMRNN(fluid.Layer):
class PtbModel(fluid.Layer): class PtbModel(fluid.Layer):
def __init__(self, def __init__(self,
name_scope,
hidden_size, hidden_size,
vocab_size, vocab_size,
num_layers=2, num_layers=2,
num_steps=20, num_steps=20,
init_scale=0.1, init_scale=0.1,
dropout=None): dropout=None):
super(PtbModel, self).__init__(name_scope) super(PtbModel, self).__init__()
self.hidden_size = hidden_size self.hidden_size = hidden_size
self.vocab_size = vocab_size self.vocab_size = vocab_size
self.init_scale = init_scale self.init_scale = init_scale
...@@ -160,7 +159,18 @@ class PtbModel(fluid.Layer): ...@@ -160,7 +159,18 @@ class PtbModel(fluid.Layer):
initializer=fluid.initializer.UniformInitializer( initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale))) low=-init_scale, high=init_scale)))
self.out_project = Linear(self.hidden_size, self.vocab_size) self.softmax_weight = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
self.softmax_bias = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
def forward(self, input, label, init_hidden, init_cell): def forward(self, input, label, init_hidden, init_cell):
init_h = fluid.layers.reshape( init_h = fluid.layers.reshape(
...@@ -182,7 +192,8 @@ class PtbModel(fluid.Layer): ...@@ -182,7 +192,8 @@ class PtbModel(fluid.Layer):
rnn_out = fluid.layers.reshape( rnn_out = fluid.layers.reshape(
rnn_out, shape=[-1, self.num_steps, self.hidden_size]) rnn_out, shape=[-1, self.num_steps, self.hidden_size])
projection = self.out_project(rnn_out) projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
projection = fluid.layers.reshape( projection = fluid.layers.reshape(
projection, shape=[-1, self.vocab_size]) projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy( loss = fluid.layers.softmax_with_cross_entropy(
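The PtbModel change above drops the Linear output projection in favor of explicitly created parameters combined with matmul and elementwise_add. A minimal sketch of the equivalence, using small assumed shapes (hidden_size=3, vocab_size=5) for illustration:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    rnn_out = fluid.dygraph.to_variable(
        np.random.uniform(-1, 1, [4, 3]).astype("float32"))
    softmax_weight = fluid.dygraph.to_variable(
        np.random.uniform(-0.1, 0.1, [3, 5]).astype("float32"))
    softmax_bias = fluid.dygraph.to_variable(np.zeros([5], dtype="float32"))
    projection = fluid.layers.matmul(rnn_out, softmax_weight)
    projection = fluid.layers.elementwise_add(projection, softmax_bias)
    print(projection.numpy().shape)    # (4, 5), the same shape a Linear(3, 5) projection would give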
...@@ -210,7 +221,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -210,7 +221,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -294,7 +304,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -294,7 +304,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -400,7 +409,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -400,7 +409,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -505,7 +513,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -505,7 +513,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -614,7 +621,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -614,7 +621,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -694,7 +700,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -694,7 +700,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
...@@ -786,7 +791,6 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -786,7 +791,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
# TODO: marsyang1993 Change seed to # TODO: marsyang1993 Change seed to
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
vocab_size=vocab_size, vocab_size=vocab_size,
num_layers=num_layers, num_layers=num_layers,
......
...@@ -85,30 +85,25 @@ class LayerTest(unittest.TestCase): ...@@ -85,30 +85,25 @@ class LayerTest(unittest.TestCase):
class TestLayer(LayerTest): class TestLayer(LayerTest):
def test_custom_layer_with_kwargs(self): def test_custom_layer_with_kwargs(self):
class CustomLayer(fluid.Layer): class CustomLayer(fluid.Layer):
def __init__(self, name_scope, fc1_size=4): def __init__(self, input_size, linear1_size=4):
super(CustomLayer, self).__init__(name_scope) super(CustomLayer, self).__init__()
self.fc1 = nn.FC('fc1', self.linear1 = nn.Linear(
size=fc1_size, input_size, linear1_size, bias_attr=False)
bias_attr=False, self.linear2 = nn.Linear(linear1_size, 1, bias_attr=False)
num_flatten_dims=1)
self.fc2 = nn.FC('fc2', def forward(self, x, do_linear2=False):
size=1, ret = self.linear1(x)
bias_attr=False, if do_linear2:
num_flatten_dims=1) ret = self.linear2(ret)
def forward(self, x, do_fc2=False):
ret = self.fc1(x)
if do_fc2:
ret = self.fc2(ret)
return ret return ret
with self.dynamic_graph(): with self.dynamic_graph():
inp = np.ones([3, 3], dtype='float32') inp = np.ones([3, 3], dtype='float32')
x = base.to_variable(inp) x = base.to_variable(inp)
custom = CustomLayer('custom', fc1_size=2) custom = CustomLayer(input_size=3, linear1_size=2)
ret = custom(x, do_fc2=False) ret = custom(x, do_linear2=False)
self.assertTrue(np.array_equal(ret.numpy().shape, [3, 2])) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 2]))
ret = custom(x, do_fc2=True) ret = custom(x, do_linear2=True)
self.assertTrue(np.array_equal(ret.numpy().shape, [3, 1])) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 1]))
def test_linear(self): def test_linear(self):
...@@ -133,112 +128,6 @@ class TestLayer(LayerTest): ...@@ -133,112 +128,6 @@ class TestLayer(LayerTest):
self.assertTrue(np.array_equal(static_ret, dy_ret_value)) self.assertTrue(np.array_equal(static_ret, dy_ret_value))
inp = np.ones([3, 32], dtype='float32')
with self.dynamic_graph():
t = base.to_variable(inp)
linear = nn.Linear(32, 4, bias_attr=False)
dy_ret = linear(t)
dy_ret_value = dy_ret.numpy()
with self.dynamic_graph():
t = base.to_variable(inp)
fc = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
dy_ret2 = fc(t)
dy_ret_value2 = dy_ret2.numpy()
self.assertTrue(np.array_equal(dy_ret_value, dy_ret_value2))
def test_fc(self):
inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
ret = layers.fc(t, size=4, bias_attr=False, num_flatten_dims=1)
ret2 = layers.fc(ret, size=4)
static_ret = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret2])[0]
with self.static_graph():
t = layers.data(
name='data',
shape=[3, 32, 32],
dtype='float32',
append_batch_size=False)
fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = nn.FC('fc2', size=4)
ret = fc1(t)
ret2 = fc2(ret)
static_ret2 = self.get_static_graph_result(
feed={'data': inp}, fetch_list=[ret2])[0]
with self.dynamic_graph():
t = base.to_variable(inp)
fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1)
fc2 = nn.FC('fc2', size=4)
ret = fc1(t)
dy_ret = fc2(ret)
dy_ret_value = dy_ret.numpy()
self.assertTrue(np.array_equal(static_ret, static_ret2))
self.assertTrue(np.array_equal(static_ret, dy_ret_value))
with self.dynamic_graph():
custom_weight = np.random.randn(1024, 4).astype("float32")
weight_attr1 = fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer(
custom_weight))
fc1 = fluid.dygraph.FC("fc1",
4,
num_flatten_dims=1,
param_attr=weight_attr1)
out1 = fc1(base.to_variable(inp))
loss1 = fluid.layers.reduce_mean(out1)
fc1_weight_init = fc1.weight.detach()
fc1_bias_init = fc1.bias.detach()
loss1.backward()
optimizer1 = fluid.optimizer.SGD(learning_rate=0.1,
parameter_list=fc1.parameters())
optimizer1.minimize(loss1)
fc1_weight_updated = fc1.weight.detach()
with self.dynamic_graph():
weight_attr2 = fluid.ParamAttr(
initializer=fluid.initializer.Uniform())
fc2 = fluid.dygraph.FC("fc2",
4,
num_flatten_dims=1,
param_attr=weight_attr2)
out2 = fc2(base.to_variable(inp))
self.assertFalse(
np.array_equal(fc1_weight_init.numpy(), fc2.weight.numpy()))
self.assertFalse(np.array_equal(out1.numpy(), out2.numpy()))
mismatched_weight = np.random.randn(4, 4).astype("float32")
with self.assertRaises(AssertionError):
fc2.weight.set_value(mismatched_weight)
fc2.weight.set_value(fc1_weight_init)
fc2.bias.set_value(fc1_bias_init)
out2 = fc2(base.to_variable(inp))
loss2 = fluid.layers.reduce_mean(out2)
loss2.backward()
optimizer2 = fluid.optimizer.SGD(learning_rate=0.1,
parameter_list=fc2.parameters())
optimizer2.minimize(loss2)
self.assertTrue(
np.array_equal(fc2.weight.numpy(), fc1_weight_updated.numpy()))
self.assertTrue(np.array_equal(out1.numpy(), out2.numpy()))
fc2.weight = fc1.weight
fc2.bias = fc1.bias
self.assertTrue(
np.array_equal(fc2.weight.numpy(), fc1.weight.numpy()))
self.assertTrue(np.array_equal(fc2.bias.numpy(), fc1.bias.numpy()))
def test_layer_norm(self): def test_layer_norm(self):
inp = np.ones([3, 32, 32], dtype='float32') inp = np.ones([3, 32, 32], dtype='float32')
with self.static_graph(): with self.static_graph():
......
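The removed test_fc above exercised weight initialization, set_value, and weight sharing on FC; a minimal Linear-based sketch of the same checks (a hypothetical reconstruction, not code from this commit):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable

inp = np.ones([3, 32], dtype='float32')
with fluid.dygraph.guard():
    linear1 = Linear(32, 4)
    out1 = linear1(to_variable(inp))
    w_init = linear1.weight.numpy()
    b_init = linear1.bias.numpy()

    linear2 = Linear(32, 4)
    linear2.weight.set_value(w_init)    # copy values into the existing parameter
    linear2.bias.set_value(b_init)
    out2 = linear2(to_variable(inp))
    assert np.allclose(out1.numpy(), out2.numpy())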