From cf475f95dfa1c28362b4fdb94930824054c3dce4 Mon Sep 17 00:00:00 2001 From: zhongpu <2013000149@qq.com> Date: Wed, 8 Jan 2020 12:35:04 +0800 Subject: [PATCH] Remove FC in dygraph, modify FC to Linear in sample code (#22082) * modify fc to linear in sample code, test=develop * remove FC, test=develop * remove warnings, test=develop * drop fluid/imperative/README.md , test=develop * change fc to linear, test=develop * polish code style, test=develop --- paddle/fluid/pybind/imperative.cc | 12 +- python/paddle/fluid/dygraph/base.py | 20 +- python/paddle/fluid/dygraph/nn.py | 224 +----------------- python/paddle/fluid/dygraph/parallel.py | 14 +- .../fluid/dygraph/varbase_patch_methods.py | 12 +- python/paddle/fluid/dygraph_grad_clip.py | 18 +- python/paddle/fluid/framework.py | 34 +-- python/paddle/fluid/install_check.py | 20 +- python/paddle/fluid/layers/nn.py | 2 +- .../fluid/tests/unittests/test_detach.py | 131 +++++----- .../unittests/test_dygraph_mnist_fp16.py | 30 +-- .../unittests/test_imperative_auto_prune.py | 213 +++++++++-------- .../tests/unittests/test_imperative_basic.py | 60 ++--- .../unittests/test_imperative_debug_string.py | 18 +- .../unittests/test_imperative_framework.py | 18 +- .../test_imperative_partitial_backward.py | 20 +- .../test_imperative_reinforcement.py | 14 +- .../unittests/test_imperative_save_load.py | 26 +- .../fluid/tests/unittests/test_layers.py | 137 +---------- 19 files changed, 355 insertions(+), 668 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index fe4debda1f9..b7c68610cc2 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -340,14 +340,14 @@ void BindImperative(py::module *m_ptr) { import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) print(x.numpy()) )DOC") @@ -374,14 +374,14 @@ void BindImperative(py::module *m_ptr) { import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) y = x.detach() )DOC") diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 674c556daa2..a59fad7e556 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -84,12 +84,12 @@ def _no_grad_(func): @fluid.dygraph.no_grad def test_layer(): with fluid.dygraph.guard(): - inp = np.ones([3, 32, 32], dtype='float32') + inp = np.ones([3, 1024], dtype='float32') t = fluid.dygraph.base.to_variable(inp) - fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = fluid.FC('fc2', size=4) - ret = fc1(t) - dy_ret = fc2(ret) + linear1 = fluid.Linear(1024, 4, bias_attr=False) + linear2 = fluid.Linear(4, 4) + ret = linear1(t) + dy_ret = linear2(ret) test_layer() @@ -127,12 +127,12 @@ def guard(place=None): import paddle.fluid as fluid with fluid.dygraph.guard(): - inp = np.ones([3, 32, 32], dtype='float32') + inp = np.ones([3, 1024], dtype='float32') t = 
fluid.dygraph.base.to_variable(inp) - fc1 = fluid.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = fluid.FC('fc2', size=4) - ret = fc1(t) - dy_ret = fc2(ret) + linear1 = fluid.Linear(1024, 4, bias_attr=False) + linear2 = fluid.Linear(4, 4) + ret = linear1(t) + dy_ret = linear2(ret) """ train = framework.Program() diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 08a36a9265d..b38e405ff62 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -29,10 +29,9 @@ import numbers import logging __all__ = [ - 'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'Linear', 'BatchNorm', 'Embedding', - 'GRUUnit', 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', - 'Conv2DTranspose', 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', - 'TreeConv' + 'Conv2D', 'Conv3D', 'Pool2D', 'Linear', 'BatchNorm', 'Embedding', 'GRUUnit', + 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', + 'Conv3DTranspose', 'GroupNorm', 'SpectralNorm', 'TreeConv' ] @@ -865,7 +864,7 @@ class Linear(layers.Layer): where :math:`X` is the input Tensor, :math:`W` and :math:`b` are weight and bias respectively. - Different from FC layer, Linear layer takes only one ``Tensor`` input. + Linear layer takes only one ``Tensor`` input. The Linear layer multiplies input tensor with weight matrix and produces an output Tensor of shape [N, *, `output_dim`], where N is batch size and `*` means any number of additional dimensions. @@ -959,221 +958,6 @@ class Linear(layers.Layer): return self._helper.append_activation(pre_activation, act=self._act) -class FC(layers.Layer): - """ - This interface is used to construct a callable object of the ``FC`` class. - For more details, refer to code examples. - It creates a fully connected layer in the network. It can take - one or multiple ``Tensor`` as its inputs. It creates a Variable called weights for each input tensor, - which represents a fully connected weight matrix from each input unit to - each output unit. The fully connected layer multiplies each input tensor - with its corresponding weight to produce an output Tensor with shape [N, `size`], - where N is batch size. If multiple input tensors are given, the results of - multiple output tensors with shape [N, `size`] will be summed up. If ``bias_attr`` - is not None, a bias variable will be created and added to the output. - Finally, if ``act`` is not None, it will be applied to the output as well. - - When the input is single ``Tensor`` : - - .. math:: - - Out = Act({XW + b}) - - When the input are multiple ``Tensor`` : - - .. math:: - - Out = Act({\sum_{i=0}^{N-1}X_iW_i + b}) - - In the above equation: - - * :math:`N`: Number of the input. N equals to len(input) if input is list of ``Tensor`` . - * :math:`X_i`: The i-th input ``Tensor`` . - * :math:`W_i`: The i-th weights matrix corresponding i-th input tensor. - * :math:`b`: The bias parameter created by this layer (if needed). - * :math:`Act`: The activation function. - * :math:`Out`: The output ``Tensor`` . - - See below for an example. - - .. code-block:: text - - Given: - data_1.data = [[[0.1, 0.2]]] - data_1.shape = (1, 1, 2) # 1 is batch_size - - data_2.data = [[[0.1, 0.2, 0.3]]] - data_2.shape = (1, 1, 3) # 1 is batch_size - - fc = FC("fc", 2, num_flatten_dims=2) - out = fc(input=[data_1, data_2]) - - Then: - out.data = [[[0.182996 -0.474117]]] - out.shape = (1, 1, 2) - - Parameters: - name_scope(str): The name of this class. - size(int): The number of output units in this layer. 
- num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than - two dimensions. If this happens, the multi-dimension tensor will first be flattened - into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input - tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1) - dimensions will be flatten to form the first dimension of the final matrix (height of - the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to - form the second dimension of the final matrix (width of the matrix). For example, suppose - `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. - Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1 - param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable - weights(Parameter) of this layer. Default: None. - bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias - of this layer. If it is set to False, no bias will be added to the output units. - If it is set to None, the bias is initialized zero. Default: None. - act (str, optional): Activation to be applied to the output of this layer. Default: None. - is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False. - dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32". - - Attribute: - **weight** (list of Parameter): the learnable weights of this layer. - - **bias** (Parameter or None): the learnable bias of this layer. - - Returns: - None - - Examples: - .. code-block:: python - - from paddle.fluid.dygraph.base import to_variable - import paddle.fluid as fluid - from paddle.fluid.dygraph import FC - import numpy as np - - data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') - with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) - data = to_variable(data) - conv = fc(data) - - """ - - def __init__(self, - name_scope, - size, - num_flatten_dims=1, - param_attr=None, - bias_attr=None, - act=None, - is_test=False, - dtype="float32"): - super(FC, self).__init__(name_scope, dtype) - - self._size = size - self._num_flatten_dims = num_flatten_dims - self._dtype = dtype - self._param_attr = param_attr - self._bias_attr = bias_attr - self._act = act - self.__w = list() - - def _build_once(self, input): - i = 0 - for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): - input_shape = inp.shape - - param_shape = [ - reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], - 1) - ] + [self._size] - self.__w.append( - self.add_parameter( - '_w%d' % i, - self.create_parameter( - attr=param, - shape=param_shape, - dtype=self._dtype, - is_bias=False))) - i += 1 - - size = list([self._size]) - self._b = self.create_parameter( - attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True) - - # TODO(songyouwei): We should remove _w property - @property - def _w(self, i=0): - return self.__w[i] - - @_w.setter - def _w(self, value, i=0): - assert isinstance(self.__w[i], Variable) - self.__w[i].set_value(value) - - @property - def weight(self): - if len(self.__w) > 1: - return self.__w - else: - return self.__w[0] - - @weight.setter - def weight(self, value): - if len(self.__w) == 1: - self.__w[0] = value - - @property - def bias(self): - return self._b - - @bias.setter - def bias(self, value): - self._b = value - - def forward(self, input): - mul_results = list() - i = 0 
- for inp, param in self._helper.iter_inputs_and_params(input, - self._param_attr): - tmp = self._helper.create_variable_for_type_inference(self._dtype) - self._helper.append_op( - type="mul", - inputs={"X": inp, - "Y": self.__w[i]}, - outputs={"Out": tmp}, - attrs={ - "x_num_col_dims": self._num_flatten_dims, - "y_num_col_dims": 1 - }) - i += 1 - mul_results.append(tmp) - - if len(mul_results) == 1: - pre_bias = mul_results[0] - else: - pre_bias = self._helper.create_variable_for_type_inference( - self._dtype) - self._helper.append_op( - type="sum", - inputs={"X": mul_results}, - outputs={"Out": pre_bias}, - attrs={"use_mkldnn": False}) - - if self._b: - pre_activation = self._helper.create_variable_for_type_inference( - dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self._b]}, - outputs={'Out': [pre_activation]}, - attrs={'axis': self._num_flatten_dims}) - else: - pre_activation = pre_bias - # Currently, we don't support inplace in dygraph mode - return self._helper.append_activation(pre_activation, act=self._act) - - class BatchNorm(layers.Layer): """ This interface is used to construct a callable object of the ``BatchNorm`` class. diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index e8746a860c5..76a3d2c5dcb 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -97,7 +97,7 @@ class DataParallel(layers.Layer): import paddle.fluid as fluid import paddle.fluid.dygraph as dygraph from paddle.fluid.optimizer import AdamOptimizer - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.dygraph.base import to_variable place = fluid.CUDAPlace(0) @@ -106,28 +106,28 @@ class DataParallel(layers.Layer): # prepare the data parallel context strategy=dygraph.parallel.prepare_context() - fc_layer = FC("FC", 10, act="softmax") + linear = Linear(1, 10, act="softmax") adam = fluid.optimizer.AdamOptimizer() # make the module become the data parallelism module - fc_layer = dygraph.parallel.DataParallel(fc_layer, strategy) + linear = dygraph.parallel.DataParallel(linear, strategy) x_data = np.random.random(size=[10, 1]).astype(np.float32) data = to_variable(x_data) - hidden = fc_layer(data) + hidden = linear(data) avg_loss = fluid.layers.mean(hidden) # scale the loss according to the number of trainers. - avg_loss = fc_layer.scale_loss(avg_loss) + avg_loss = linear.scale_loss(avg_loss) avg_loss.backward() # collect the gradients of trainers. - fc_layer.apply_collective_grads() + linear.apply_collective_grads() adam.minimize(avg_loss) - fc_layer.clear_gradients() + linear.clear_gradients() Args: layers(Layer): The module that should be executed by data parallel. 
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 10f448fe807..1390919151a 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -39,17 +39,17 @@ def monkey_patch_varbase(): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np - data = np.ones([3, 32, 32], dtype='float32') + data = np.ones([3, 1024], dtype='float32') with fluid.dygraph.guard(): - fc = fluid.dygraph.FC("fc", 4) + linear = fluid.dygraph.Linear(1024, 4) t = to_variable(data) - fc(t) # call with default weight + linear(t) # call with default weight custom_weight = np.random.randn(1024, 4).astype("float32") - fc.weight.set_value(custom_weight) # change existing weight - out = fc(t) # call with different weight + linear.weight.set_value(custom_weight) # change existing weight + out = linear(t) # call with different weight """ assert isinstance(value, (np.ndarray, core.VarBase)), \ diff --git a/python/paddle/fluid/dygraph_grad_clip.py b/python/paddle/fluid/dygraph_grad_clip.py index 5e9c2a87b93..db7a76615f8 100644 --- a/python/paddle/fluid/dygraph_grad_clip.py +++ b/python/paddle/fluid/dygraph_grad_clip.py @@ -65,7 +65,7 @@ class GradClipByValue(GradClipBase): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm @@ -77,9 +77,9 @@ class GradClipByValue(GradClipBase): init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') - fc = FC( "fc", 10) + linear = Linear( 10, 10) - out = fc( to_variable(init_value) ) + out = linear( to_variable(init_value) ) loss = fluid.layers.reduce_mean( out ) @@ -144,7 +144,7 @@ class GradClipByNorm(GradClipBase): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm @@ -156,9 +156,9 @@ class GradClipByNorm(GradClipBase): init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') - fc = FC( "fc", 10) + linear = Linear( 10, 10) - out = fc( to_variable(init_value) ) + out = linear( to_variable(init_value) ) loss = fluid.layers.reduce_mean( out ) @@ -222,7 +222,7 @@ class GradClipByGlobalNorm(GradClipBase): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph.nn import FC + from paddle.fluid.dygraph.nn import Linear from paddle.fluid.dygraph_grad_clip import GradClipByValue, GradClipByNorm, GradClipByGlobalNorm @@ -234,9 +234,9 @@ class GradClipByGlobalNorm(GradClipBase): init_value = np.random.uniform( -1, 1, (10, 10)).astype('float32') - fc = FC( "fc", 10) + linear = Linear( 10, 10) - out = fc( to_variable(init_value) ) + out = linear( to_variable(init_value) ) loss = fluid.layers.reduce_mean( out ) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 907ac3209c1..35950f74305 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -959,14 +959,14 @@ class Variable(object): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph 
import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) y = x.detach() """ @@ -991,14 +991,14 @@ class Variable(object): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') with fluid.dygraph.guard(): - fc = FC("fc", 64, num_flatten_dims=2) + linear = Linear(32, 64) data = to_variable(data) - x = fc(data) + x = linear(data) print(x.numpy()) """ @@ -1020,17 +1020,17 @@ class Variable(object): import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import FC + from paddle.fluid.dygraph import Linear import numpy as np - data = np.ones([3, 32, 32], dtype='float32') + data = np.ones([3, 1024], dtype='float32') with fluid.dygraph.guard(): - fc = fluid.dygraph.FC("fc", 4) + linear = fluid.dygraph.Linear(1024, 4) t = to_variable(data) - fc(t) # call with default weight + linear(t) # call with default weight custom_weight = np.random.randn(1024, 4).astype("float32") - fc.weight.set_value(custom_weight) # change existing weight - out = fc(t) # call with different weight + linear.weight.set_value(custom_weight) # change existing weight + out = linear(t) # call with different weight """ pass @@ -1223,18 +1223,18 @@ class Variable(object): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) out.backward() - assert (fc._w.gradient() == 0).all() + assert (linear.weight.gradient() == 0).all() assert (out1.gradient() == 0).all() """ if in_dygraph_mode(): diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 31755e22212..42366aad88e 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -30,14 +30,15 @@ __all__ = ['run_check'] class SimpleLayer(Layer): - def __init__(self, name_scope): - super(SimpleLayer, self).__init__(name_scope) - self._fc1 = nn.FC(self.full_name(), - 3, - param_attr=ParamAttr(initializer=Constant(value=0.1))) + def __init__(self, input_size): + super(SimpleLayer, self).__init__() + self._linear1 = nn.Linear( + input_size, + 3, + param_attr=ParamAttr(initializer=Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) + x = self._linear1(inputs) x = layers.reduce_sum(x) return x @@ -79,7 +80,7 @@ def run_check(): build_strategy = compiler.BuildStrategy() build_strategy.enable_inplace = True inp = layers.data(name="inp", shape=[2, 2]) - simple_layer = SimpleLayer("simple_layer") + simple_layer = SimpleLayer(input_size=2) out = simple_layer(inp) exe = executor.Executor( core.CUDAPlace(0) if core.is_compiled_with_cuda() and @@ -108,10 +109,11 @@ def run_check(): with unique_name.guard(): inp0 = 
layers.data( name="inp", shape=[2, 2], append_batch_size=False) - simple_layer0 = SimpleLayer("simple_layer") + simple_layer0 = SimpleLayer(input_size=2) out0 = simple_layer0(inp0) param_grads = backward.append_backward( - out0, parameter_list=[simple_layer0._fc1._w.name])[0] + out0, + parameter_list=[simple_layer0._linear1.weight.name])[0] exe0 = executor.Executor( core.CUDAPlace(0) if core.is_compiled_with_cuda() and (core.get_cuda_device_count() > 0) else core.CPUPlace()) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f61a4d04163..4b34c365aa4 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -3002,7 +3002,7 @@ def layer_norm(input, print(output) """ assert in_dygraph_mode( - ) is not True, "please use FC instead of fc in dygraph mode!" + ) is not True, "please use LayerNorm instead of layer_norm in dygraph mode!" helper = LayerHelper('layer_norm', **locals()) dtype = helper.input_dtype() diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 6b163ee56e1..59e9e9e4127 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -17,8 +17,7 @@ from __future__ import print_function import numpy as np import paddle.fluid as fluid -from paddle.fluid import FC -from paddle.fluid.dygraph import FC +from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph.base import to_variable import unittest @@ -33,37 +32,37 @@ class Test_Detach(unittest.TestCase): def no_detach_multi(self): data = self.generate_Data() with fluid.dygraph.guard(): - fc_w_param_attrs = fluid.ParamAttr( + linear_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(5.0)) - fc_b_param_attrs = fluid.ParamAttr( + linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - fc = FC("fc", - 10, - num_flatten_dims=1, - param_attr=fc_w_param_attrs, - bias_attr=fc_b_param_attrs) - fc1_w_param_attrs = fluid.ParamAttr( + linear = Linear( + 4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) + linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) - fc1_b_param_attrs = fluid.ParamAttr( + linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - fc1 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc1_w_param_attrs, - bias_attr=fc1_b_param_attrs) - fc2_w_param_attrs = fluid.ParamAttr( + linear1 = Linear( + 10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) + linear2_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(9.0)) - fc2_b_param_attrs = fluid.ParamAttr( + linear2_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(10.0)) - fc2 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc2_w_param_attrs, - bias_attr=fc2_b_param_attrs) + linear2 = Linear( + 10, + 1, + param_attr=linear2_w_param_attrs, + bias_attr=linear2_b_param_attrs) data = to_variable(data) - x = fc(data) - x1 = fc1(x) - x2 = fc2(x) + x = linear(data) + x1 = linear1(x) + x2 = linear2(x) loss = x1 + x2 # print(loss, loss.shape) loss.backward() @@ -72,27 +71,27 @@ class Test_Detach(unittest.TestCase): def no_detach_single(self): data = self.generate_Data() with fluid.dygraph.guard(): - fc_w_param_attrs = fluid.ParamAttr( + linear_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(5.0)) - fc_b_param_attrs = fluid.ParamAttr( + 
linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - fc = FC("fc", - 10, - num_flatten_dims=1, - param_attr=fc_w_param_attrs, - bias_attr=fc_b_param_attrs) - fc1_w_param_attrs = fluid.ParamAttr( + linear = Linear( + 4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) + linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) - fc1_b_param_attrs = fluid.ParamAttr( + linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - fc1 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc1_w_param_attrs, - bias_attr=fc1_b_param_attrs) + linear1 = Linear( + 10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) data = to_variable(data) - x = fc(data) - x1 = fc1(x) + x = linear(data) + x1 = linear1(x) loss = x1 # print(loss, loss.shape) loss.backward() @@ -101,38 +100,38 @@ class Test_Detach(unittest.TestCase): def detach_multi(self): data = self.generate_Data() with fluid.dygraph.guard(): - fc_w_param_attrs = fluid.ParamAttr( + linear_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(5.0)) - fc_b_param_attrs = fluid.ParamAttr( + linear_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(6.0)) - fc = FC("fc", - 10, - num_flatten_dims=1, - param_attr=fc_w_param_attrs, - bias_attr=fc_b_param_attrs) - fc1_w_param_attrs = fluid.ParamAttr( + linear = Linear( + 4, + 10, + param_attr=linear_w_param_attrs, + bias_attr=linear_b_param_attrs) + linear1_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(7.0)) - fc1_b_param_attrs = fluid.ParamAttr( + linear1_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(8.0)) - fc1 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc1_w_param_attrs, - bias_attr=fc1_b_param_attrs) - fc2_w_param_attrs = fluid.ParamAttr( + linear1 = Linear( + 10, + 1, + param_attr=linear1_w_param_attrs, + bias_attr=linear1_b_param_attrs) + linear2_w_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(9.0)) - fc2_b_param_attrs = fluid.ParamAttr( + linear2_b_param_attrs = fluid.ParamAttr( initializer=fluid.initializer.Constant(10.0)) - fc2 = FC("fc", - 1, - num_flatten_dims=1, - param_attr=fc2_w_param_attrs, - bias_attr=fc2_b_param_attrs) + linear2 = Linear( + 10, + 1, + param_attr=linear2_w_param_attrs, + bias_attr=linear2_b_param_attrs) data = to_variable(data) - x = fc(data) + x = linear(data) x_detach = x.detach() - x1 = fc1(x) - x2 = fc2(x_detach) + x1 = linear1(x) + x2 = linear2(x_detach) loss = x1 + x2 # print(loss, loss.shape) loss.backward() diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py index 34a63e6a953..0a5d8e0cdd3 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_mnist_fp16.py @@ -18,7 +18,7 @@ import unittest import numpy as np import paddle.fluid as fluid -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear class SimpleImgConvPool(fluid.dygraph.Layer): @@ -71,8 +71,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer): class MNIST(fluid.dygraph.Layer): - def __init__(self, name_scope, dtype="float32"): - super(MNIST, self).__init__(name_scope) + def __init__(self, dtype="float32"): + super(MNIST, self).__init__() self._simple_img_conv_pool_1 = SimpleImgConvPool( num_channels=3, @@ -94,21 +94,23 @@ class 
MNIST(fluid.dygraph.Layer): dtype=dtype, use_cudnn=True) - pool_2_shape = 50 * 4 * 4 + self.pool_2_shape = 50 * 53 * 53 SIZE = 10 - scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 - self._fc = FC(self.full_name(), - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax", - dtype=dtype) + scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5 + self._linear = Linear( + self.pool_2_shape, + 10, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale)), + act="softmax", + dtype=dtype) def forward(self, inputs, label): x = self._simple_img_conv_pool_1(inputs) x = self._simple_img_conv_pool_2(x) - cost = self._fc(x) + x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape]) + cost = self._linear(x) loss = fluid.layers.cross_entropy(cost, label) avg_loss = fluid.layers.mean(loss) return avg_loss @@ -123,7 +125,7 @@ class TestMnist(unittest.TestCase): x = np.random.randn(1, 3, 224, 224).astype("float16") y = np.random.randn(1, 1).astype("int64") with fluid.dygraph.guard(fluid.CUDAPlace(0)): - model = MNIST("mnist", dtype="float16") + model = MNIST(dtype="float16") x = fluid.dygraph.to_variable(x) y = fluid.dygraph.to_variable(y) loss = model(x, y) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 6ab4a72e836..3134984f4f6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -18,44 +18,44 @@ import numpy as np class AutoPruneLayer0(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer0, self).__init__(name_scope) - self.fc1 = fluid.dygraph.FC( - "FC_1", + def __init__(self, input_size): + super(AutoPruneLayer0, self).__init__() + self.linear1 = fluid.dygraph.Linear( + input_size, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) - self.fc2 = fluid.dygraph.FC( - "FC_2", + self.linear2 = fluid.dygraph.Linear( + 5, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) def forward(self, x, y): - a = self.fc1(x) - b = self.fc2(y) + a = self.linear1(x) + b = self.linear2(y) c = fluid.layers.mul(a, b) d = fluid.layers.reduce_mean(c) return d class AutoPruneLayer1(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer1, self).__init__(name_scope) - self.fc1 = fluid.dygraph.FC( - "FC_1", + def __init__(self, input_size): + super(AutoPruneLayer1, self).__init__() + self.linear1 = fluid.dygraph.Linear( + input_size, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) - self.fc2 = fluid.dygraph.FC( - "FC_2", + self.linear2 = fluid.dygraph.Linear( + 5, 5, param_attr=fluid.initializer.ConstantInitializer(value=2), bias_attr=False) def forward(self, x, y): - a = self.fc1(x) - b = self.fc2(y) + a = self.linear1(x) + b = self.linear2(y) b.stop_gradient = True c = fluid.layers.mul(a, b) d = fluid.layers.reduce_mean(c) @@ -63,14 +63,14 @@ class AutoPruneLayer1(fluid.Layer): class AutoPruneLayer2(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer2, self).__init__(name_scope) - self.fc = fluid.dygraph.FC("FC1", size=10, act=None) - self.fc2 = fluid.dygraph.FC("FC2", size=1, act=None) + def __init__(self, input_size): + super(AutoPruneLayer2, self).__init__() + self.linear = fluid.dygraph.Linear(input_size, 10, act=None) + self.linear2 = 
fluid.dygraph.Linear(1, 1, act=None) def forward(self, x, label): - feature = self.fc(x) - label = self.fc2(label) + feature = self.linear(x) + label = self.linear2(label) label = fluid.layers.cast(label, dtype="float32") label = fluid.layers.cast(label, dtype='int64') # Note that the label is not persistable in fluid.layers.cross_entropy. @@ -80,12 +80,12 @@ class AutoPruneLayer2(fluid.Layer): class AutoPruneLayer3(fluid.Layer): - def __init__(self, name_scope): - super(AutoPruneLayer3, self).__init__(name_scope) - self.fc = fluid.dygraph.FC("FC1", size=20, act=None) + def __init__(self, input_size): + super(AutoPruneLayer3, self).__init__() + self.linear = fluid.dygraph.Linear(input_size, 20, act=None) def forward(self, x, label, test_num): - feature = self.fc(x) + feature = self.linear(x) part1, part2 = fluid.layers.split( feature, num_or_sections=[10, 10], dim=1) # Note that: part2 is not used. @@ -98,67 +98,68 @@ class AutoPruneLayer3(fluid.Layer): class MyLayer(fluid.Layer): - def __init__(self, name_scope, vocab_size, size, dtype="float32"): - super(MyLayer, self).__init__(name_scope, dtype) + def __init__(self, input_size, vocab_size, size, dtype="float32"): + super(MyLayer, self).__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size)) - self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype) - self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype) + self.linear_0 = fluid.Linear(input_size, size, dtype=dtype) + self.linear_1 = fluid.Linear(input_size, size, dtype=dtype) def forward(self, x): - # this method involves only the fc layers - loss = fluid.layers.reduce_mean(self.fc0(x) + self.fc1(x)) + # this method involves only the linear layers + loss = fluid.layers.reduce_mean(self.linear_0(x) + self.linear_1(x)) return loss def linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(x)) + loss = fluid.layers.reduce_mean(self.linear_0(x)) return loss def embed_linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x))) + loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x))) return loss class MyLayer2(fluid.Layer): - def __init__(self, name_scope, vocab_size, size, dtype="float32"): - super(MyLayer2, self).__init__(name_scope, dtype) + def __init__(self, input_size, vocab_size, size, dtype="float32"): + super(MyLayer2, self).__init__(dtype=dtype) self.embed0 = fluid.Embedding(size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size)) - self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype) - self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype) + self.linear_0 = fluid.Linear(input_size, size, dtype=dtype) + self.linear_1 = fluid.Linear(input_size, size, dtype=dtype) def forward(self, indices): # mind the difference with MyLayer # In this example, the forward method involes all params loss = fluid.layers.reduce_mean( - self.fc0(self.embed0(indices)) + self.fc1(self.embed1(indices))) + self.linear_0(self.embed0(indices)) + self.linear_1( + self.embed1(indices))) return loss def linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(x)) + loss = fluid.layers.reduce_mean(self.linear_0(x)) return loss def embed_linear0(self, x): - loss = fluid.layers.reduce_mean(self.fc0(self.embed0(x))) + loss = fluid.layers.reduce_mean(self.linear_0(self.embed0(x))) return loss class TestImperativeAutoPrune(unittest.TestCase): def test_auto_prune(self): with fluid.dygraph.guard(): - case1 = AutoPruneLayer0("l1") + case1 = 
AutoPruneLayer0(input_size=5) value1 = np.arange(25).reshape(5, 5).astype("float32") value2 = np.arange(25).reshape(5, 5).astype("float32") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss = case1(v1, v2) loss.backward() - self.assertTrue(case1.fc2.weight._grad_ivar() is not None) - self.assertTrue(case1.fc1.weight._grad_ivar() is not None) + self.assertTrue(case1.linear2.weight._grad_ivar() is not None) + self.assertTrue(case1.linear1.weight._grad_ivar() is not None) def test_auto_prune2(self): with fluid.dygraph.guard(): - case2 = AutoPruneLayer1("l1") + case2 = AutoPruneLayer1(input_size=5) value1 = np.arange(25).reshape(5, 5).astype("float32") value2 = np.arange(25).reshape(5, 5).astype("float32") v1 = fluid.dygraph.to_variable(value1) @@ -166,43 +167,43 @@ class TestImperativeAutoPrune(unittest.TestCase): loss = case2(v1, v2) loss.backward() - self.assertTrue(case2.fc2.weight._grad_ivar() is None) - self.assertTrue(case2.fc1.weight._grad_ivar() is not None) + self.assertTrue(case2.linear2.weight._grad_ivar() is None) + self.assertTrue(case2.linear1.weight._grad_ivar() is not None) def test_auto_prune3(self): with fluid.dygraph.guard(): - case3 = AutoPruneLayer3("l3") + case3 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") value2 = np.arange(1).reshape(1, 1).astype("int64") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss, part2 = case3(v1, v2, 1) loss.backward() - self.assertTrue(case3.fc.weight._grad_ivar() is not None) + self.assertTrue(case3.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 0).all()) def test_auto_prune4(self): with fluid.dygraph.guard(): - case4 = AutoPruneLayer3("l3") + case4 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") value2 = np.arange(1).reshape(1, 1).astype("int64") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss, part2 = case4(v1, v2, 1) part2.backward() - self.assertTrue(case4.fc.weight._grad_ivar() is not None) + self.assertTrue(case4.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 1).all()) def test_auto_prune5(self): with fluid.dygraph.guard(): - case4 = AutoPruneLayer3("l3") + case4 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") value2 = np.arange(1).reshape(1, 1).astype("int64") v1 = fluid.dygraph.to_variable(value1) v2 = fluid.dygraph.to_variable(value2) loss, part1, part2 = case4(v1, v2, 2) part1.backward() - self.assertTrue(case4.fc.weight._grad_ivar() is not None) + self.assertTrue(case4.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 0).all()) def test_auto_prune6(self): @@ -210,17 +211,17 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) out.backward() - self.assertTrue((fc.weight.gradient() == 0).all()) + 
self.assertTrue((linear.weight.gradient() == 0).all()) self.assertTrue((out1.gradient() == 0).all()) def test_auto_prune7(self): @@ -228,18 +229,18 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) backward_strategy = fluid.dygraph.BackwardStrategy() out.backward(backward_strategy) - self.assertTrue((fc.weight.gradient() == 0).all()) + self.assertTrue((linear.weight.gradient() == 0).all()) self.assertTrue((out1.gradient() == 0).all()) def test_auto_prune8(self): @@ -247,48 +248,52 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(5, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - fc_origin = fc.weight.numpy() - out2 = fc2(out1) - fc2_origin = fc2.weight.numpy() - fc2.weight.stop_gradient = True + out1 = linear(a) + linear_origin = linear.weight.numpy() + out2 = linear2(out1) + linear2_origin = linear2.weight.numpy() + linear2.weight.stop_gradient = True out2.backward() optimizer = fluid.optimizer.SGD( learning_rate=0.003, - parameter_list=(fc.parameters() + fc2.parameters())) + parameter_list=(linear.parameters() + linear2.parameters())) optimizer.minimize(out2) - self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy())) - self.assertFalse(np.array_equal(fc_origin, fc.weight.numpy())) + self.assertTrue( + np.array_equal(linear2_origin, linear2.weight.numpy())) + self.assertFalse( + np.array_equal(linear_origin, linear.weight.numpy())) def test_auto_prune9(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(5, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - fc_origin = fc.weight.numpy() - out2 = fc2(out1) - fc2_origin = fc2.weight.numpy() + out1 = linear(a) + linear_origin = linear.weight.numpy() + out2 = linear2(out1) + linear2_origin = linear2.weight.numpy() out2.stop_gradient = True out2.backward() optimizer = fluid.optimizer.SGD( learning_rate=0.003, - parameter_list=(fc.parameters() + fc2.parameters())) + parameter_list=(linear.parameters() + linear2.parameters())) optimizer.minimize(out2) - self.assertTrue(np.array_equal(fc2_origin, fc2.weight.numpy())) - self.assertTrue(np.array_equal(fc_origin, fc.weight.numpy())) + 
self.assertTrue( + np.array_equal(linear2_origin, linear2.weight.numpy())) + self.assertTrue( + np.array_equal(linear_origin, linear.weight.numpy())) try: - fc2.weight.gradient() + linear2.weight.gradient() except ValueError as e: assert type(e) == ValueError @@ -297,19 +302,19 @@ class TestImperativeAutoPrune(unittest.TestCase): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") value2 = np.arange(10).reshape(2, 5).astype("float32") - fc = fluid.FC("fc1", size=5, dtype="float32") - fc2 = fluid.FC("fc2", size=3, dtype="float32") + linear = fluid.Linear(13, 5, dtype="float32") + linear2 = fluid.Linear(3, 3, dtype="float32") a = fluid.dygraph.to_variable(value0) b = fluid.dygraph.to_variable(value1) c = fluid.dygraph.to_variable(value2) - out1 = fc(a) - out2 = fc2(b) + out1 = linear(a) + out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) backward_strategy = fluid.dygraph.BackwardStrategy() backward_strategy.sort_sum_gradient = True out.backward(backward_strategy) - self.assertTrue((fc.weight.gradient() == 0).all()) + self.assertTrue((linear.weight.gradient() == 0).all()) self.assertTrue((out1.gradient() == 0).all()) def test_auto_prune_with_optimizer(self): @@ -323,13 +328,13 @@ class TestImperativeAutoPrune(unittest.TestCase): place = fluid.CPUPlace() with fluid.dygraph.guard(place): - model = MyLayer("mylayer", vocab_size, size) + model = MyLayer(size, vocab_size, size) optimizer = fluid.optimizer.AdamOptimizer( 0.001, parameter_list=model.parameters()) grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001) indices = fluid.dygraph.to_variable(indices) - emebd = fluid.dygraph.to_variable(embed) + embed = fluid.dygraph.to_variable(embed) dummy_loss = model(embed) loss = model.embed_linear0(indices) @@ -337,12 +342,12 @@ class TestImperativeAutoPrune(unittest.TestCase): _, params_grads = optimizer.minimize(loss, grad_clip=grad_clip) for items in params_grads: assert items[0].name is not model.embed1.weight.name - assert items[0].name is not model.fc1.weight.name + assert items[0].name is not model.linear_1.weight.name assert model.embed1.weight._grad_ivar() is None - assert model.fc1.weight._grad_ivar() is None + assert model.linear_1.weight._grad_ivar() is None with fluid.dygraph.guard(place): - model = MyLayer2("mylayer", vocab_size, size) + model = MyLayer2(size, vocab_size, size) optimizer = fluid.optimizer.AdamOptimizer( 0.001, parameter_list=model.parameters()) grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001) @@ -356,9 +361,9 @@ class TestImperativeAutoPrune(unittest.TestCase): optimizer.minimize(loss, grad_clip=grad_clip) for items in params_grads: assert items[0].name is not model.embed1.weight.name - assert items[0].name is not model.fc1.weight.name + assert items[0].name is not model.linear_1.weight.name assert model.embed1.weight._grad_ivar() is None - assert model.fc1.weight._grad_ivar() is None + assert model.linear_1.weight._grad_ivar() is None def test_case2_prune_no_grad_branch(self): with fluid.dygraph.guard(): @@ -366,11 +371,11 @@ class TestImperativeAutoPrune(unittest.TestCase): value2 = np.arange(1).reshape(1, 1) v1 = fluid.dygraph.to_variable(value1).astype("float32") v2 = fluid.dygraph.to_variable(value2).astype("float32") - case3 = AutoPruneLayer2("l2") + case3 = AutoPruneLayer2(input_size=784) loss = case3(v1, v2) loss.backward() - self.assertTrue(case3.fc2.weight._grad_ivar() is None) - self.assertTrue(case3.fc.weight._grad_ivar() is 
not None) + self.assertTrue(case3.linear2.weight._grad_ivar() is None) + self.assertTrue(case3.linear.weight._grad_ivar() is not None) def test_case2_prune_no_grad_branch(self): with fluid.dygraph.guard(): @@ -378,24 +383,24 @@ class TestImperativeAutoPrune(unittest.TestCase): value2 = np.arange(1).reshape(1, 1) v1 = fluid.dygraph.to_variable(value1).astype("float32") v2 = fluid.dygraph.to_variable(value2).astype("float32") - case3 = AutoPruneLayer2("l2") + case3 = AutoPruneLayer2(input_size=784) loss = case3(v1, v2) loss.backward() - self.assertTrue(case3.fc2.weight._grad_ivar() is None) - self.assertTrue(case3.fc.weight._grad_ivar() is not None) + self.assertTrue(case3.linear2.weight._grad_ivar() is None) + self.assertTrue(case3.linear.weight._grad_ivar() is not None) def test_case3_prune_no_grad_branch2(self): with fluid.dygraph.guard(): value1 = np.arange(1).reshape(1, 1) - fc = fluid.dygraph.FC("FC1", size=1, act=None) + linear = fluid.dygraph.Linear(1, 1, act=None) label = fluid.dygraph.to_variable(value1).astype("float32") - label = fc(label) + label = linear(label) label = fluid.layers.cast(label, dtype="float32") label = fluid.layers.cast(label, dtype='int64') out = fluid.layers.one_hot(input=label, depth=100) loss = fluid.layers.mean(out) loss.backward() - self.assertTrue(fc.weight._grad_ivar() is None) + self.assertTrue(linear.weight._grad_ivar() is None) def test_case4_with_no_grad_op_maker(self): with fluid.dygraph.guard(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 8f1e2fdd2a3..14e5e20b92a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -18,7 +18,7 @@ import numpy as np import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid import FC +from paddle.fluid import Linear from test_imperative_base import new_program_scope @@ -35,24 +35,26 @@ class MyLayer(fluid.Layer): class MLP(fluid.Layer): - def __init__(self, name_scope): - super(MLP, self).__init__(name_scope) - self._fc1 = FC(self.full_name(), - 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) - self._fc2 = FC(self.full_name(), - 4, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1))) + def __init__(self, input_size): + super(MLP, self).__init__() + self._linear1 = Linear( + input_size, + 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)), + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1))) + self._linear2 = Linear( + 3, + 4, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1)), + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) - x = self._fc2(x) + x = self._linear1(inputs) + x = self._linear2(x) x = fluid.layers.reduce_sum(x) return x @@ -338,29 +340,29 @@ class TestImperative(unittest.TestCase): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) with fluid.dygraph.guard(): var_inp = fluid.dygraph.base.to_variable(np_inp) - mlp = MLP("mlp") + mlp = MLP(input_size=2) out = mlp(var_inp) dy_out = out.numpy() out.backward() - dy_grad = mlp._fc1.weight.gradient() + dy_grad = 
mlp._linear1.weight.gradient() with fluid.dygraph.guard(): var_inp2 = fluid.dygraph.base.to_variable(np_inp) - mlp2 = MLP("mlp") + mlp2 = MLP(input_size=2) out2 = mlp2(var_inp2) dy_out2 = out2.numpy() backward_strategy = fluid.dygraph.BackwardStrategy() backward_strategy.sort_sum_gradient = True out2.backward(backward_strategy) - dy_grad2 = mlp2._fc1.weight.gradient() + dy_grad2 = mlp2._linear1.weight.gradient() with new_program_scope(): inp = fluid.layers.data( name="inp", shape=[2, 2], append_batch_size=False) - mlp = MLP("mlp") + mlp = MLP(input_size=2) out = mlp(inp) param_grads = fluid.backward.append_backward( - out, parameter_list=[mlp._fc1.weight.name])[0] + out, parameter_list=[mlp._linear1.weight.name])[0] exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) @@ -375,15 +377,15 @@ class TestImperative(unittest.TestCase): self.assertTrue(np.allclose(dy_grad2, static_grad)) params = mlp.parameters(True) - self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name) - self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name) - self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name) - self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name) + self.assertEqual("linear_0.w_0", params[0].name) + self.assertEqual("linear_0.b_0", params[1].name) + self.assertEqual("linear_1.w_0", params[2].name) + self.assertEqual("linear_1.b_0", params[3].name) self.assertEqual(len(params), 4) sublayers = mlp.sublayers(True) - self.assertEqual(mlp._fc1, sublayers[0]) - self.assertEqual(mlp._fc2, sublayers[1]) + self.assertEqual(mlp._linear1, sublayers[0]) + self.assertEqual(mlp._linear2, sublayers[1]) self.assertEqual(len(sublayers), 2) def test_dygraph_vs_static(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py b/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py index dbd5296e5f1..171687283bc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_debug_string.py @@ -20,17 +20,17 @@ import numpy as np class MLP(fluid.Layer): - def __init__(self, name_scope): - super(MLP, self).__init__(name_scope) - self._fc1 = fluid.dygraph.FC( - self.full_name(), + def __init__(self, input_size): + super(MLP, self).__init__() + self._linear1 = fluid.dygraph.Linear( + input_size, 3, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1))) - self._fc2 = fluid.dygraph.FC( - self.full_name(), + self._linear2 = fluid.dygraph.Linear( + 3, 4, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), @@ -38,8 +38,8 @@ class MLP(fluid.Layer): initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) - x = self._fc2(x) + x = self._linear1(inputs) + x = self._linear2(x) x = fluid.layers.reduce_sum(x) return x @@ -51,7 +51,7 @@ class TestDygraphDebugString(unittest.TestCase): trace_var = 0 alive_var = 0 with fluid.dygraph.guard(): - mlp = MLP("mlp") + mlp = MLP(input_size=2) for i in range(10): var_inp = fluid.dygraph.base.to_variable(np_inp) out = mlp(var_inp) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index d68d362f0be..78ad00fb9a7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ 
b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -21,17 +21,17 @@ from test_imperative_base import new_program_scope class MLP(fluid.Layer): - def __init__(self, name_scope): - super(MLP, self).__init__(name_scope) - self._fc1 = fluid.dygraph.FC( - self.full_name(), + def __init__(self, input_size): + super(MLP, self).__init__() + self._linear1 = fluid.dygraph.Linear( + input_size, 3, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), bias_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1))) - self._fc2 = fluid.dygraph.FC( - self.full_name(), + self._linear2 = fluid.dygraph.Linear( + 3, 4, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.1)), @@ -39,8 +39,8 @@ class MLP(fluid.Layer): initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs) - x = self._fc2(x) + x = self._linear1(inputs) + x = self._linear2(x) x = fluid.layers.reduce_sum(x) return x @@ -48,7 +48,7 @@ class MLP(fluid.Layer): class TestDygraphFramework(unittest.TestCase): def test_dygraph_backward(self): with new_program_scope(): - mlp = MLP("mlp") + mlp = MLP(input_size=2) var_inp = fluid.layers.data( "input", shape=[2, 2], dtype="float32", append_batch_size=False) out = mlp(var_inp) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py index ed721503a14..5e3d3c81188 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_partitial_backward.py @@ -24,30 +24,30 @@ class TestImperativePartitialBackward(unittest.TestCase): with fluid.dygraph.guard(): x = np.random.randn(2, 4, 5).astype("float32") x = fluid.dygraph.to_variable(x) - fc1 = fluid.dygraph.FC("fc1", 10, num_flatten_dims=2) - fc2 = fluid.dygraph.FC("fc2", 10, num_flatten_dims=2) + linear1 = fluid.dygraph.Linear(5, 10) + linear2 = fluid.dygraph.Linear(5, 10) - y = fc1(x[:, :2]) - z = fc2(x[:, 2:]) + y = linear1(x[:, :2]) + z = linear2(x[:, 2:]) loss = fluid.layers.reduce_mean(y) loss.backward() - for param in fc1.parameters(): + for param in linear1.parameters(): self.assertIsNotNone(param._grad_ivar()) - for param in fc2.parameters(): + for param in linear2.parameters(): self.assertIsNone(param._grad_ivar()) optimizer = fluid.optimizer.AdamOptimizer(parameter_list=( - fc1.parameters() + fc2.parameters())) + linear1.parameters() + linear2.parameters())) _, params_grads = optimizer.minimize(loss) self.assertListEqual( - sorted([p.name for p in fc1.parameters()]), + sorted([p.name for p in linear1.parameters()]), sorted([p_g[0].name for p_g in params_grads])) - fc1.clear_gradients() - fc2.clear_gradients() + linear1.clear_gradients() + linear2.clear_gradients() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index 983fe23f448..735ec4d3f1e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -23,18 +23,18 @@ import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.optimizer import SGDOptimizer -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear import paddle.fluid.dygraph.nn as nn from 
paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope class Policy(fluid.dygraph.Layer): - def __init__(self, name_scope): - super(Policy, self).__init__(name_scope) + def __init__(self, input_size): + super(Policy, self).__init__() - self.affine1 = nn.FC(self.full_name(), size=128) - self.affine2 = nn.FC(self.full_name(), size=2) + self.affine1 = nn.Linear(input_size, 128) + self.affine2 = nn.Linear(128, 2) self.dropout_ratio = 0.6 self.saved_log_probs = [] @@ -67,7 +67,7 @@ class TestImperativeMnist(unittest.TestCase): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - policy = Policy("PolicyModel") + policy = Policy(input_size=4) dy_state = fluid.dygraph.base.to_variable(state) dy_state.stop_gradient = True @@ -111,7 +111,7 @@ class TestImperativeMnist(unittest.TestCase): exe = fluid.Executor(fluid.CPUPlace( ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) - policy = Policy("PolicyModel") + policy = Policy(input_size=4) st_sgd = SGDOptimizer(learning_rate=1e-3) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 3d2868a9765..01327ac647f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -131,14 +131,13 @@ class SimpleLSTMRNN(fluid.Layer): class PtbModel(fluid.Layer): def __init__(self, - name_scope, hidden_size, vocab_size, num_layers=2, num_steps=20, init_scale=0.1, dropout=None): - super(PtbModel, self).__init__(name_scope) + super(PtbModel, self).__init__() self.hidden_size = hidden_size self.vocab_size = vocab_size self.init_scale = init_scale @@ -160,7 +159,18 @@ class PtbModel(fluid.Layer): initializer=fluid.initializer.UniformInitializer( low=-init_scale, high=init_scale))) - self.out_project = Linear(self.hidden_size, self.vocab_size) + self.softmax_weight = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.hidden_size, self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + self.softmax_bias = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) def forward(self, input, label, init_hidden, init_cell): init_h = fluid.layers.reshape( @@ -182,7 +192,8 @@ class PtbModel(fluid.Layer): rnn_out = fluid.layers.reshape( rnn_out, shape=[-1, self.num_steps, self.hidden_size]) - projection = self.out_project(rnn_out) + projection = fluid.layers.matmul(rnn_out, self.softmax_weight) + projection = fluid.layers.elementwise_add(projection, self.softmax_bias) projection = fluid.layers.reshape( projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( @@ -210,7 +221,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -294,7 +304,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -400,7 +409,6 @@ class TestDygraphPtbRnn(unittest.TestCase): 
fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -505,7 +513,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -614,7 +621,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -694,7 +700,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, @@ -786,7 +791,6 @@ class TestDygraphPtbRnn(unittest.TestCase): fluid.default_main_program().random_seed = seed # TODO: marsyang1993 Change seed to ptb_model = PtbModel( - "ptb_model", hidden_size=hidden_size, vocab_size=vocab_size, num_layers=num_layers, diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 44e6d8a8c35..fa345be1ff0 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -85,30 +85,25 @@ class LayerTest(unittest.TestCase): class TestLayer(LayerTest): def test_custom_layer_with_kwargs(self): class CustomLayer(fluid.Layer): - def __init__(self, name_scope, fc1_size=4): - super(CustomLayer, self).__init__(name_scope) - self.fc1 = nn.FC('fc1', - size=fc1_size, - bias_attr=False, - num_flatten_dims=1) - self.fc2 = nn.FC('fc2', - size=1, - bias_attr=False, - num_flatten_dims=1) - - def forward(self, x, do_fc2=False): - ret = self.fc1(x) - if do_fc2: - ret = self.fc2(ret) + def __init__(self, input_size, linear1_size=4): + super(CustomLayer, self).__init__() + self.linear1 = nn.Linear( + input_size, linear1_size, bias_attr=False) + self.linear2 = nn.Linear(linear1_size, 1, bias_attr=False) + + def forward(self, x, do_linear2=False): + ret = self.linear1(x) + if do_linear2: + ret = self.linear2(ret) return ret with self.dynamic_graph(): inp = np.ones([3, 3], dtype='float32') x = base.to_variable(inp) - custom = CustomLayer('custom', fc1_size=2) - ret = custom(x, do_fc2=False) + custom = CustomLayer(input_size=3, linear1_size=2) + ret = custom(x, do_linear2=False) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 2])) - ret = custom(x, do_fc2=True) + ret = custom(x, do_linear2=True) self.assertTrue(np.array_equal(ret.numpy().shape, [3, 1])) def test_linear(self): @@ -133,112 +128,6 @@ class TestLayer(LayerTest): self.assertTrue(np.array_equal(static_ret, dy_ret_value)) - inp = np.ones([3, 32], dtype='float32') - with self.dynamic_graph(): - t = base.to_variable(inp) - linear = nn.Linear(32, 4, bias_attr=False) - dy_ret = linear(t) - dy_ret_value = dy_ret.numpy() - with self.dynamic_graph(): - t = base.to_variable(inp) - fc = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - dy_ret2 = fc(t) - dy_ret_value2 = dy_ret2.numpy() - self.assertTrue(np.array_equal(dy_ret_value, dy_ret_value2)) - - def test_fc(self): - inp = np.ones([3, 32, 32], dtype='float32') - with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) - ret = 
layers.fc(t, size=4, bias_attr=False, num_flatten_dims=1) - ret2 = layers.fc(ret, size=4) - static_ret = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret2])[0] - with self.static_graph(): - t = layers.data( - name='data', - shape=[3, 32, 32], - dtype='float32', - append_batch_size=False) - fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = nn.FC('fc2', size=4) - ret = fc1(t) - ret2 = fc2(ret) - static_ret2 = self.get_static_graph_result( - feed={'data': inp}, fetch_list=[ret2])[0] - with self.dynamic_graph(): - t = base.to_variable(inp) - fc1 = nn.FC('fc1', size=4, bias_attr=False, num_flatten_dims=1) - fc2 = nn.FC('fc2', size=4) - ret = fc1(t) - dy_ret = fc2(ret) - dy_ret_value = dy_ret.numpy() - - self.assertTrue(np.array_equal(static_ret, static_ret2)) - self.assertTrue(np.array_equal(static_ret, dy_ret_value)) - - with self.dynamic_graph(): - custom_weight = np.random.randn(1024, 4).astype("float32") - weight_attr1 = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight)) - fc1 = fluid.dygraph.FC("fc1", - 4, - num_flatten_dims=1, - param_attr=weight_attr1) - out1 = fc1(base.to_variable(inp)) - loss1 = fluid.layers.reduce_mean(out1) - - fc1_weight_init = fc1.weight.detach() - fc1_bias_init = fc1.bias.detach() - - loss1.backward() - optimizer1 = fluid.optimizer.SGD(learning_rate=0.1, - parameter_list=fc1.parameters()) - optimizer1.minimize(loss1) - - fc1_weight_updated = fc1.weight.detach() - - with self.dynamic_graph(): - weight_attr2 = fluid.ParamAttr( - initializer=fluid.initializer.Uniform()) - fc2 = fluid.dygraph.FC("fc2", - 4, - num_flatten_dims=1, - param_attr=weight_attr2) - out2 = fc2(base.to_variable(inp)) - - self.assertFalse( - np.array_equal(fc1_weight_init.numpy(), fc2.weight.numpy())) - self.assertFalse(np.array_equal(out1.numpy(), out2.numpy())) - - mismatched_weight = np.random.randn(4, 4).astype("float32") - with self.assertRaises(AssertionError): - fc2.weight.set_value(mismatched_weight) - fc2.weight.set_value(fc1_weight_init) - fc2.bias.set_value(fc1_bias_init) - - out2 = fc2(base.to_variable(inp)) - loss2 = fluid.layers.reduce_mean(out2) - loss2.backward() - optimizer2 = fluid.optimizer.SGD(learning_rate=0.1, - parameter_list=fc2.parameters()) - optimizer2.minimize(loss2) - - self.assertTrue( - np.array_equal(fc2.weight.numpy(), fc1_weight_updated.numpy())) - self.assertTrue(np.array_equal(out1.numpy(), out2.numpy())) - - fc2.weight = fc1.weight - fc2.bias = fc1.bias - self.assertTrue( - np.array_equal(fc2.weight.numpy(), fc1.weight.numpy())) - self.assertTrue(np.array_equal(fc2.bias.numpy(), fc1.bias.numpy())) - def test_layer_norm(self): inp = np.ones([3, 32, 32], dtype='float32') with self.static_graph(): -- GitLab
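
(Reviewer note, not part of the patch.) For anyone updating their own dygraph models along with these tests, here is a minimal sketch of the FC-to-Linear migration pattern the test changes above follow, assuming the 1.7-era fluid.dygraph API. The class and variable names below are illustrative only; FC was constructed with a name scope and no input width, while Linear takes an explicit (input_dim, output_dim) pair and no name scope.

    import numpy as np
    import paddle.fluid as fluid

    class MLPExample(fluid.dygraph.Layer):
        # Old style: FC(self.full_name(), size, num_flatten_dims=...)
        # New style: Linear(input_dim, output_dim), applied to the last axis.
        def __init__(self, input_size):
            super(MLPExample, self).__init__()
            self._linear1 = fluid.dygraph.Linear(input_size, 3)
            self._linear2 = fluid.dygraph.Linear(3, 4)

        def forward(self, inputs):
            x = self._linear1(inputs)
            x = self._linear2(x)
            return fluid.layers.reduce_sum(x)

    with fluid.dygraph.guard():
        inp = fluid.dygraph.to_variable(np.ones([2, 2], dtype='float32'))
        out = MLPExample(input_size=2)(inp)  # scalar Variable after reduce_sum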
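
(Reviewer note, not part of the patch.) The PtbModel change in test_imperative_save_load.py drops the Linear output projection in favor of explicitly created parameters; both forms compute the same affine map x * W + b. A rough sketch of that equivalence, with hypothetical names, isolated into its own layer:

    import paddle.fluid as fluid

    class OutputProjection(fluid.dygraph.Layer):
        # Hand-rolled affine map, equivalent to Linear(hidden_size, vocab_size).
        def __init__(self, hidden_size, vocab_size):
            super(OutputProjection, self).__init__()
            self.weight = self.create_parameter(
                attr=fluid.ParamAttr(),
                shape=[hidden_size, vocab_size],
                dtype="float32")
            self.bias = self.create_parameter(
                attr=fluid.ParamAttr(),
                shape=[vocab_size],
                dtype="float32")

        def forward(self, x):
            # x: [..., hidden_size] -> [..., vocab_size]
            projection = fluid.layers.matmul(x, self.weight)
            return fluid.layers.elementwise_add(projection, self.bias)

The updated test keeps this parameter creation inline in PtbModel rather than in a separate layer; the sketch above only highlights that the numerical behavior of the saved/loaded model is unchanged.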