diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py index 9856276b20b7affb548847d359463451bb238518..67179880b5609b806a1ce231dca18573aaf5966e 100644 --- a/python/paddle/fluid/imperative/nn.py +++ b/python/paddle/fluid/imperative/nn.py @@ -15,19 +15,20 @@ from __future__ import print_function from six.moves import reduce -import numpy as np from .. import core from ..layers import utils from . import layers -from ..framework import Variable, OpProtoHolder -from ..layers import layer_function_generator +from ..framework import Variable from ..param_attr import ParamAttr from ..initializer import Normal, Constant, NumpyArrayInitializer +import numpy as np __all__ = [ - 'Conv2D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', 'LayerNorm', - 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', 'SequenceConv' + 'Conv2D', 'Conv3D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', + 'LayerNorm', 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', + 'Conv3DTranspose', 'SequenceConv', 'RowConv', 'GroupNorm', 'SpectralNorm', + 'TreeConv' ] @@ -137,6 +138,200 @@ class Conv2D(layers.Layer): return self._helper.append_activation(pre_act, act=self._act) +class Conv3D(layers.Layer): + def __init__(self, + name_scope, + num_channels, + num_filters, + filter_size, + stride=1, + padding=0, + dilation=1, + groups=None, + param_attr=None, + bias_attr=None, + use_cudnn=True, + act=None, + dtype=core.VarDesc.VarType.FP32): + assert param_attr is not False, "param_attr should not be False here." + super(Conv3D, self).__init__(name_scope) + self._groups = groups + self._stride = utils.convert_to_list(stride, 3, 'stride') + self._padding = utils.convert_to_list(padding, 3, 'padding') + self._dilation = utils.convert_to_list(dilation, 4, 'dilation') + self._act = act + if not isinstance(use_cudnn, bool): + raise ValueError("use_cudnn should be True or False") + self._use_cudnn = use_cudnn + self._l_type = 'conv3d' + self._dtype = dtype + + if groups is None: + num_filter_channels = num_channels + else: + if num_channels % groups != 0: + raise ValueError("num_channels must be divisible by groups.") + num_filter_channels = num_channels // groups + + filter_size = utils.convert_to_list(filter_size, 3, 'filter_size') + + filter_shape = [num_filters, num_filter_channels] + filter_size + + def _get_default_param_initializer(): + filter_elem_num = filter_size[0] * filter_size[1] * filter_size[ + 2] * num_channels + std = (2.0 / filter_elem_num)**0.5 + return Normal(0.0, std, 0) + + self._filter_param = self.create_parameter( + attr=param_attr, + shape=filter_shape, + dtype=self._dtype, + default_initializer=_get_default_param_initializer()) + + self._bias_param = self.create_parameter( + attr=bias_attr, + shape=[num_filters], + dtype=self._dtype, + is_bias=True) + + def forward(self, input): + pre_bias = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type=self._l_type, + inputs={ + 'Input': input, + 'Filter': self._filter_param, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': self._stride, + 'paddings': self._padding, + 'dilations': self._dilation, + 'groups': self._groups if self._groups else 1, + 'use_cudnn': self._use_cudnn, + 'use_mkldnn': False + }) + + pre_act = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type='elementwise_add', + inputs={'X': [pre_bias], + 'Y': [self._bias_param]}, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) + + return self._helper.append_activation(pre_act, act=self._act) + + +class Conv3DTranspose(layers.Layer): + def __init__(self, + name_scope, + num_filters, + output_size=None, + filter_size=None, + padding=0, + stride=1, + dilation=1, + groups=None, + param_attr=None, + bias_attr=None, + use_cudnn=True, + act=None, + name=None): + super(Conv3DTranspose, self).__init__(name_scope) + if not isinstance(use_cudnn, bool): + raise ValueError("use_cudnn should be True or False") + assert param_attr is not False, "param_attr should not be False in conv3d_transpose." + self._padding = utils.convert_to_list(padding, 3, 'padding') + self._stride = utils.convert_to_list(stride, 3, 'stride') + self._dilation = utils.convert_to_list(dilation, 3, 'dilation') + self._param_attr = param_attr + self._filter_size = filter_size + self._output_size = output_size + self._groups = 1 if groups is None else groups + self._num_filters = num_filters + self._use_cudnn = use_cudnn + self._bias_attr = bias_attr + self._act = act + + def _build_once(self, input): + self._dtype = self._helper.input_dtype(input) + self._input_channel = input.shape[1] + + if self._filter_size is None: + if self._output_size is None: + raise ValueError( + "output_size must be set when filter_size is None") + if isinstance(self._output_size, int): + self._output_size = [self._output_size, self._output_size] + + d_in = input.shape[2] + h_in = input.shape[3] + w_in = input.shape[4] + + filter_size_d = (self._output_size[0] - + (d_in - 1) * self._stride[0] + 2 * self._padding[0] + - 1) // self._dilation[0] + 1 + filter_size_h = (self._output_size[1] - + (h_in - 1) * self._stride[1] + 2 * self._padding[1] + - 1) // self._dilation[1] + 1 + filter_size_w = (self._output_size[2] - + (w_in - 1) * self._stride[2] + 2 * self._padding[2] + - 1) // self._dilation[2] + 1 + self._filter_size = [filter_size_d, filter_size_h, filter_size_w] + else: + self._filter_size = utils.convert_to_list( + self._filter_size, 3, 'conv3d_transpose.filter_size') + + filter_shape = [ + self._input_channel, self._num_filters // self._groups + ] + self._filter_size + self._img_filter = self.create_parameter( + dtype=self._dtype, shape=filter_shape, attr=self._param_attr) + if self._bias_attr: + self._bias_param = self.create_parameter( + attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) + + def forward(self, input): + pre_bias = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + self._helper.append_op( + type="conv3d_transpose", + inputs={'Input': [input], + 'Filter': [self._img_filter]}, + outputs={'Output': pre_bias}, + attrs={ + 'strides': self._stride, + 'paddings': self._padding, + 'dilations': self._dilation, + 'groups': self._groups if self._groups else 1, + 'use_cudnn': self._use_cudnn + }) + + if self._bias_attr: + pre_act = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + self._helper.append_op( + type='elementwise_add', + inputs={'X': [pre_bias], + 'Y': [self._bias_param]}, + outputs={'Out': [pre_act]}, + attrs={'axis': 1}) + else: + pre_act = pre_bias + + # Currently, we don't support inplace in imperative mode + return self._helper.append_activation(pre_act, act=self._act) + + class Pool2D(layers.Layer): def __init__(self, name_scope, @@ -1397,3 +1592,234 @@ class SequenceConv(layers.Layer): }) pre_act = self._helper.append_bias_op(pre_bias) return self._helper.append_activation(pre_act) + + +class RowConv(layers.Layer): + def __init__(self, + name_scope, + future_context_size, + param_attr=None, + act=None): + super(RowConv, self).__init__(name_scope) + self._act = act + self._param_attr = param_attr + self._future_context_size = future_context_size + + def _buils_once(self, input): + self._dtype = self._helper.input_dtype(input) + filter_shape = [self._future_context_size + 1, input.shape[1]] + self._f = self.create_parameter( + attr=self._param_attr, shape=filter_shape, dtype=self._dtype) + + def forward(self, input): + out = self._helper.create_variable_for_type_inference(self._dtype) + self._helper.append_op( + type='row_conv', + inputs={'X': [input], + 'Filter': [self._f]}, + outputs={'Out': [out]}) + return self._helper.append_activation(out, act=self._act) + + +class GroupNorm(layers.Layer): + """ + **Group Normalization Layer** + + Refer to `Group Normalization `_ . + + Args: + name_scope (str): See base class. + groups(int): The number of groups that divided from channels. + epsilon(float): The small value added to the variance to prevent + division by zero. + param_attr(ParamAttr|None): The parameter attribute for the learnable + scale :math:`g`. If it is set to False, no scale will be added to the output units. + If it is set to None, the bias is initialized one. Default: None. + bias_attr(ParamAttr|None): The parameter attribute for the learnable + bias :math:`b`. If it is set to False, no bias will be added to the output units. + If it is set to None, the bias is initialized zero. Default: None. + act(str): Activation to be applied to the output of group normalizaiton. + data_layout(string|NCHW): Only NCHW is supported. + dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc + + Returns: + Variable: A tensor variable which is the result after applying group normalization on the input. + + + """ + + def __init__(self, + name_scope, + groups, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + act=None, + data_layout='NCHW'): + super(GroupNorm, self).__init__(name_scope) + self._param_attr = param_attr + self._bias_attr = bias_attr + self._epsilon = epsilon + self._groups = groups + self._act = act + if data_layout != 'NCHW': + raise ValueError("unsupported data layout:" + data_layout) + + def _buils_once(self, input): + self._dtype = self._helper.input_dtype(input) + param_shape = [input.shape[1]] + if self._bias_attr: + self._bias = self.create_parameter( + attr=self._bias_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=True) + + if self._param_attr: + self._scale = self.create_parameter( + attr=self._param_attr, + shape=param_shape, + dtype=self._dtype, + default_initializer=Constant(1.0)) + + def forward(self, input): + inputs = {'X': input} + if self._bias: + inputs['Bias'] = self._bias + if self._scale: + inputs['Scale'] = self._scale + + # create output + mean_out = self._helper.create_variable( + dtype=self._dtype, stop_gradient=True) + self.create_variable( + name="mean_out", persistable=True, type=self._dtype) + variance_out = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True) + group_norm_out = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type="group_norm", + inputs=inputs, + outputs={ + "Y": group_norm_out, + "Mean": mean_out, + "Variance": variance_out, + }, + attrs={"epsilon": self._epsilon, + "groups": self._groups}) + + return self._helper.append_activation(group_norm_out, self._act) + + +class SpectralNorm(layers.Layer): + def __init__(self, name_scope, dim=0, power_iters=1, eps=1e-12, name=None): + super(SpectralNorm, self).__init__(name_scope) + self._power_iters = power_iters + self._eps = eps + self._dim = dim + + def _build_once(self, weight): + self._dtype = self._helper.input_dtype(weight) + input_shape = weight.shape + h = input_shape[self._dim] + w = np.prod(input_shape) // h + + self.u = self.create_parameter( + attr=ParamAttr(), + shape=[h], + dtype=self._dtype, + default_initializer=Normal(0., 1.)) + self.u.stop_gradient = True + + self.v = self.create_parameter( + attr=ParamAttr(), + shape=[w], + dtype=self._dtype, + default_initializer=Normal(0., 1.)) + self.v.stop_gradient = True + + def forward(self, weight): + inputs = {'Weight': weight, 'U': self.u, 'V': self.v} + out = self._helper.create_variable_for_type_inference(self._dtype) + self._helper.append_op( + type="spectral_norm", + inputs=inputs, + outputs={"Out": out, }, + attrs={ + "dim": self._dim, + "power_iters": self._power_iters, + "eps": self._eps, + }) + + return out + + +class TreeConv(layers.Layer): + def __init__(self, + name_scope, + output_size, + num_filters=1, + max_depth=2, + act='tanh', + param_attr=None, + bias_attr=None, + name=None): + super(TreeConv, self).__init__(name_scope) + self._name = name + self._output_size = output_size + self._act = act + self._max_depth = max_depth + self._num_filters = num_filters + self._bias_attr = bias_attr + self._param_attr = param_attr + + def _build_once(self, nodes_vector, edge_set): + assert isinstance(nodes_vector, Variable) + assert isinstance(edge_set, Variable) + self._dtype = self._helper.input_dtype(nodes_vector) + + feature_size = nodes_vector.shape[2] + w_shape = [feature_size, 3, self._output_size, self._num_filters] + if self._bias_attr: + self._bias_param = self.create_parameter( + attr=self._bias_attr, + shape=[self._num_filters], + dtype=self._dtype, + is_bias=True) + self.W = self.create_parameter( + attr=self._param_attr, + shape=w_shape, + dtype=self._dtype, + is_bias=False) + + def forward(self, nodes_vector, edge_set): + if self._name: + out = self.create_variable( + name=self._name, dtype=self._dtype, persistable=False) + else: + out = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + + self._helper.append_op( + type='tree_conv', + inputs={ + 'NodesVector': nodes_vector, + 'EdgeSet': edge_set, + 'Filter': self.W + }, + outputs={'Out': out, }, + attrs={'max_depth': self._max_depth}) + if self._bias_attr: + pre_activation = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + self._helper.append_op( + type='elementwise_add', + inputs={'X': [out], + 'Y': [self._bias_param]}, + outputs={'Out': [pre_activation]}, + attrs={'axis': 1}) + else: + pre_activation = out + return self._helper.append_activation(pre_activation, act=self._act) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 7fd9617cc7687a5a553ed22cfed560aef8058496..0bfb5717c8674b7a0cb0b56f02322f661e0b8ed0 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -560,6 +560,282 @@ class TestLayer(LayerTest): self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt)) + def test_conv3d(self): + with self.static_graph(): + images = layers.data( + name='pixel', shape=[3, 6, 6, 6], dtype='float32') + ret = layers.conv3d( + input=images, num_filters=3, filter_size=[2, 2, 2]) + static_ret = self.get_static_graph_result( + feed={'pixel': np.ones( + [2, 3, 6, 6, 6], dtype='float32')}, + fetch_list=[ret])[0] + + with self.static_graph(): + images = layers.data( + name='pixel', shape=[3, 6, 6, 6], dtype='float32') + conv3d = nn.Conv3D( + 'conv3d', num_channels=3, num_filters=3, filter_size=[2, 2, 2]) + ret = conv3d(images) + static_ret2 = self.get_static_graph_result( + feed={'pixel': np.ones( + [2, 3, 6, 6, 6], dtype='float32')}, + fetch_list=[ret])[0] + + with self.dynamic_graph(): + images = np.ones([2, 3, 6, 6, 6], dtype='float32') + conv3d = nn.Conv3D( + 'conv3d', num_channels=3, num_filters=3, filter_size=[2, 2, 2]) + dy_ret = conv3d(base.to_variable(images)) + + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_row_conv(self): + input = np.arange(15).reshape([3, 5]).astype('float32') + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + + with self.static_graph(): + x = layers.data( + name='X', + shape=[3, 5], + dtype='float32', + lod_level=1, + append_batch_size=False) + ret = layers.row_conv(input=x, future_context_size=2) + static_ret = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.static_graph(): + x = layers.data( + name='X', + shape=[3, 5], + dtype='float32', + lod_level=1, + append_batch_size=False) + rowConv = nn.RowConv('RowConv', future_context_size=2) + ret = rowConv(x) + static_ret2 = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, + recursive_seq_lens=[[1, 1, 1]], + place=place, + with_lod=True) + }, + fetch_list=[ret])[0] + + with self.dynamic_graph(): + rowConv = nn.RowConv('RowConv', future_context_size=2) + dy_ret = rowConv(base.to_variable(input)) + + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_group_norm(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + + shape = (2, 4, 3, 3) + + input = np.random.random(shape).astype('float32') + + with self.static_graph(): + X = fluid.layers.data( + name='X', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + ret = layers.group_norm(input=X, groups=2) + static_ret = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.static_graph(): + X = fluid.layers.data( + name='X', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + groupNorm = nn.GroupNorm('GroupNorm', groups=2) + ret = groupNorm(X) + static_ret2 = self.get_static_graph_result( + feed={ + 'X': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.dynamic_graph(): + groupNorm = nn.GroupNorm('GroupNorm', groups=2) + dy_ret = groupNorm(base.to_variable(input)) + + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_spectral_norm(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + + shape = (2, 4, 3, 3) + + input = np.random.random(shape).astype('float32') + + with self.static_graph(): + Weight = fluid.layers.data( + name='Weight', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + ret = layers.spectral_norm(weight=Weight, dim=1, power_iters=2) + static_ret = self.get_static_graph_result( + feed={ + 'Weight': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place), + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.static_graph(): + Weight = fluid.layers.data( + name='Weight', + shape=shape, + dtype='float32', + lod_level=1, + append_batch_size=False) + spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) + ret = spectralNorm(Weight) + static_ret2 = self.get_static_graph_result( + feed={ + 'Weight': fluid.create_lod_tensor( + data=input, recursive_seq_lens=[[1, 1]], place=place) + }, + fetch_list=[ret], + with_lod=True)[0] + + with self.dynamic_graph(): + spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) + dy_ret = spectralNorm(base.to_variable(input)) + + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + self.assertTrue(np.allclose(static_ret, static_ret2)) + + def test_tree_conv(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + adj_array = [1, 2, 1, 3, 1, 4, 1, 5, 2, 6, 2, 7, 2, 8, 4, 9, 4, 10] + adj = np.array(adj_array).reshape((1, 9, 2)).astype('int32') + adj = np.tile(adj, (1, 1, 1)) + vectors = np.random.random((1, 10, 5)).astype('float32') + with self.static_graph(): + NodesVector = fluid.layers.data( + name='NodesVector', + shape=(1, 10, 5), + dtype='float32', + lod_level=1, + append_batch_size=False) + EdgeSet = fluid.layers.data( + name='EdgeSet', + shape=(1, 9, 2), + dtype='int32', + lod_level=1, + append_batch_size=False) + ret = layers.tree_conv( + nodes_vector=NodesVector, + edge_set=EdgeSet, + output_size=6, + num_filters=1, + max_depth=2) + static_ret = self.get_static_graph_result( + feed={ + 'NodesVector': fluid.create_lod_tensor( + data=vectors, recursive_seq_lens=[[1]], place=place), + 'EdgeSet': fluid.create_lod_tensor( + data=adj, recursive_seq_lens=[[1]], place=place) + }, + fetch_list=[ret], + with_lod=False)[0] + + with self.static_graph(): + NodesVector = fluid.layers.data( + name='NodesVector', + shape=(1, 10, 5), + dtype='float32', + lod_level=1, + append_batch_size=False) + EdgeSet = fluid.layers.data( + name='EdgeSet', + shape=(1, 9, 2), + dtype='int32', + lod_level=1, + append_batch_size=False) + treeConv = nn.TreeConv( + 'TreeConv', output_size=6, num_filters=1, max_depth=2) + ret = treeConv(NodesVector, EdgeSet) + static_ret2 = self.get_static_graph_result( + feed={ + 'NodesVector': fluid.create_lod_tensor( + data=vectors, recursive_seq_lens=[[1]], place=place), + 'EdgeSet': fluid.create_lod_tensor( + data=adj, recursive_seq_lens=[[1]], place=place) + }, + fetch_list=[ret], + with_lod=False)[0] + + with self.dynamic_graph(): + treeConv = nn.TreeConv( + 'SpectralNorm', output_size=6, num_filters=1, max_depth=2) + dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) + + self.assertTrue(np.allclose(static_ret, static_ret2)) + self.assertTrue(np.allclose(static_ret, dy_ret._numpy())) + + def test_conv3d_transpose(self): + input_array = np.arange(0, 48).reshape( + [2, 3, 2, 2, 2]).astype('float32') + + with self.static_graph(): + img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') + out = layers.conv3d_transpose( + input=img, num_filters=12, output_size=[14, 14, 14]) + static_rlt = self.get_static_graph_result( + feed={'pixel': input_array}, fetch_list=[out])[0] + with self.static_graph(): + img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32') + conv3d_transpose = nn.Conv3DTranspose( + 'Conv3DTranspose', num_filters=12, output_size=[14, 14, 14]) + out = conv3d_transpose(img) + static_rlt2 = self.get_static_graph_result( + feed={'pixel': input_array}, fetch_list=[out])[0] + with self.dynamic_graph(): + conv3d_transpose = nn.Conv3DTranspose( + 'Conv3DTranspose', num_filters=12, output_size=[14, 14, 14]) + dy_rlt = conv3d_transpose(base.to_variable(input_array)) + self.assertTrue(np.allclose(static_rlt2, static_rlt)) + self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + class TestBook(unittest.TestCase): def test_fit_a_line(self):