From 0d9d25d40f61598bfb732956b06208770a5f4d59 Mon Sep 17 00:00:00 2001 From: Jiabin Yang Date: Wed, 27 Mar 2019 15:14:10 +0800 Subject: [PATCH] Feature/refactor layers to Layers (#16337) * test=develop, add some Layers and tests * test=develop, add more layers * test=develop, add more layers * test=develop, add force cpu option * Update test_layers.py remove pdb * test=develop, refine code --- paddle/fluid/operators/lookup_table_op.cc | 2 +- python/paddle/fluid/imperative/nn.py | 672 +++++++++++++++++- .../fluid/tests/unittests/test_layers.py | 320 ++++++++- 3 files changed, 983 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index 8d1ebe6b1..d635fc617 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -33,7 +33,7 @@ class LookupTableOp : public framework::OperatorWithKernel { auto table_dims = ctx->GetInputDim("W"); auto ids_dims = ctx->GetInputDim("Ids"); int ids_rank = ids_dims.size(); - + VLOG(5) << "ids rank is " << ids_rank << std::endl; PADDLE_ENFORCE_EQ(table_dims.size(), 2); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, "The last dimension of the 'Ids' tensor must be 1."); diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py index 9cbfc3e3e..9856276b2 100644 --- a/python/paddle/fluid/imperative/nn.py +++ b/python/paddle/fluid/imperative/nn.py @@ -15,6 +15,7 @@ from __future__ import print_function from six.moves import reduce +import numpy as np from .. import core from ..layers import utils @@ -22,9 +23,11 @@ from . import layers from ..framework import Variable, OpProtoHolder from ..layers import layer_function_generator from ..param_attr import ParamAttr -from ..initializer import Normal, Constant +from ..initializer import Normal, Constant, NumpyArrayInitializer + __all__ = [ - 'Conv2D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', 'LayerNorm' + 'Conv2D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', 'LayerNorm', + 'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', 'SequenceConv' ] @@ -729,3 +732,668 @@ class GRUUnit(layers.Layer): }) return updated_hidden, reset_hidden_pre, gate + + +class NCE(layers.Layer): + """ + ${comment} + + Args: + input (Variable): input variable. + label (Variable): label. + num_total_classes (int):${num_total_classes_comment} + sample_weight (Variable|None): A Variable of shape [batch_size, 1] + storing a weight for each sample. The default weight for each + sample is 1.0. + param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights + of nce. If it is set to None or one attribute of ParamAttr, nce + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. Default: None. + bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of nce. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, nce + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + num_neg_samples (int): ${num_neg_samples_comment} + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Default: None. + sampler (str): The sampler used to sample class from negtive classes. + It can be 'uniform', 'log_uniform' or 'custom_dist'. + default: 'uniform'. + custom_dist (float[]): A float[] with size=num_total_classes. + It is used when sampler is set to 'custom_dist'. + custom_dist[i] is the probsbility of i-th class to be sampled. + default: None. + seed (int): The seed used in sampler. default: 0. + is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows. + + Returns: + Variable: The output nce loss. + + Examples: + .. code-block:: python + + window_size = 5 + words = [] + for i in xrange(window_size): + words.append(layers.data( + name='word_{0}'.format(i), shape=[1], dtype='int64')) + + dict_size = 10000 + label_word = int(window_size / 2) + 1 + + embs = [] + for i in xrange(window_size): + if i == label_word: + continue + + emb = layers.embedding(input=words[i], size=[dict_size, 32], + param_attr='emb.w', is_sparse=True) + embs.append(emb) + + embs = layers.concat(input=embs, axis=1) + loss = layers.nce(input=embs, label=words[label_word], + num_total_classes=dict_size, param_attr='nce.w', + bias_attr='nce.b') + + #or use custom distribution + dist = fluid.layers.assign(input=np.array([0.05,0.5,0.1,0.3,0.05]).astype("float32")) + loss = layers.nce(input=embs, label=words[label_word], + num_total_classes=5, param_attr='nce.w', + bias_attr='nce.b', + num_neg_samples=3, + sampler="custom_dist", + custom_dist=dist) + + """ + + def __init__(self, + name_scope, + num_total_classes, + param_attr=None, + bias_attr=None, + num_neg_samples=None, + sampler="uniform", + custom_dist=None, + seed=0, + is_sparse=False): + super(NCE, self).__init__(name_scope) + self._param_attr = param_attr + self._bias_attr = bias_attr + self._num_total_classes = num_total_classes + + self._inputs = dict() + + if sampler == "uniform": + sampler = 0 + elif sampler == "log_uniform": + sampler = 1 + elif sampler == "custom_dist": + assert custom_dist is not None + # assert isinstance(custom_dist, Variable) + + custom_dist_len = len(custom_dist) + alias_probs_ = [0] * custom_dist_len + alias_ = [0] * custom_dist_len + bigs = [] + littles = [] + for i in range(custom_dist_len): + normal_prob = custom_dist[i] * custom_dist_len + if normal_prob - 1.0 > 0: + bigs.append((i, normal_prob)) + elif 1.0 - normal_prob > 0: + littles.append((i, normal_prob)) + else: + alias_probs_[i] = normal_prob + alias_[i] = -1 + + while len(bigs) and len(littles): + big = bigs.pop(0) + little = littles.pop(0) + + big_idx = big[0] + big_prob = big[1] + + alias_probs_[little[0]] = little[1] + alias_[little[0]] = big_idx + big_left = big[1] + little[1] - 1 + if big_left - 1.0 > 0: + bigs.append((big_idx, big_left)) + elif 1.0 - big_left > 0: + littles.append((big_idx, big_left)) + else: + alias_probs_[big_idx] = big_left + alias_[big_idx] = -1 + + if len(bigs): + big = bigs.pop(0) + alias_probs_[big[0]] = 1.0 + alias_[big[0]] = -1 + if len(littles): + little = littles.pop(0) + alias_probs_[little[0]] = 1.0 + alias_[little[0]] = -1 + + def _init_by_numpy_array(numpy_array): + ret = self.create_parameter( + attr=ParamAttr(), + shape=numpy_array.shape, + dtype=numpy_array.dtype, + default_initializer=NumpyArrayInitializer(numpy_array)) + ret.stop_gradient = True + return ret + + self._inputs['CustomDistProbs'] = _init_by_numpy_array( + np.array(custom_dist).astype('float32')) + self._inputs['CustomDistAlias'] = _init_by_numpy_array( + np.array(alias_).astype('int32')) + self._inputs['CustomDistAliasProbs'] = _init_by_numpy_array( + np.array(alias_probs_).astype('float32')) + sampler = 2 + else: + raise Exception("Unsupported sampler type.") + + if num_neg_samples is None: + num_neg_samples = 10 + else: + num_neg_samples = int(num_neg_samples) + self._num_neg_samples = num_neg_samples + remote_prefetch = is_sparse + print( + "With sparse mode, if your models has only small parameter prefetch may cause speed down" + ) + self._attrs = { + 'num_total_classes': int(num_total_classes), + 'num_neg_samples': num_neg_samples, + 'seed': seed, + 'sampler': sampler, + 'is_sparse': is_sparse, + 'remote_prefetch': remote_prefetch + } + + def _build_once(self, input, label, sample_weight=None): + assert isinstance(input, Variable) + assert isinstance(label, Variable) + + dim = input.shape[1] + num_true_class = label.shape[1] + self._w = self.create_parameter( + attr=self._param_attr, + shape=[self._num_total_classes, dim], + is_bias=False, + dtype=input.dtype) + if self._bias_attr: + self._b = self.create_parameter( + attr=self._bias_attr, + shape=[self._num_total_classes, 1], + is_bias=True, + dtype=input.dtype) + self._inputs['Bias'] = self._b + self._inputs['Weight'] = self._w + + def forward(self, input, label, sample_weight=None): + assert isinstance(input, Variable) + assert isinstance(label, Variable) + + self._inputs['Input'] = input + self._inputs['Label'] = label + self._inputs['SampleWeight'] = sample_weight if sample_weight is not None else [] + + cost = self._helper.create_variable_for_type_inference( + dtype=input.dtype) + sample_logits = self._helper.create_variable_for_type_inference( + dtype=input.dtype) + sample_labels = self._helper.create_variable_for_type_inference( + dtype=label.dtype) + + self._helper.append_op( + type='nce', + inputs=self._inputs, + outputs={ + 'Cost': cost, + 'SampleLogits': sample_logits, + 'SampleLabels': sample_labels + }, + attrs=self._attrs) + return cost / (self._num_neg_samples + 1) + + +class PRelu(layers.Layer): + """ + Equation: + + .. math:: + y = \max(0, x) + \\alpha * \min(0, x) + + Args: + x (Variable): The input tensor. + param_attr(ParamAttr|None): The parameter attribute for the learnable + weight (alpha). + mode (string): The mode for weight sharing. It supports all, channel + and element. all: all elements share same weight + channel:elements in a channel share same weight + element:each element has a weight + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: The output tensor with the same shape as input. + + Examples: + + .. code-block:: python + + x = fluid.layers.data(name="x", shape=[10,10], dtype="float32") + mode = 'channel' + output = fluid.layers.prelu(x,mode) + """ + + def __init__(self, name_scope, mode, param_attr=None): + + super(PRelu, self).__init__(name_scope) + self._mode = mode + self._param_attr = param_attr + if self._mode not in ['all', 'channel', 'element']: + raise ValueError('mode should be one of all, channel, element.') + self._alpha_shape = [1] + + def _build_once(self, input): + if self._mode == 'channel': + self._alpha_shape = [1, input.shape[1], 1, 1] + elif self._mode == 'element': + self._alpha_shape = input.shape + self._dtype = self._helper.input_dtype(input) + self._alpha = self.create_parameter( + attr=self._param_attr, + shape=self._alpha_shape, + dtype='float32', + is_bias=False, + default_initializer=Constant(1.0)) + + def forward(self, input): + + out = self._helper.create_variable_for_type_inference(self._dtype) + self._helper.append_op( + type="prelu", + inputs={"X": input, + 'Alpha': self._alpha}, + attrs={"mode": self._mode}, + outputs={"Out": out}) + return out + + +class BilinearTensorProduct(layers.Layer): + """ + **Add Bilinear Tensor Product Layer** + + This layer performs bilinear tensor product on two inputs. + For example: + + .. math:: + out_{i} = x * W_{i} * {y^\mathrm{T}}, i=0,1,...,size-1 + + In this formula: + - :math:`x`: the first input contains M elements, shape is [batch_size, M]. + - :math:`y`: the second input contains N elements, shape is [batch_size, N]. + - :math:`W_{i}`: the i-th learned weight, shape is [M, N] + - :math:`out_{i}`: the i-th element of out, shape is [batch_size, size]. + - :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`. + + Args: + x (Variable): 2-D input tensor with shape [batch_size, M] + y (Variable): 2-D input tensor with shape [batch_size, N] + size (int): The dimension of this layer. + act (str, default None): Activation to be applied to the output of this layer. + name (str, default None): The name of this layer. + param_attr (ParamAttr, default None): The parameter attribute for the learnable w. + parameters/weights of this layer. + bias_attr (ParamAttr, default None): The parameter attribute for the bias + of this layer. If it is set to False, no bias will be added to the output units. + If it is set to None, the bias is initialized zero. Default: None. + + Returns: + Variable: A 2-D Tensor of shape [batch_size, size]. + + Examples: + .. code-block:: python + + tensor = bilinear_tensor_product(x=layer1, y=layer2, size=1000) + """ + + def __init__(self, + name_scope, + size, + name=None, + act=None, + param_attr=None, + bias_attr=None): + super(BilinearTensorProduct, self).__init__(name_scope) + self._param_attr = param_attr + self._bias_attr = bias_attr + self._act = act + self._size = size + self._name = name + self._inputs = dict() + + def _build_once(self, x, y): + self._dtype = self._helper.input_dtype(x) + + param_shape = [self._size, x.shape[1], y.shape[1]] + + self._w = self.create_parameter( + attr=self._param_attr, + shape=param_shape, + dtype=self._dtype, + is_bias=False) + + if self._bias_attr: + bias_size = [1, self._size] + bias = self.create_parameter( + attr=self._bias_attr, + shape=bias_size, + dtype=self._dtype, + is_bias=True) + self._inputs["Bias"] = bias + + def forward(self, x, y): + self._inputs = {"X": x, "Y": y, "Weight": self._w} + if self._name is not None: + out = self._helper.create_variable( + name=".".join([self.full_name(), self._name]), + dtype=self._dtype, + persistable=False) + else: + out = self._helper.create_variable( + dtype=self._dtype, persistable=False) + self._helper.append_op( + type="bilinear_tensor_product", + inputs=self._inputs, + outputs={"Out": out}) + + # add activation + return self._helper.append_activation(out) + + +class Conv2DTranspose(layers.Layer): + """ + **Convlution2D transpose layer** + + The convolution2D transpose layer calculates the output based on the input, + filter, and dilations, strides, paddings. Input(Input) and output(Output) + are in NCHW format. Where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. + Parameters(dilations, strides, paddings) are two elements. These two elements + represent height and width, respectively. The details of convolution transpose + layer, please refer to the following explanation and references + `therein `_. + If bias attribution and activation type are provided, bias is added to + the output of the convolution, and the corresponding activation function + is applied to the final result. + + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + Where: + + * :math:`X`: Input value, a tensor with NCHW format. + * :math:`W`: Filter value, a tensor with MCHW format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Example: + + - Input: + + Input shape: :math:`(N, C_{in}, H_{in}, W_{in})` + + Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)` + + - Output: + + Output shape: :math:`(N, C_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\ + W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\ + H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\ + W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ) + + Args: + input(Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of the filter. It is as same as the output + image channel. + output_size(int|tuple|None): The output image size. If output size is a + tuple, it must contain two integers, (image_H, image_W). None if use + filter_size, padding, and stride to calculate output_size. + if output_size and filter_size are specified at the same time, They + should follow the formula above. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. None if use output size to + calculate filter_size. + padding(int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + dilation(int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + groups(int): The groups number of the Conv2d transpose layer. Inspired by + grouped convolution in Alex Krizhevsky's Deep CNN paper, in which + when group=2, the first half of the filters is only connected to the + first half of the input channels, while the second half of the + filters is only connected to the second half of the input channels. + Default: groups = 1. + param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights + of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. Default: None. + bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d_transpose. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv2d_transpose + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True. + act (str): Activation type, if it is set to None, activation is not appended. + Default: None. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Default: True. + + Returns: + Variable: The tensor variable storing the convolution transpose result. + + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') + conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3) + """ + + def __init__(self, + name_scope, + num_filters, + output_size=None, + filter_size=None, + padding=0, + stride=1, + dilation=1, + groups=None, + param_attr=None, + bias_attr=None, + use_cudnn=True, + act=None): + super(Conv2DTranspose, self).__init__(name_scope) + assert param_attr is not False, "param_attr should not be False in conv2d_transpose." + self._param_attr = param_attr + self._bias_attr = bias_attr + self._groups = groups + self._num_filters = num_filters + self._use_cudnn = use_cudnn + self._padding = padding + self._stride = stride + self._dilation = dilation + self._filter_size = filter_size + self._output_size = output_size + self._op_type = 'conv2d_transpose' + + def _build_once(self, input): + input_channel = input.shape[1] + if (input_channel == self._groups and + self._num_filters == input_channel and not self._use_cudnn): + self._op_type = 'depthwise_conv2d_transpose' + + if not isinstance(input, Variable): + raise TypeError("Input of conv2d_transpose must be Variable") + + self._padding = utils.convert_to_list(self._padding, 2, 'padding') + self._stride = utils.convert_to_list(self._stride, 2, 'stride') + self._dilation = utils.convert_to_list(self._dilation, 2, 'dilation') + + if not isinstance(self._use_cudnn, bool): + raise ValueError("use_cudnn should be True or False") + + if self._filter_size is None: + if self._output_size is None: + raise ValueError( + "output_size must be set when filter_size is None") + if isinstance(self._output_size, int): + self._output_size = [self._output_size, self._output_size] + + h_in = input.shape[2] + w_in = input.shape[3] + + filter_size_h = (self._output_size[0] - + (h_in - 1) * self._stride[0] + 2 * self._padding[0] + - 1) // self._dilation[0] + 1 + filter_size_w = (self._output_size[1] - + (w_in - 1) * self._stride[1] + 2 * self._padding[1] + - 1) // self._dilation[1] + 1 + self._filter_size = [filter_size_h, filter_size_w] + else: + self._filter_size = utils.convert_to_list( + self._output_size, 2, 'conv2d_transpose.filter_size') + + if self._output_size is None: + self._output_size = [] + elif isinstance(self._output_size, list) or isinstance( + self._output_size, int): + self._output_size = utils.convert_to_list(self._output_size, 2, + 'output_size') + else: + raise ValueError("output_size should be list or int") + self._padding = utils.convert_to_list(self._padding, 2, 'padding') + self._groups = 1 if self._groups is None else self._groups + filter_shape = [input_channel, self._num_filters // self._groups + ] + self._filter_size + + self._img_filter = self.create_parameter( + dtype=input.dtype, shape=filter_shape, attr=self._param_attr) + + def forward(self, input): + pre_bias = self._helper.create_variable_for_type_inference( + dtype=input.dtype) + self._helper.append_op( + type=self._op_type, + inputs={'Input': [input], + 'Filter': [self._img_filter]}, + outputs={'Output': pre_bias}, + attrs={ + 'output_size': self._output_size, + 'strides': self._stride, + 'paddings': self._padding, + 'dilations': self._dilation, + 'groups': self._groups, + 'use_cudnn': self._use_cudnn + }) + + pre_act = self._helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) + out = self._helper.append_activation(pre_act) + return out + + +class SequenceConv(layers.Layer): + """ + This function creates the op for sequence_conv, using the inputs and + other convolutional configurations for the filters and stride as given + in the input parameters to the function. + + Args: + input (Variable): ${x_comment} + num_filters (int): number of filters. + filter_size (int): the filter size (H and W). + filter_stride (int): stride of the filter. + padding (bool): if True, add paddings. + bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of sequence_conv. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, sequence_conv + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights + of sequence_conv. If it is set to None or one attribute of ParamAttr, sequence_conv + will create ParamAttr as param_attr. If the Initializer of the param_attr + is not set, the parameter is initialized with Xavier. Default: None. + act (str): Activation type, if it is set to None, activation is not appended. + Default: None. + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Default: None. + + Returns: + Variable: output of sequence_conv + """ + + def __init__(self, + name_scope, + num_filters, + filter_size=3, + filter_stride=1, + padding=None, + bias_attr=None, + param_attr=None, + act=None): + super(SequenceConv, self).__init__(name_scope) + self._num_filters = num_filters + self._filter_size = filter_size + self._filter_stride = filter_stride + self._padding = padding + self._bias_attr = bias_attr + self._param_attr = param_attr + + def _build_once(self, input): + + self._dtype = self._helper.input_dtype(input) + print(self._filter_size) + filter_shape = [self._filter_size * input.shape[1], self._num_filters] + self._filter_param = self.create_parameter( + attr=self.param_attr, shape=filter_shape, dtype=self._dtype) + + def forward(self, input): + pre_bias = self._helper.create_variable_for_type_inference(self._dtype) + self._helper.append_op( + type='sequence_conv', + inputs={ + 'X': [input], + 'Filter': [self._filter_param], + }, + outputs={"Out": pre_bias}, + attrs={ + 'contextStride': self._filter_stride, + 'contextStart': -int(self._filter_size // 2), + 'contextLength': self._filter_size + }) + pre_act = self._helper.append_bias_op(pre_bias) + return self._helper.append_activation(pre_act) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 1429d2410..7fd9617cc 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -42,10 +42,14 @@ class LayerTest(unittest.TestCase): def tearDownClass(cls): pass - def _get_place(self): - if core.is_compiled_with_cuda(): - return core.CUDAPlace(0) - return core.CPUPlace() + def _get_place(self, force_to_use_cpu=False): + # this option for ops that only have cpu kernel + if force_to_use_cpu: + return core.CPUPlace() + else: + if core.is_compiled_with_cuda(): + return core.CUDAPlace(0) + return core.CPUPlace() @contextlib.contextmanager def static_graph(self): @@ -54,16 +58,18 @@ class LayerTest(unittest.TestCase): fluid.default_main_program().random_seed = self.seed yield - def get_static_graph_result(self, feed, fetch_list): + def get_static_graph_result(self, feed, fetch_list, with_lod=False): exe = fluid.Executor(self._get_place()) exe.run(fluid.default_startup_program()) return exe.run(fluid.default_main_program(), feed=feed, - fetch_list=fetch_list) + fetch_list=fetch_list, + return_numpy=(not with_lod)) @contextlib.contextmanager - def dynamic_graph(self): - with fluid.imperative.guard(self._get_place()): + def dynamic_graph(self, force_to_use_cpu=False): + with fluid.imperative.guard( + self._get_place(force_to_use_cpu=force_to_use_cpu)): fluid.default_startup_program().random_seed = self.seed fluid.default_main_program().random_seed = self.seed yield @@ -256,6 +262,304 @@ class TestLayer(LayerTest): self.assertTrue(np.allclose(n, min_ret._numpy())) self.assertTrue(np.allclose(n2, max_ret._numpy())) + def test_sequence_conv(self): + inp_np = np.arange(12).reshape([3, 4]).astype('float32') + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + with self.static_graph(): + seq = layers.data( + name='seq_in', + shape=[3, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) + out = layers.sequence_conv(seq, 2) + static_rlt = self.get_static_graph_result( + feed={ + "seq_in": fluid.create_lod_tensor( + data=inp_np, + recursive_seq_lens=[[1, 1, 1]], + place=place) + }, + fetch_list=[out], + with_lod=True)[0] + + with self.static_graph(): + seq = layers.data( + name='seq_in', + shape=[3, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) + seq_conv = nn.SequenceConv('seq_conv', num_filters=2) + out = seq_conv(seq) + static_rlt2 = self.get_static_graph_result( + feed={ + "seq_in": fluid.create_lod_tensor( + data=inp_np, + recursive_seq_lens=[[1, 1, 1]], + place=place) + }, + fetch_list=[out], + with_lod=True)[0] + self.assertTrue( + np.allclose(np.array(static_rlt), np.array(static_rlt2))) + + def test_conv2d_transpose(self): + inp_np = np.arange(0, 24).reshape([2, 3, 2, 2]).astype('float32') + with self.static_graph(): + img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32') + out = layers.conv2d_transpose( + input=img, num_filters=10, output_size=28) + static_rlt = self.get_static_graph_result( + feed={'pixel': inp_np}, fetch_list=[out])[0] + with self.static_graph(): + img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32') + conv2d_transpose = nn.Conv2DTranspose( + 'conv2d_transpose', num_filters=10, output_size=28) + out = conv2d_transpose(img) + static_rlt2 = self.get_static_graph_result( + feed={'pixel': inp_np}, fetch_list=[out])[0] + with self.dynamic_graph(): + conv2d_transpose = nn.Conv2DTranspose( + 'conv2d_transpose', num_filters=10, output_size=28) + dy_rlt = conv2d_transpose(base.to_variable(inp_np)) + self.assertTrue(np.allclose(static_rlt2, static_rlt)) + self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + + def test_bilinear_tensor_product(self): + inp_np_x = np.array([[1, 2, 3]]).astype('float32') + inp_np_y = np.array([[4, 5, 6]]).astype('float32') + + with self.static_graph(): + data_x = layers.data( + name='x', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + data_y = layers.data( + name='y', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + out = layers.bilinear_tensor_product(data_x, data_y, 6) + + static_rlt = self.get_static_graph_result( + feed={'x': inp_np_x, + 'y': inp_np_y}, fetch_list=[out])[0] + with self.static_graph(): + data_x = layers.data( + name='x', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + data_y = layers.data( + name='y', + shape=[1, 3], + dtype="float32", + append_batch_size=False) + btp = nn.BilinearTensorProduct('btp', 6) + out = btp(data_x, data_y) + static_rlt2 = self.get_static_graph_result( + feed={'x': inp_np_x, + 'y': inp_np_y}, fetch_list=[out])[0] + with self.dynamic_graph(): + btp = nn.BilinearTensorProduct('btp', 6) + dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) + + self.assertTrue(np.allclose(static_rlt2, static_rlt)) + self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + + def test_prelu(self): + inp_np = np.ones([5, 200, 100, 100]).astype('float32') + + with self.static_graph(): + data_t = layers.data( + name="input", + shape=[5, 200, 100, 100], + dtype="float32", + append_batch_size=False) + mode = 'channel' + out = layers.prelu( + data_t, mode, param_attr=ParamAttr(initializer=Constant(1.0))) + static_rlt = self.get_static_graph_result( + feed={"input": inp_np}, fetch_list=[out])[0] + + with self.static_graph(): + data_t = layers.data( + name="input", + shape=[5, 200, 100, 100], + dtype="float32", + append_batch_size=False) + mode = 'channel' + prelu = nn.PRelu( + 'prelu', + mode=mode, + param_attr=ParamAttr(initializer=Constant(1.0))) + out = prelu(data_t) + static_rlt2 = self.get_static_graph_result( + feed={"input": inp_np}, fetch_list=[out])[0] + + with self.dynamic_graph(): + mode = 'channel' + prelu = nn.PRelu( + 'prelu', + mode=mode, + param_attr=ParamAttr(initializer=Constant(1.0))) + dy_rlt = prelu(base.to_variable(inp_np)) + + self.assertTrue(np.allclose(static_rlt2, static_rlt)) + self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt)) + + def test_embeding(self): + inp_word = np.array([[[1]]]).astype('int64') + dict_size = 20 + with self.static_graph(): + data_t = layers.data(name='word', shape=[1], dtype='int64') + emb = layers.embedding( + input=data_t, + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) + static_rlt = self.get_static_graph_result( + feed={'word': inp_word}, fetch_list=[emb])[0] + with self.static_graph(): + data_t = layers.data(name='word', shape=[1], dtype='int64') + emb2 = nn.Embedding( + name_scope='embedding', + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) + emb_rlt = emb2(data_t) + static_rlt2 = self.get_static_graph_result( + feed={'word': inp_word}, fetch_list=[emb_rlt])[0] + with self.dynamic_graph(): + emb2 = nn.Embedding( + name_scope='embedding', + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) + static_rlt3 = emb2(base.to_variable(inp_word)) + + self.assertTrue(np.allclose(static_rlt2, static_rlt)) + self.assertTrue(np.allclose(static_rlt3._numpy(), static_rlt)) + + def test_nce(self): + window_size = 5 + dict_size = 20 + label_word = int(window_size // 2) + 1 + inp_word = np.array([[[1]], [[2]], [[3]], [[4]], [[5]]]).astype('int64') + nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32') + seed = 1 + with self.static_graph(): + words = [] + for i in range(window_size): + words.append( + layers.data( + name='word_{0}'.format(i), shape=[1], dtype='int64')) + + embs = [] + for i in range(window_size): + if i == label_word: + continue + + emb = layers.embedding( + input=words[i], + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) + embs.append(emb) + + embs = layers.concat(input=embs, axis=1) + nce_loss = layers.nce(input=embs, + label=words[label_word], + num_total_classes=dict_size, + num_neg_samples=2, + sampler="custom_dist", + custom_dist=nid_freq_arr.tolist(), + seed=seed, + param_attr='nce.w', + bias_attr='nce.b') + feed_dict = dict() + for i in range(window_size): + feed_dict['word_{0}'.format(i)] = inp_word[i] + static_rlt = self.get_static_graph_result( + feed=feed_dict, fetch_list=[nce_loss])[0] + with self.static_graph(): + words = [] + for i in range(window_size): + words.append( + layers.data( + name='word_{0}'.format(i), shape=[1], dtype='int64')) + + emb = nn.Embedding( + 'embedding', + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) + + embs2 = [] + for i in range(window_size): + if i == label_word: + continue + + emb_rlt = emb(words[i]) + embs2.append(emb_rlt) + + embs2 = layers.concat(input=embs2, axis=1) + nce = nn.NCE('nce', + num_total_classes=dict_size, + num_neg_samples=2, + sampler="custom_dist", + custom_dist=nid_freq_arr.tolist(), + seed=seed, + param_attr='nce.w', + bias_attr='nce.b') + + nce_loss2 = nce(embs2, words[label_word]) + feed_dict = dict() + for i in range(len(words)): + feed_dict['word_{0}'.format(i)] = inp_word[i] + + static_rlt2 = self.get_static_graph_result( + feed=feed_dict, fetch_list=[nce_loss2])[0] + + with self.dynamic_graph(force_to_use_cpu=True): + words = [] + for i in range(window_size): + words.append(base.to_variable(inp_word[i])) + + emb = nn.Embedding( + 'embedding', + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=False) + + embs3 = [] + for i in range(window_size): + if i == label_word: + continue + + emb_rlt = emb(words[i]) + embs3.append(emb_rlt) + + embs3 = layers.concat(input=embs3, axis=1) + nce = nn.NCE('nce', + num_total_classes=dict_size, + num_neg_samples=2, + sampler="custom_dist", + custom_dist=nid_freq_arr.tolist(), + seed=seed, + param_attr='nce.w', + bias_attr='nce.b') + + nce_loss3 = nce(embs3, words[label_word]) + + self.assertTrue(np.allclose(static_rlt2, static_rlt)) + self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt)) + class TestBook(unittest.TestCase): def test_fit_a_line(self): -- GitLab