diff --git a/paddle/operators/sequence_slice_op.cc b/paddle/operators/sequence_slice_op.cc
index a5928e4cfec0e64b485127896cee784b1de172c9..3374f04269bbb153ee4681ef551f371c0497d5a9 100755
--- a/paddle/operators/sequence_slice_op.cc
+++ b/paddle/operators/sequence_slice_op.cc
@@ -32,6 +32,14 @@ class SequenceSliceOp : public framework::OperatorWithKernel {
                    "Output(Out) of SequenceSliceOp should not be null.");
     auto input_dims = ctx->GetInputDim("X");
+    auto offset_dim = ctx->GetInputDim("Offset");
+    auto length_dim = ctx->GetInputDim("Length");
+
+    PADDLE_ENFORCE_EQ(offset_dim.size(), 2UL,
+                      "Only support one level sequence now.");
+    PADDLE_ENFORCE_EQ(length_dim.size(), 2UL,
+                      "Only support one level sequence now.");
+
     ctx->SetOutputDim("Out", input_dims);
   }
 
@@ -95,7 +103,7 @@ It only supports sequence (LoD Tensor with level number is 1).
         [d1, d2;
          e1, e2]]
     LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2)
-    Offset = [0, 1]; Length = [2, 1]
+    Offset = [[0], [1]]; Length = [[2], [1]]
 
     Out = [[a1, a2;
             b1, b2]
diff --git a/paddle/operators/sequence_slice_op.h b/paddle/operators/sequence_slice_op.h
index 0be50e680181f5a1d146ec0e43eb0d71b59d3bfe..4ca042b2ddad5612abb34f932037ebdcb3d2dd03 100755
--- a/paddle/operators/sequence_slice_op.h
+++ b/paddle/operators/sequence_slice_op.h
@@ -48,42 +48,42 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
     auto* length = ctx.Input<framework::Tensor>("Length");
     auto* out = ctx.Output<framework::LoDTensor>("Out");
 
+    auto lod = in->lod();
+    auto n = lod[0].size() - 1;
+
+    PADDLE_ENFORCE_EQ(lod.size(), 1UL,
+                      "Only support one level sequence now.");
+    PADDLE_ENFORCE_EQ(
+        n, length->dims()[0],
+        "The size of input-sequence and length-array should be the same")
+    PADDLE_ENFORCE_EQ(
+        n, offset->dims()[0],
+        "The size of input-sequence and offset-array should be the same")
+
     const int64_t* offset_data = offset->data<int64_t>();
     const int64_t* length_data = length->data<int64_t>();
+    framework::Tensor offset_cpu;
+    framework::Tensor length_cpu;
 
     if (platform::is_gpu_place(ctx.GetPlace())) {
-      framework::Tensor offset_cpu;
       offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
       offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
       offset_data = offset_cpu.data<int64_t>();
 
-      framework::Tensor length_cpu;
       length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
       length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
       length_data = length_cpu.data<int64_t>();
     }
 
-    auto lod = in->lod();
-    auto n = lod[0].size() - 1;
-
-    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(offset->dims().size(), 1UL,
-                      "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(length->dims().size(), 1UL,
-                      "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(
-        n, length->dims()[0],
-        "The size of input-sequence and length-array should be the same")
-    PADDLE_ENFORCE_EQ(
-        n, offset->dims()[0],
-        "The size of input-sequence and offset-array should be the same")
-
     for (size_t i = 0; i < n; ++i) {
-      PADDLE_ENFORCE_LT(0, offset_data[i], "The offset must greater than zero")
-      PADDLE_ENFORCE_LT(0, length_data[i], "The length must greater than zero")
-      PADDLE_ENFORCE_LT(lod[0][i] + offset_data[i] + length_data[i],
-                        lod[0][i + 1], "The target tensor's length overflow")
-    }
+      PADDLE_ENFORCE_LT(0, offset_data[i],
+                        "The offset must be greater than zero")
+      PADDLE_ENFORCE_LT(0, length_data[i],
+                        "The length must be greater than zero")
+      PADDLE_ENFORCE_LT(
+          lod[0][i] + offset_data[i] + length_data[i],
+          lod[0][i + 1],
+          "The target tensor's length overflow")}
 
     out->mutable_data<T>(ctx.GetPlace());
     auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
@@ -100,7 +100,7 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
       Tensor in_t = in->Slice(static_cast<size_t>(lod[0][i] + offset_data[i]),
                               static_cast<size_t>(lod[0][i] + offset_data[i] +
-                                                  length_data[i]));
+                                                      length_data[i]));
 
       StridedMemcpy<T>(ctx.device_context(), in_t.data<T>(), in_stride,
                        in_t.dims(), out_stride,
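For reference, here is a minimal NumPy sketch (not part of the patch) of the slicing rule the kernel above implements, using the two-dimensional Offset/Length layout that the new shape checks expect; the helper name sequence_slice_ref is made up for illustration:

    import numpy as np

    def sequence_slice_ref(x, lod, offset, length):
        """Slice each sequence of a one-level LoD tensor.

        x:      (N, ...) rows of all sequences stacked together
        lod:    one-level LoD, e.g. [[0, 3, 5]]
        offset: (num_seqs, 1) int64, start offset inside each sequence
        length: (num_seqs, 1) int64, number of rows kept per sequence
        """
        outs, out_lod = [], [0]
        for i in range(len(lod[0]) - 1):
            begin = lod[0][i] + offset[i, 0]
            end = begin + length[i, 0]
            assert end <= lod[0][i + 1], "slice exceeds sequence %d" % i
            outs.append(x[begin:end])
            out_lod.append(out_lod[-1] + (end - begin))
        return np.concatenate(outs, axis=0), [out_lod]

    # The example from the operator comment: LoD(X) = {{0, 3, 5}},
    # Offset = [[0], [1]], Length = [[2], [1]] keeps rows {a, b} and {e}.
    x = np.arange(10, dtype='float32').reshape(5, 2)   # rows a..e
    out, out_lod = sequence_slice_ref(
        x, [[0, 3, 5]],
        np.array([[0], [1]], dtype='int64'),
        np.array([[2], [1]], dtype='int64'))
    # out.shape == (3, 2); out_lod == [[0, 2, 3]]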
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 5de1c18950a3236faa91edabf0119b590b22c6d9..a02eba007ddf929ff92df995df253f5a386bac7b 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -2987,8 +2987,10 @@ def img_cmrnorm_layer(input,
                       layer_attr=None):
     """
     Response normalization across feature maps.
-    The details please refer to
-    `Alex's paper `_.
+
+    Reference:
+        ImageNet Classification with Deep Convolutional Neural Networks
+        http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
 
     The example usage is:
 
@@ -2997,7 +2999,7 @@ def img_cmrnorm_layer(input,
         norm = img_cmrnorm_layer(input=net, size=5)
 
     :param name: The name of this layer. It is optional.
-    :type name: None | basestring
+    :type name: basestring
     :param input: The input of this layer.
     :type input: LayerOutput
     :param size: Normalize in number of :math:`size` feature maps.
@@ -3006,9 +3008,11 @@ def img_cmrnorm_layer(input,
     :type scale: float
     :param power: The hyper-parameter.
     :type power: float
-    :param num_channels: input layer's filers number or channels. If
-                         num_channels is None, it will be set automatically.
-    :param layer_attr: Extra Layer Attribute.
+    :param num_channels: The number of input channels. If the parameter is not
+                         set or set to None, its actual value will be
+                         automatically set to the channel number of the input.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
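The referenced AlexNet paper defines cross-map response normalization as b_i = a_i / (k + alpha * sum of a_j^2 over a window of neighbouring channels)^beta. Below is a rough NumPy sketch of that formula only; how img_cmrnorm_layer maps its size/scale/power arguments onto the window, alpha and beta (and the constant k) is an assumption here, not something stated by this patch.

    import numpy as np

    def cross_map_norm_ref(x, size=5, alpha=0.0128, beta=0.75, k=1.0):
        # x: (channels, height, width); each activation is divided by a power
        # of the sum of squares over `size` neighbouring channels.
        c = x.shape[0]
        out = np.empty_like(x)
        for i in range(c):
            lo = max(0, i - size // 2)
            hi = min(c, i + size // 2 + 1)
            denom = (k + alpha * np.sum(x[lo:hi] ** 2, axis=0)) ** beta
            out[i] = x[i] / denom
        return out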
@@ -3036,7 +3040,7 @@ def batch_norm_layer(input,
                      use_global_stats=None,
                      mean_var_names=None):
     """
-    Batch Normalization Layer. The notation of this layer as follow.
+    Batch Normalization Layer. The notation of this layer is as follows.
 
     :math:`x` is the input features over a mini-batch.
 
@@ -3050,8 +3054,10 @@ def batch_norm_layer(input,
            \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize  \\\\
        y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
 
-    The details of batch normalization please refer to this
-    `paper `_.
+    Reference:
+        Batch Normalization: Accelerating Deep Network Training by Reducing
+        Internal Covariate Shift
+        http://arxiv.org/abs/1502.03167
 
     The example usage is:
 
@@ -3061,48 +3067,47 @@ def batch_norm_layer(input,
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param input: batch normalization input. Better be linear activation.
-                  Because there is an activation inside batch_normalization.
+    :param input: The input of this layer, on which batch normalization will be performed.
     :type input: LayerOutput
     :param batch_norm_type: We have batch_norm, mkldnn_batch_norm and
                             cudnn_batch_norm. batch_norm supports CPU, MKLDNN
                             and GPU. cudnn_batch_norm requires cuDNN version
                             greater or equal to v4 (>=v4). But cudnn_batch_norm
                             is faster and needs less memory than batch_norm.
                             mkldnn_batch_norm requires
-                            enable use_mkldnn. By default (None), we will
-                            automaticly select cudnn_batch_norm for GPU,
+                            use_mkldnn to be enabled. By default (None), we will
+                            automatically select cudnn_batch_norm for GPU,
                             mkldnn_batch_norm for MKLDNN and batch_norm for
                             CPU.
-                            Otherwise, select batch norm type based on the
-                            specified type. If you use cudnn_batch_norm,
-                            we suggested you use latest version, such as v5.1.
+                            Users can also specify the batch norm type explicitly.
+                            If you use cudnn_batch_norm, we suggest you use the
+                            latest version, such as v5.1.
     :type batch_norm_type: None | string, None or "batch_norm" or
                            "cudnn_batch_norm" or "mkldnn_batch_norm"
-    :param act: Activation Type. Better be relu. Because batch
-                normalization will normalize input near zero.
+    :param act: Activation type. ReluActivation is the default activation.
     :type act: BaseActivation
-    :param num_channels: num of image channels or previous layer's number of
-                         filters. None will automatically get from layer's
-                         input.
+    :param num_channels: The number of input channels. If the parameter is not
+                         set or set to None, its actual value will be
+                         automatically set to the channel number of the input.
     :type num_channels: int
-    :param bias_attr: :math:`\\beta`, better be zero when initialize. So the
-                      initial_std=0, initial_mean=1 is best practice.
+    :param bias_attr: :math:`\\beta`. The bias attribute. If the parameter is set to
+                      False or an object whose type is not ParameterAttribute, no
+                      bias is defined. If the parameter is set to True, the bias is
+                      initialized to zero.
     :type bias_attr: ParameterAttribute | None | bool | Any
-    :param param_attr: :math:`\\gamma`, better be one when initialize. So the
-                       initial_std=0, initial_mean=1 is best practice.
+    :param param_attr: :math:`\\gamma`. The parameter attribute. See ParameterAttribute
+                       for details.
     :type param_attr: ParameterAttribute
-    :param layer_attr: Extra Layer Attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
-    :param use_global_stats: whether use moving mean/variance statistics
-                             during testing peroid. If None or True,
-                             it will use moving mean/variance statistics during
-                             testing. If False, it will use the mean
-                             and variance of current batch of test data for
-                             testing.
+    :param use_global_stats: Whether to use moving mean/variance statistics during
+                             the testing period. If the parameter is set to None or
+                             True, it will use moving mean/variance statistics
+                             during testing. If the parameter is set to False, it
+                             will use the mean and variance of the current batch
+                             of test data.
     :type use_global_stats: bool | None.
-    :param moving_average_fraction: Factor used in the moving average
-                                    computation, referred to as facotr,
-                                    :math:`runningMean = newMean*(1-factor)
-                                    + runningMean*factor`
+    :param moving_average_fraction: Factor used in the moving average computation.
+                                    :math:`runningMean = newMean*(1-factor) + runningMean*factor`
     :type moving_average_fraction: float.
     :param mean_var_names: [mean name, variance name]
     :type mean_var_names: string list
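A small NumPy sketch of the four equations in the docstring above (mini-batch mean, variance, normalization, scale-and-shift), plus the moving-average update described for moving_average_fraction. This only illustrates the formulas; it is not the layer's implementation.

    import numpy as np

    def batch_norm_ref(x, gamma, beta, eps=1e-5):
        # x: (batch, features); gamma, beta: (features,)
        mu = x.mean(axis=0)                      # mini-batch mean
        var = ((x - mu) ** 2).mean(axis=0)       # mini-batch variance
        x_hat = (x - mu) / np.sqrt(var + eps)    # normalize
        return gamma * x_hat + beta, mu, var     # scale and shift

    def update_running_mean(running_mean, new_mean, factor):
        # runningMean = newMean*(1-factor) + runningMean*factor
        return new_mean * (1.0 - factor) + running_mean * factor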
@@ -3164,8 +3169,9 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
     :type input: LayerOutput
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param layer_attr: extra layer attributes.
-    :type layer_attr: ExtraLayerAttribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute
+                       for details.
+    :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
@@ -3200,7 +3206,8 @@ def row_l2_norm_layer(input, name=None, layer_attr=None):
     :type input: LayerOutput
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param layer_attr: extra layer attributes.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute
+                       for details.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -3237,22 +3244,17 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
                        act=ReluActivation(),
                        bias_attr=False)
 
-    This layer just simply add all input layers together, then activate the sum
-    inputs. Each input of this layer should be the same size, which is also the
-    output size of this layer.
+    This layer simply adds all input layers together, then activates the
+    sum. All inputs should share the same dimension, which is also the
+    dimension of this layer's output.
 
     There is no weight matrix for each input, because it just a simple add
     operation. If you want a complicated operation before add, please use
     mixed_layer.
 
-    It is a very good way to set dropout outside the layers. Since not all
-    PaddlePaddle layer support dropout, you can add an add_to layer, set
-    dropout here.
-    Please refer to dropout_layer for details.
-
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param input: Input layers. It could be a LayerOutput or list/tuple of
+    :param input: The input layers. It could be a LayerOutput or list/tuple of
                   LayerOutput.
     :type input: LayerOutput | list | tuple
     :param act: Activation Type. LinearActivation is the default activation.
@@ -3261,7 +3263,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
                       whose type is not ParameterAttribute, no bias is defined. If the
                       parameter is set to True, the bias is initialized to zero.
     :type bias_attr: ParameterAttribute | None | bool | Any
-    :param layer_attr: Extra Layer attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -3300,8 +3303,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
 @layer_support(DROPOUT, ERROR_CLIPPING)
 def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
     """
-    Concat all input vector into one huge vector.
-    Inputs can be list of LayerOutput or list of projection.
+    Concatenate all input vectors into one vector.
+    Inputs can be a list of LayerOutput or a list of projections.
 
     The example usage is:
 
@@ -3311,11 +3314,12 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
 
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param input: input layers or projections
+    :param input: The input layers or projections
     :type input: list | tuple | collections.Sequence
     :param act: Activation type. IdentityActivation is the default activation.
     :type act: BaseActivation
-    :param layer_attr: Extra Layer Attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
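To make the difference between the two layers documented above concrete, here is a NumPy sketch of their shape semantics (assuming dense 2-D inputs of shape (batch, dim), which is an illustration rather than the layers' real data layout): addto_layer sums equally sized inputs, so the output keeps the input dimension, while concat_layer joins inputs along the feature dimension, so the output dimension is the sum of the input dimensions.

    import numpy as np

    a = np.ones((4, 128))
    b = np.ones((4, 128))
    c = np.ones((4, 64))

    addto_out = a + b                                # addto_layer: shapes must match
    concat_out = np.concatenate([a, b, c], axis=1)   # concat_layer: widths add up

    assert addto_out.shape == (4, 128)
    assert concat_out.shape == (4, 320)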
@@ -3385,7 +3389,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
 def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
                      bias_attr=None):
     """
-    Concat sequence a with sequence b.
+    Concatenate sequence a and sequence b.
 
     Inputs:
         - a = [a1, a2, ..., am]
@@ -3404,13 +3408,14 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
 
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param a: input sequence layer
+    :param a: The first input sequence layer
     :type a: LayerOutput
-    :param b: input sequence layer
+    :param b: The second input sequence layer
     :type b: LayerOutput
     :param act: Activation type. IdentityActivation is the default activation.
     :type act: BaseActivation
-    :param layer_attr: Extra Layer Attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :param bias_attr: The bias attribute. If the parameter is set to False or an object
                       whose type is not ParameterAttribute, no bias is defined. If the
@@ -3447,31 +3452,25 @@ def memory(name,
            boot_bias_active_type=None,
            boot_with_const_id=None):
     """
-    The memory layers is a layer cross each time step. Reference this output
-    as previous time step layer :code:`name` 's output.
+    The memory takes a layer's output at the previous time step as its own output.
 
-    The default memory is zero in first time step, previous time step's
-    output in the rest time steps.
+    If boot_bias is set, the activation of the bias is the initial value of the memory.
 
-    If boot_bias, the first time step value is this bias and
-    with activation.
+    If boot_with_const_id is set, then the memory's output at the first time step
+    is an IndexSlot, and Arguments.ids()[0] is this :code:`cost_id`.
 
-    If boot_with_const_id, then the first time stop is a IndexSlot, the
-    Arguments.ids()[0] is this :code:`cost_id`.
+    If boot_layer is specified, the memory's output at the first time step will
+    be the boot_layer's output.
 
-    If boot_layer is not null, the memory is just the boot_layer's output.
-    Set :code:`is_seq` is true boot layer is sequence.
-
-    The same name layer in recurrent group will set memory on each time
-    step.
+    In all other cases, the memory's output at the first time step is zero.
 
     .. code-block:: python
 
        mem = memory(size=256, name='state')
       state = fc_layer(input=mem, size=256, name='state')
 
-    If you do not want to specify the name, you can equivalently use set_input()
-    to specify the layer needs to be remembered as the following:
+    If you do not want to specify the name, you can also use set_input()
+    to specify the layer to be remembered as the following:
 
     .. code-block:: python
 
@@ -3479,26 +3478,31 @@ def memory(name,
       state = fc_layer(input=mem, size=256)
       mem.set_input(mem)
 
-    :param name: the name of the layer which this memory remembers.
+    :param name: The name of the layer which this memory remembers.
                  If name is None, user should call set_input() to specify the
                  name of the layer which this memory remembers.
     :type name: basestring
-    :param size: size of memory.
+    :param size: The dimensionality of memory.
     :type size: int
-    :param memory_name: the name of the memory.
-                        It is ignored when name is provided.
+    :param memory_name: The name of the memory. It is ignored when name is provided.
     :type memory_name: basestring
     :param is_seq: DEPRECATED. is sequence for boot_layer
     :type is_seq: bool
-    :param boot_layer: boot layer of memory.
+    :param boot_layer: This parameter specifies the memory's output at the first
+                       time step, which is the boot_layer's output.
     :type boot_layer: LayerOutput | None
-    :param boot_bias: boot layer's bias
+    :param boot_bias: The bias attribute of the memory's output at the first time step.
+                      If the parameter is set to False or an object whose type is not
+                      ParameterAttribute, no bias is defined. If the parameter is set
+                      to True, the bias is initialized to zero.
     :type boot_bias: ParameterAttribute | None
-    :param boot_bias_active_type: boot layer's active type.
+    :param boot_bias_active_type: Activation type for the memory's bias at the first
+                                  time step. LinearActivation is the default activation.
     :type boot_bias_active_type: BaseActivation
-    :param boot_with_const_id: boot layer's id.
+    :param boot_with_const_id: This parameter specifies the memory's output at the
+                               first time step, and the output is an index.
     :type boot_with_const_id: int
-    :return: LayerOutput object which is a memory.
+    :return: LayerOutput object.
     :rtype: LayerOutput
     """
     if boot_bias_active_type is None:
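A plain-Python sketch of the memory semantics described above: inside a recurrent group the memory at step t exposes the named layer's output from step t-1, and at the first step it exposes either zeros (the default) or the boot_layer's output. The step function and sizes here are placeholders for illustration, not PaddlePaddle API.

    import numpy as np

    def run_recurrent(inputs, step, state_size, boot_output=None):
        # inputs: list of per-time-step arrays; step(x_t, mem_t) -> new state
        mem = np.zeros(state_size) if boot_output is None else boot_output
        outputs = []
        for x_t in inputs:            # mem plays the role of memory(name='state')
            state = step(x_t, mem)    # e.g. state = fc_layer(input=mem, ...)
            outputs.append(state)
            mem = state               # the next step sees the previous output
        return outputs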
diff --git a/python/paddle/v2/framework/tests/test_beam_search_op.py b/python/paddle/v2/fluid/tests/test_beam_search_op.py
similarity index 94%
rename from python/paddle/v2/framework/tests/test_beam_search_op.py
rename to python/paddle/v2/fluid/tests/test_beam_search_op.py
index a5a0cc0c96ab6c77abcc20c387eef7b3b1c8ac93..cc7c09bb59de3f83e47b4d95c1203f7f050c5132 100644
--- a/python/paddle/v2/framework/tests/test_beam_search_op.py
+++ b/python/paddle/v2/fluid/tests/test_beam_search_op.py
@@ -1,6 +1,6 @@
 import logging
-from paddle.v2.framework.op import Operator, DynamicRecurrentOp
-import paddle.v2.framework.core as core
+from paddle.v2.fluid.op import Operator, DynamicRecurrentOp
+import paddle.v2.fluid.core as core
 import unittest
 import numpy as np
 
diff --git a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
index 80f4bfbdd1160c1423c984e73cac54fbd3831241..4351d8e6d77c16e0012f9ae163b118fdbb793a8f 100755
--- a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
@@ -9,16 +9,16 @@ class TestSequenceSliceOp(OpTest):
         # only supprot one level LoD
         x = np.random.random(self.x_dim).astype('float32')
         lod = self.x_lod
-        offset = np.array(self.offset).flatten().astype("int64")
-        length = np.array(self.length).flatten().astype("int64")
+        offset = np.array(self.offset).astype("int64")
+        length = np.array(self.length).astype("int64")
         self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length}
 
         outs = []  #np.zeros((100, 3, 2)).astype('float32')
         out_lod = [[0]]
         out_lod_offset = 0
         for i in range(len(offset)):
-            sub_x = x[lod[0][i] + offset[i]: lod[0]
-                      [i] + offset[i] + length[i], :]
+            sub_x = x[lod[0][i] + offset[i, 0]: lod[0]
+                      [i] + offset[i, 0] + length[i, 0], :]
             out_lod_offset = out_lod_offset + len(sub_x)
             outs.append(sub_x)
             out_lod[0].append(out_lod_offset)
@@ -28,8 +28,8 @@ class TestSequenceSliceOp(OpTest):
     def init_test_case(self):
         self.x_dim = (100, 3, 2)
         self.x_lod = [[0, 20, 40, 60, 80, 100]]
-        self.offset = [1, 2, 3, 4, 5]
-        self.length = [10, 8, 6, 4, 2]
+        self.offset = [[1], [2], [3], [4], [5]]
+        self.length = [[10], [8], [6], [4], [2]]
 
     def setUp(self):
         self.op_type = "sequence_slice"
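For the configuration in init_test_case above, the reference loop in the test accumulates len(sub_x) per sequence, so the expected output LoD can be checked by hand with a few lines (an illustration only, not part of the test file):

    lengths = [10, 8, 6, 4, 2]           # the column values of self.length
    out_lod = [0]
    for l in lengths:
        out_lod.append(out_lod[-1] + l)  # each slice contributes `length` rows
    assert out_lod == [0, 10, 18, 24, 28, 30]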