diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 82c35360c222483151bec40ac12a7e776d2136cb..4788d9c1b29982394d3574320b535973529a88d8 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -134,14 +134,14 @@ paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'b paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e')) paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'bc7a0fd2bb2b35dfd2f54947320e78fa')) paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '933b7e268c4ffa3d5c3ef953a5ee9f0b')) -paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66')) +paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '07bb25484c98d529fbe67338422724af')) paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '789a141e97fd0b37241f630935936d08')) paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4')) paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'bbb9e708bab250359864fefbdf48e9d9')) paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types', 'seq_length'], varargs=None, keywords=None, 
defaults=(None, None)), ('document', 'b02844e0ad4bd713c5fe6802aa13219c')) paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'padding_start', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, True, None, None, None, None, None)), ('document', '2bf23e7884c380c3b27f2709aa322cb9')) -paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None, 'NCHW')), ('document', 'b8da17862ba02b5297a37d2edd571d76')) -paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None, 'NCDHW')), ('document', '73a15322d460ef9aa90d4d237b0bc5d5')) +paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None, 'NCHW')), ('document', 'b9be3712a46e196c7329eed52ed91d05')) +paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None, 'NCDHW')), ('document', 'a7e4573745c40b8b1d726709f209b6e4')) paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test', 'pad_value'], varargs=None, keywords=None, defaults=(False, 0.0)), ('document', 'e90a93251c52dc4e6fb34fb3991b3f82')) paddle.fluid.layers.sequence_softmax 
(ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'eaa9d0bbd3d4e017c8bc4ecdac483711')) paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', 'cee673c79e3ff4582656a24e04f841e5')) @@ -149,12 +149,12 @@ paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'po paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive', 'data_format'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True, 'NCDHW')), ('document', 'db0035a3132b1dfb12e53c57591fb9f6')) paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '52343203de40afe29607397e13aaf0d2')) paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '55db6ae7275fb9678a6814aebab81a9c')) -paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '9e5a9f4f6d82d34a33d9ca632379cbcc')) -paddle.fluid.layers.instance_norm (ArgSpec(args=['input', 'epsilon', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None)), ('document', '02972097e089629efdb0ed9404fd36ae')) +paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 
'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', 'b88a2a2d5de3e6d845d134782fb54857')) +paddle.fluid.layers.instance_norm (ArgSpec(args=['input', 'epsilon', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None)), ('document', '5e2d18e85599ede7e71b06ed64d0f69e')) paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', '2460b30fb87037555208fa8ac6fc1787')) paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '83e08f21af41ac8bac37aeab1f86fdd0')) -paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None, 'NCHW')), ('document', '9391d75358b6cba0cc5d22a01a223420')) -paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None, 'NCDHW')), ('document', '74bce3cd4224e6ff133d54508dc7f150')) +paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 
'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None, 'NCHW')), ('document', '0ca6c549ac2b63096bdc7832a08b4431')) +paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None, 'NCDHW')), ('document', '709d7ca3a94f52a253d15b06aafb1bd0')) paddle.fluid.layers.sequence_expand (ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '10e122eb755c2bd1f78ef2332b28f1a0')) paddle.fluid.layers.sequence_expand_as (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '858c432e7cbd8bb952cc2eb555457d50')) paddle.fluid.layers.sequence_pad (ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'df08b9c499ab3a90f95d08ab5b6c6c62')) @@ -170,7 +170,7 @@ paddle.fluid.layers.reduce_any (ArgSpec(args=['input', 'dim', 'keep_dim', 'name' paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'f2dfd65b859de9844e7261e7a4503f63')) paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '1af2e3a887e4f914f9d6650406186ab6')) paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '39fbc5437be389f6c0c769f82fc1fba2')) -paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 
'downgrade_in_infer')), ('document', '558d13133596209190df9a624264f28f')) +paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', '4fd396b6cf16bb4ef2a56d695d0ad941')) paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '78cf3a7323d1a7697658242e13f63759')) paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'input_length', 'padding_value', 'name'], varargs=None, keywords=None, defaults=(None, 0, None)), ('document', '9abb7bb8d267e017620a39a146dc47ea')) paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens', 'input_length', 'label_length'], varargs=None, keywords=None, defaults=(True, None, None, None)), ('document', '77cbfb28cd2fc589f589c7013c5086cd')) @@ -297,7 +297,7 @@ paddle.fluid.layers.prroi_pool (ArgSpec(args=['input', 'rois', 'output_channels' paddle.fluid.layers.teacher_student_sigmoid_loss (ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)), ('document', '07cb0d95a646dba1b9cc7cdce89e59f0')) paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None), ('document', '11bb8e62cc9256958eff3991fe4834da')) paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name'], varargs=None, keywords=None, defaults=('mean', None)), ('document', '18bc95c62d3300456c3c7da5278b47bb')) -paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', '6b6ee1170fe20a79cf0631a1f49b0df2')) +paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), 
('document', 'a41a93253c937697e900e19af172490d')) paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '7e5cac851fd9bad344230e1044b6a565')) paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', '20992b20d19c2e5983f366150827b4a6')) paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', 'c03490ffaa1b78258747157c313db4cd')) @@ -907,7 +907,7 @@ paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'f paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', 'd6a1e527b53f5cc15594fee307dfc5cf')) paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', 'b87bacfc70dd3477ed25ef14aa01389a')) paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', 'b1a07a0000eb9103e3a143ca8c13de5b')) -paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '4913d846264f17112bf7bc04273388cc')) +paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), 
('document', '591a48aa9d871896aa8548c977c4c120')) paddle.fluid.optimizer.SGDOptimizer ('paddle.fluid.optimizer.SGDOptimizer', ('document', 'c3c8dd3193d991adf8bda505560371d6')) paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610')) diff --git a/paddle/fluid/operators/cos_sim_op.cc b/paddle/fluid/operators/cos_sim_op.cc index 93304ec6700b795c923f24a5d0663884b818b9b3..289bb5d2dddef7a92f46ef8e3f9fb554ddfd9bca 100644 --- a/paddle/fluid/operators/cos_sim_op.cc +++ b/paddle/fluid/operators/cos_sim_op.cc @@ -73,8 +73,12 @@ class CosSimOp : public framework::OperatorWithKernel { class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "The 1st input of cos_sim op."); - AddInput("Y", "The 2nd input of cos_sim op."); + AddInput("X", + "The 1st input of cos_sim op, LoDTensor with shape ``[N_1, N_2, " + "..., N_k]``, the data type is float32."); + AddInput("Y", + "The 2nd input of cos_sim op, Tensor with shape ``[N_1 or 1, N_2, " + "..., N_k]``, the data type is float32."); AddOutput("Out", "The output of cos_sim op."); AddOutput("XNorm", "Norm of the first input, reduced along the 1st " diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index add4c8933aa6aacb5c1a74af3f9b9ac6bea910e7..7b8ea4b7b0f89b4f6af8d26370d7b3be9a3bf878 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1597,7 +1597,7 @@ def cos_sim(X, Y): Y (Variable): ${y_comment}. Returns: - Variable: the output of cosine(X, Y). + A Variable holding LoDTensor representing the output of cosine(X, Y). Examples: .. 
code-block:: python @@ -1639,13 +1639,13 @@ def dropout(x, dropout op can be removed from the program to make the program more efficient. Args: - x (Variable): The input tensor variable. + x (Variable): The input tensor variable. The data type is float16 or float32 or float64. dropout_prob (float): Probability of setting units to zero. is_test (bool): A flag indicating whether it is in test phrase or not. seed (int): A Python integer used to create random seeds. If this parameter is set to None, a random seed is used. NOTE: If an integer seed is given, always the same output - units will be dropped. DO NOT use a fixed seed in training. + units will be dropped. DO NOT use a fixed seed in training. Default: None. name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. dropout_implementation(string): ['downgrade_in_infer'(default)|'upscale_in_train'] @@ -1667,7 +1667,7 @@ def dropout(x, Returns: - Variable: A tensor variable is the shape with `x`. + A Variable holding Tensor representing the dropout, has same shape and data type with `x`. Examples: @@ -2360,31 +2360,32 @@ def conv2d(input, H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 - Note: - padding mode is 'SAME' and 'VALID' can reference this link`_ - Args: - input (Variable): The input image with [N, C, H, W] or [N, H, W, C] format. + input (Variable): The input is 4-D Tensor with shape [N, C, H, W], the data type + of input is float16 or float32 or float64. num_filters(int): The number of filter. It is as same as the output image channel. filter_size (int|tuple): The filter size. If filter_size is a tuple, it must contain two integers, (filter_size_height, - filter_size_width). Otherwise, filter_size_height = filter_\ - size_width = filter_size. - stride (int|tuple): The stride size. 
If stride is a tuple, it must - contain two integers, (stride_height, stride_width). Otherwise, - stride_height = stride_width = stride. Default: stride = 1. - padding (string|int|list|tuple): The padding size. If `padding` is a string, either 'VALID' or + filter_size_width). Otherwise, filter_size_height = filter_size_width =\ + filter_size. + stride (int|tuple): The stride size. It means the stride in convolution. + If stride is a tuple, it must contain two integers, (stride_height, stride_width). + Otherwise, stride_height = stride_width = stride. Default: stride = 1. + padding (string|int|list|tuple): The padding size. It means the number of zero-paddings + on both sides for each dimension. If `padding` is a string, either 'VALID' or 'SAME' which is the padding algorithm. If padding size is a tuple or list, it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when + `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0], + [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. when `data_format` is `"NHWC"`, `pool_padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - dilation (int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_height, dilation_width). Otherwise, - dilation_height = dilation_width = dilation. Default: dilation = 1. + dilation (int|tuple): The dilation size. It means the spacing between the kernel + points. If dilation is a tuple, it must contain two integers, (dilation_height, + dilation_width). Otherwise, dilation_height = dilation_width = dilation. 
+ Default: dilation = 1. groups (int): The groups number of the Conv2d Layer. According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, the first half of the filters is only connected to the first half @@ -2404,19 +2405,18 @@ def conv2d(input, library is installed. Default: True act (str): Activation type, if it is set to None, activation is not appended. Default: None - name (str|None): A name for this layer(optional). If set None, the layer - will be named automatically. Default: None. + name(str|None): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. data_format (str): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. Returns: - Variable: The tensor variable storing the convolution and \ - non-linearity activation result. - - Raises: - ValueError: If the shapes of input, filter_size, stride, padding and - groups mismatch. + A Variable holding Tensor representing the conv2d, whose data type is the + same with input. If act is None, the tensor variable storing the convolution + result, and if act is not None, the tensor variable storing convolution + and non-linearity activation result. Examples: .. code-block:: python @@ -2577,8 +2577,6 @@ def conv3d(input, name=None, data_format="NCDHW"): """ - **Convlution3D Layer** - The convolution3D layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input(Input) and Output(Output) are in NCDHW or NDHWC format. Where N is batch size C is the number of @@ -2623,17 +2621,19 @@ def conv3d(input, W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1 Args: - input (Variable): The input image with [N, C, D, H, W] or [N, D, H, W, C]format. 
+ input (Variable): The input is 5-D Tensor with shape [N, C, D, H, W], the data + type of input is float16 or float32 or float64. num_filters(int): The number of filter. It is as same as the output image channel. filter_size (int|tuple): The filter size. If filter_size is a tuple, it must contain three integers, (filter_size_depth, filter_size_height, filter_size_width). Otherwise, filter_size_depth = filter_size_height = \ filter_size_width = filter_size. - stride (int|tuple): The stride size. If stride is a tuple, it must - contain three integers, (stride_depth, stride_height, stride_width). Otherwise, - stride_depth = stride_height = stride_width = stride. Default: stride = 1. - padding (string|int|list|tuple): The padding size. f `padding` is a string, either 'VALID' or + stride (int|tuple): The stride size. It means the stride in convolution. If stride is a + tuple, it must contain three integers, (stride_depth, stride_height, stride_width). + Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. + padding (string|int|list|tuple): The padding size. It means the number of zero-paddings + on both sides for each dimension. If `padding` is a string, either 'VALID' or 'SAME' which is the padding algorithm. If padding size is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, @@ -2642,9 +2642,10 @@ def conv3d(input, when `data_format` is `"NDHWC"`, `pool_padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - dilation (int|tuple): The dilation size. If dilation is a tuple, it must - contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, - dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. 
+ dilation (int|tuple): The dilation size. It means the spacing between the kernel points. + If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, + dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. + Default: dilation = 1. groups (int): The groups number of the Conv3d Layer. According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, the first half of the filters is only connected to the first half @@ -2664,19 +2665,18 @@ def conv3d(input, library is installed. Default: True act (str): Activation type, if it is set to None, activation is not appended. Default: None. - name (str|None): A name for this layer(optional). If set None, the layer - will be named automatically. Default: None. + name(str|None): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. data_format (str): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: `[batch_size, input_channels, input_depth, input_height, input_width]`. Returns: - Variable: The tensor variable storing the convolution and \ - non-linearity activation result. - - Raises: - ValueError: If the shapes of input, filter_size, stride, padding and - groups mismatch. + A Variable holding Tensor representing the conv3d, whose data type is + the same with input. If act is None, the tensor variable storing the + convolution result, and if act is not None, the tensor variable storing + convolution and non-linearity activation result. Examples: .. code-block:: python @@ -3685,7 +3685,7 @@ def batch_norm(input, """ **Batch Normalization Layer** - Can be used as a normalizer function for conv2d and fully_connected operations. + Can be used as a normalizer function for convolution or fully_connected operations. 
The required data format for this layer is one of the following: 1. NHWC `[batch, in_height, in_width, in_channels]` @@ -3708,10 +3708,11 @@ def batch_norm(input, \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift - moving\_mean = moving\_mean * momentum + mini-batch\_mean * (1. - momentum) - moving\_var = moving\_var * momentum + mini-batch\_var * (1. - momentum) - moving_mean and moving_var is global mean and global variance. + moving\_mean = moving\_mean * momentum + mini-batch\_mean * (1. - momentum) \\\\ + moving\_var = moving\_var * momentum + mini-batch\_var * (1. - momentum) + + moving_mean is global mean and moving_var is global variance. When use_global_stats = True, the :math:`\\mu_{\\beta}` and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch. @@ -3730,7 +3731,8 @@ def batch_norm(input, sync_batch_norm automatically. Args: - input(variable): The rank of input variable can be 2, 3, 4, 5. + input(variable): The rank of input variable can be 2, 3, 4, 5. The data type + is float16 or float32 or float64. act(string, Default None): Activation type, linear|relu|prelu|... is_test (bool, Default False): A flag indicating whether it is in test phrase or not. @@ -3751,14 +3753,14 @@ def batch_norm(input, will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. - data_layout(string, default NCHW): NCHW|NHWC + data_layout(str, default NCHW): the data_layout of input, is NCHW or NHWC. in_place(bool, Default False): Make the input and output of batch norm reuse memory. - name(string, Default None): A name for this layer(optional). If set None, the layer - will be named automatically. - moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. 
If it + name(str|None): For detailed information, please refer to :ref:`api_guide_Name`. + Usually name is no need to set and None by default. + moving_mean_name(str, Default None): The name of moving_mean which store the global Mean. If it is set to None, batch_norm will save global mean with a random name, otherwise, batch_norm will save global mean with the string. - moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance. + moving_variance_name(str, Default None): The name of the moving_variance which store the global Variance. If it is set to None, batch_norm will save global variance with a random name, otherwise, batch_norm will save global variance with the string. do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not. @@ -3770,7 +3772,8 @@ def batch_norm(input, and variance are also used during train period. Returns: - Variable: A tensor variable which is the result after applying batch normalization on the input. + A Variable holding Tensor which is the result after applying batch normalization on the input, + has same shape and data type with input. Examples: @@ -3892,7 +3895,7 @@ def instance_norm(input, """ **Instance Normalization Layer** - Can be used as a normalizer function for conv2d and fully_connected operations. + Can be used as a normalizer function for convolution or fully_connected operations. The required data format for this layer is one of the following: DataLayout: NCHW `[batch, in_channels, in_height, in_width]` @@ -3906,19 +3909,19 @@ def instance_norm(input, .. 
math:: \\mu_{\\beta} &\\gets \\frac{1}{HW} \\sum_{i=1}^{HW} x_i \\qquad &//\\ - \\ mean of one feature map in mini-batch \\\\ + \\ mean\ of\ one\ feature\ map\ in\ mini-batch \\\\ \\sigma_{\\beta}^{2} &\\gets \\frac{1}{HW} \\sum_{i=1}^{HW}(x_i - \\ - \\mu_{\\beta})^2 \\qquad &//\ variance of one feature map in mini-batch \\\\ + \\mu_{\\beta})^2 \\qquad &//\ variance\ of\ one\ feature\ map\ in\ mini-batch \\\\ \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift - \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ - \\sigma_{\\beta}^{2} + \\epsilon}} \\\\ - y_i &\\gets \\gamma \\hat{x_i} + \\beta + Note: + `H` means height of feature map, `W` means width of feature map. Args: - input(variable): The rank of input variable can be 2, 3, 4, 5. + input(variable): The rank of input variable can be 2, 3, 4, 5. + The data type is float32 or float64. epsilon(float, Default 1e-05): A value added to the denominator for numerical stability. Default is 1e-5. param_attr(ParamAttr|None): The parameter attribute for Parameter `scale` @@ -3935,7 +3938,8 @@ def instance_norm(input, will be named automatically. Returns: - Variable: A tensor variable which is the result after applying instance normalization on the input. + A Variable holding Tensor which is the result after applying instance normalization on the input, + has same shape and data type with input. Examples: @@ -4429,8 +4433,6 @@ def conv2d_transpose(input, name=None, data_format='NCHW'): """ - **Convlution2D transpose layer** - The convolution2D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) are in NCHW or NHWC format. 
Where N is batch size, C is the number of channels, @@ -4479,10 +4481,11 @@ def conv2d_transpose(input, H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ] \\\\ W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ] - padding mode is 'SAME' and 'VALID' can reference this link`_ - Note: - if output_size is None, :math:`H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`; + The conv2d_transpose can be seen as the backward of the conv2d. For conv2d, + when stride > 1, conv2d maps multiple input shape to the same output shape, + so for conv2d_transpose, when stride > 1, input shape maps multiple output shape. + If output_size is None, :math:`H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`; else, the :math:`H_{out}` of the output size must between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[0]`, and the :math:`W_{out}` of the output size must between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[1]`, @@ -4496,13 +4499,19 @@ def conv2d_transpose(input, output_size(int|tuple, optional): The output image size. If output size is a tuple, it must contain two integers, (image_height, image_width). None if use filter_size, padding, and stride to calculate output_size. - if output_size and filter_size are specified at the same time, They - should follow the formula above. Default: None. + If output_size and filter_size are specified at the same time, They + should follow the formula above. Default: None. output_size and filter_size + should not be None at the same time. filter_size(int|tuple, optional): The filter size. If filter_size is a tuple, it must contain two integers, (filter_size_height, filter_size_width). Otherwise, filter_size_height = filter_size_width = filter_size. None if - use output size to calculate filter_size. Default: None. - padding(int|list|str|tuple, optional):The padding size. If `padding` is a + use output size to calculate filter_size. Default: None. 
filter_size and + output_size should not be None at the same time. + stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain two integers, (stride_height, stride_width). + Otherwise, stride_height = stride_width = stride. Default: stride = 1. + padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds + `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string, either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding` is a tuple or list, it could be in three forms: `[pad_height, pad_width]` or @@ -4512,12 +4521,13 @@ def conv2d_transpose(input, when `data_format` is `'NHWC'`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride(int|tuple, optional): The stride size. If stride is a tuple, it must - contain two integers, (stride_height, stride_width). Otherwise, - stride_height = stride_width = stride. Default: stride = 1. - dilation(int|tuple, optional): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_height, dilation_width). Otherwise, - dilation_height = dilation_width = dilation. Default: dilation = 1. + dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points. + If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width). + Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1. + filter_size(int|tuple, optional): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_height, filter_size_width). + Otherwise, filter_size_height = filter_size_width = filter_size. None if + use output size to calculate filter_size. Default: None. groups(int, optional): The groups number of the Conv2d transpose layer. 
Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -4537,18 +4547,23 @@ def conv2d_transpose(input, library is installed. Default: True. act (str, optional): Activation type, if it is set to None, activation is not appended. Default: None. - name(str, optional): A name for this layer(optional). If set None, the layer - will be named automatically. Default: True. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually there is no need to set name, which is + None by default. data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. Default: 'NCHW'. Returns: - Variable: A 4-D Tensor of the shape (num_batches, channels, out_h, out_w) or - (num_batches, out_h, out_w, channels). + A Variable holding Tensor representing the conv2d_transpose, whose + data type is the same as input and shape is (num_batches, channels, out_h, + out_w) or (num_batches, out_h, out_w, channels). If act is None, the tensor variable + stores the transposed convolution result, and if act is not None, the + tensor variable stores the transposed convolution and non-linearity activation + result. Raises: - ValueError: If the shapes of input, filter_size, stride, padding and + ValueError: If the shapes of output, input, filter_size, stride, padding and groups mismatch. Examples: @@ -4690,8 +4705,6 @@ def conv3d_transpose(input, name=None, data_format='NCDHW'): """ - **Convlution3D transpose layer** - The convolution3D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels, @@ -4735,26 +4748,43 @@ def conv3d_transpose(input, ..
math:: - D_{out} &= (D_{in} - 1) * strides[0] - pad_depth_front - pad_depth_back + dilations[0] * (D_f - 1) + 1 \\\\ - H_{out} &= (H_{in} - 1) * strides[1] - pad_height_top - pad_height_bottom + dilations[1] * (H_f - 1) + 1 \\\\ - W_{out} &= (W_{in} - 1) * strides[2] - pad_width_left - pad_width_right + dilations[2] * (W_f - 1) + 1 - - Padding mode is 'SAME' and 'VALID' can reference this - link`_ + D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\ + H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\ + W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\\\ + D_{out} &\in [ D^\prime_{out}, D^\prime_{out} + strides[0] ] \\\\ + H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[1] ] \\\\ + W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[2] ] - Args: - input(Variable): A 5-D Tensor with [N, C, H, W] or [N, H, W, C] format. Its data type is float32 or float64. + Note: + The conv3d_transpose can be seen as the backward of the conv3d. For conv3d, + when stride > 1, conv3d maps multiple input shapes to the same output shape, + so for conv3d_transpose, when stride > 1, one input shape maps to multiple output shapes. + If output_size is None, :math:`D_{out} = D^\prime_{out}, H_{out} = \ + H^\prime_{out}, W_{out} = W^\prime_{out}`; else, the :math:`D_{out}` of the output + size must be between :math:`D^\prime_{out}` and :math:`D^\prime_{out} + strides[0]`, + the :math:`H_{out}` of the output size must be between :math:`H^\prime_{out}` + and :math:`H^\prime_{out} + strides[1]`, and the :math:`W_{out}` of the output size must be + between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[2]`, + conv3d_transpose can compute the kernel size automatically. + + Args: + input(Variable): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type + of input is float32 or float64.
num_filters(int): The number of the filter. It is as same as the output image channel. output_size(int|tuple, optional): The output image size. If output size is a - tuple, it must contain three integers, (image_D, image_H, image_W). This - parameter only works when filter_size is None. + tuple, it must contain three integers, (image_depth, image_height, image_width). This + parameter only works when filter_size is None. If output_size and filter_size are + specified at the same time, they should follow the formula above. Default: None. + output_size and filter_size should not be None at the same time. filter_size(int|tuple, optional): The filter size. If filter_size is a tuple, - it must contain three integers, (filter_size_depth, filter_size_height, \ + it must contain three integers, (filter_size_depth, filter_size_height, filter_size_width). Otherwise, filter_size_depth = filter_size_height = \ filter_size_width = filter_size. None if use output size to calculate filter_size. - padding(int|list|str|tuple, optional): The padding size. if `padding` is a string, + calculate filter_size. Default: None. filter_size and output_size should not be + None at the same time. + padding(int|list|str|tuple, optional): The padding size. The padding argument effectively + adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string, either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding` is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, @@ -4763,12 +4793,14 @@ def conv3d_transpose(input, when `data_format` is `'NDHWC'`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - stride(int|tuple, optional): The stride size.
If stride is a tuple, it must - contain three integers, (stride_depth, stride_height, stride_width). Otherwise, - stride_depth = stride_height = stride_width = stride. Default: stride = 1. - dilation(int|tuple, optional): The dilation size. If dilation is a tuple, it must - contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, - dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. + stride(int|tuple, optional): The stride size. It means the stride in transposed convolution. + If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. + Default: stride = 1. + dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points. + If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, + dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. + Default: dilation = 1. groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -4788,18 +4820,22 @@ def conv3d_transpose(input, library is installed. Default: True act (str, optional): Activation type, if it is set to None, activation is not appended. Default: None. - name(str, optional): A name for this layer(optional). If set None, the layer - will be named automatically. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually there is no need to set name, which is + None by default. data_format(str, optional):The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. Default: 'NCDHW'.
Returns: - A 5-D Tensor of the shape (num_batches, channels, out_d, out_h, out_w) or - (num_batches, out_d, out_h, out_w, channels). + A Variable holding Tensor representing the conv3d_transpose, whose data + type is the same as input and shape is (num_batches, channels, out_d, out_h, + out_w) or (num_batches, out_d, out_h, out_w, channels). If act is None, the tensor + variable stores the transposed convolution result, and if act is not None, the tensor + variable stores the transposed convolution and non-linearity activation result. Raises: - ValueError: If the shapes of input, filter_size, stride, padding and + ValueError: If the shapes of output, input, filter_size, stride, padding and groups mismatch. Examples: @@ -14461,20 +14497,25 @@ def npair_loss(anchor, positive, labels, l2_reg=0.002): ''' **Npair Loss Layer** - Read `Improved Deep Metric Learning with Multi class N pair Loss Objective `_ . + Read `Improved Deep Metric Learning with Multi class N pair Loss Objective\ + `_ . Npair loss requires paired data. Npair loss has two parts: the first part is L2 regularizer on the embedding vector; the second part is cross entropy loss which takes the similarity matrix of anchor and positive as logits. Args: - anchor(Variable): embedding vector for the anchor image. shape=[batch_size, embedding_dims] - positive(Variable): embedding vector for the positive image. shape=[batch_size, embedding_dims] - labels(Variable): 1-D tensor. shape=[batch_size] - l2_reg(float32): L2 regularization term on embedding vector, default: 0.002 + anchor(Variable): embedding vector for the anchor image. shape=[batch_size, embedding_dims], + the data type is float32 or float64. + positive(Variable): embedding vector for the positive image. shape=[batch_size, embedding_dims], + the data type is float32 or float64. + labels(Variable): 1-D tensor. shape=[batch_size], the data type is float32 or float64 or int64. + l2_reg(float32): L2 regularization term on embedding vector, default: 0.002.
Returns: - npair loss(Variable): return npair loss, shape=[1] + A Variable holding Tensor representing the npair loss, the data type is the same as + anchor, the shape is [1]. Examples: .. code-block:: python diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index e340f03161ef6d90cdce6549e56269d591c8a9b7..14bf9e874043e92e02741dba892993d0748dcba7 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -152,11 +152,11 @@ def img_conv_group(input, result to Pool2d. Args: - input (Variable): The input image with [N, C, H, W] format. + input (Variable): The input is 4-D Tensor with shape [N, C, H, W], the data type of input is float32 or float64. conv_num_filter(list|tuple): Indicates the numbers of filter of this group. pool_size (int|list|tuple): The pooling size of Pool2d Layer. If pool_size - is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W). - Otherwise, the pool_size_H = pool_size_W = pool_size. + is a list or tuple, it must contain two integers, (pool_size_height, pool_size_width). + Otherwise, the pool_size_height = pool_size_width = pool_size. conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is a list or tuple, its length must be equal to the length of conv_num_filter. Otherwise the conv_padding of all Conv2d Layers are the same. Default 1. @@ -184,8 +184,8 @@ def img_conv_group(input, library is installed. Default: True Return: - Variable: The final result after serial computation using Convolution2d, - BatchNorm, DropOut, and Pool2d. + The final result after serial computation using Convolution2d, + BatchNorm, DropOut, and Pool2d. Examples: .. code-block:: python