From fe218df3262fce1f49dd0c1280159ca73477678b Mon Sep 17 00:00:00 2001 From: lvmengsi Date: Wed, 25 Sep 2019 14:26:04 +0800 Subject: [PATCH] Fix ssdloss num and batch norm format and conv2d (#19754) * update API.spec --- paddle/fluid/API.spec | 14 ++-- python/paddle/fluid/layers/detection.py | 2 +- python/paddle/fluid/layers/nn.py | 95 +++++++++++++------------ 3 files changed, 58 insertions(+), 53 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 0b570181d3..512ffbff5e 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -129,8 +129,8 @@ paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=N paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'bbb9e708bab250359864fefbdf48e9d9')) paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types', 'seq_length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b02844e0ad4bd713c5fe6802aa13219c')) paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'padding_start', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, True, None, None, None, None, None)), ('document', '2bf23e7884c380c3b27f2709aa322cb9')) -paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '35f5343338e38803c70ed0479965d0b4')) -paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '367293b5bada54136a91621078d38334')) +paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '06de9adb5994f6f8cb806c75b55550af')) +paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '71b09227709475fa178c1739dff64af6')) paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test', 'pad_value'], varargs=None, keywords=None, defaults=(False, 0.0)), ('document', 'e90a93251c52dc4e6fb34fb3991b3f82')) paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'eaa9d0bbd3d4e017c8bc4ecdac483711')) paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', 'cee673c79e3ff4582656a24e04f841e5')) @@ -138,12 +138,12 @@ paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'po paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '053b1a855f13a066d005759171724bc6')) paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '52343203de40afe29607397e13aaf0d2')) paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '55db6ae7275fb9678a6814aebab81a9c')) -paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '404741b5690228c493a2d9f59c6b1122')) -paddle.fluid.layers.instance_norm (ArgSpec(args=['input', 'epsilon', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None)), ('document', 'c124b947a6ac4d01f491275561b9c1ab')) +paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '9e5a9f4f6d82d34a33d9ca632379cbcc')) +paddle.fluid.layers.instance_norm (ArgSpec(args=['input', 'epsilon', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None)), ('document', '02972097e089629efdb0ed9404fd36ae')) paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', '2460b30fb87037555208fa8ac6fc1787')) paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '83e08f21af41ac8bac37aeab1f86fdd0')) -paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', '6d3b135bb3834d58ef2cb581ead1487c')) -paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'fb08f59141971b11f5f03bba06e9fc5a')) +paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'ab58296b567bf0c686084add7f3280a4')) +paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'fe15dbfb17d97d3d29b2fa7ee6390ee6')) paddle.fluid.layers.sequence_expand (ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '10e122eb755c2bd1f78ef2332b28f1a0')) paddle.fluid.layers.sequence_expand_as (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '858c432e7cbd8bb952cc2eb555457d50')) paddle.fluid.layers.sequence_pad (ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'df08b9c499ab3a90f95d08ab5b6c6c62')) @@ -404,7 +404,7 @@ paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size' paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '080ce0d54d3f1950ad5a3a8e5ae529e9')) paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'e9685f32d21bec8c013626c0254502c5')) paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '5485bcaceb0cde2695565a2ffd5bbd40')) -paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '8edacd4b9bd02dd68931b9fa6bfe0cbd')) +paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '14d1eeae0f41b6792be43c1c0be0589b')) paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '651d98d51879dfa1bc1cd40391786a41')) paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595')) paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d')) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 4564eb1e72..aa6c7ed0b8 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1329,7 +1329,7 @@ def ssd_loss(location, 5.1 Compute confidence loss. - 5.1 Compute localization loss. + 5.2 Compute localization loss. 5.3 Compute the overall weighted loss. diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 9f2a4f5dac..270924dd77 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2332,22 +2332,26 @@ def conv2d(input, H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 + Note: + padding mode is 'SAME' and 'VALID' can reference this link`_ + Args: input (Variable): The input image with [N, C, H, W] format. num_filters(int): The number of filter. It is as same as the output image channel. - filter_size (int|tuple): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. + filter_size (int|tuple): The filter size. If filter_size + is a tuple, it must contain two integers, (filter_size_height, + filter_size_width). Otherwise, filter_size_height = filter_\ + size_width = filter_size. stride (int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: stride = 1. + contain two integers, (stride_height, stride_width). Otherwise, + stride_height = stride_width = stride. Default: stride = 1. padding (int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. Default: padding = 0. + contain two integers, (padding_height, padding_width). Otherwise, + padding_height = padding_width = padding. Default: padding = 0. dilation (int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_H, dilation_W). Otherwise, the - dilation_H = dilation_W = dilation. Default: dilation = 1. + contain two integers, (dilation_height, dilation_width). Otherwise, + dilation_height = dilation_width = dilation. Default: dilation = 1. groups (int): The groups number of the Conv2d Layer. According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, the first half of the filters is only connected to the first half @@ -2511,18 +2515,19 @@ def conv3d(input, input (Variable): The input image with [N, C, D, H, W] format. num_filters(int): The number of filter. It is as same as the output image channel. - filter_size (int|tuple|None): The filter size. If filter_size is a tuple, - it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). - Otherwise, the filter will be a square. + filter_size (int|tuple): The filter size. If filter_size is a tuple, + it must contain three integers, (filter_size_depth, filter_size_height, + filter_size_width). Otherwise, filter_size_depth = filter_size_height = \ + filter_size_width = filter_size. stride (int|tuple): The stride size. If stride is a tuple, it must - contain three integers, (stride_D, stride_H, stride_W). Otherwise, the - stride_D = stride_H = stride_W = stride. Default: stride = 1. + contain three integers, (stride_depth, stride_height, stride_width). Otherwise, + stride_depth = stride_height = stride_width = stride. Default: stride = 1. padding (int|tuple): The padding size. If padding is a tuple, it must - contain three integers, (padding_D, padding_H, padding_W). Otherwise, the - padding_D = padding_H = padding_W = padding. Default: padding = 0. + contain three integers, (padding_depth, padding_height, padding_width). Otherwise, + padding_depth = padding_height = padding_width = padding. Default: padding = 0. dilation (int|tuple): The dilation size. If dilation is a tuple, it must - contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the - dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1. + contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, + dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. groups (int): The groups number of the Conv3d Layer. According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, the first half of the filters is only connected to the first half @@ -3350,6 +3355,10 @@ def batch_norm(input, \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift + moving\_mean = moving\_mean * momentum + mini-batch\_mean * (1. - momentum) + moving\_var = moving\_var * momentum + mini-batch\_var * (1. - momentum) + moving_mean and moving_var is global mean and global variance. + When use_global_stats = True, the :math:`\\mu_{\\beta}` and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch. @@ -3538,15 +3547,6 @@ def instance_norm(input, \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift - - When use_global_stats = True, the :math:`\\mu_{\\beta}` - and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch. - They are global (or running) statistics. (It usually got from the - pre-trained model.) - The training and testing (or inference) have the same behavior: - - .. math:: - \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ \\sigma_{\\beta}^{2} + \\epsilon}} \\\\ y_i &\\gets \\gamma \\hat{x_i} + \\beta @@ -4124,23 +4124,23 @@ def conv2d_transpose(input, num_filters(int): The number of the filter. It is as same as the output image channel. output_size(int|tuple|None): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). None if use + tuple, it must contain two integers, (image_height, image_width). None if use filter_size, padding, and stride to calculate output_size. if output_size and filter_size are specified at the same time, They should follow the formula above. filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. None if use output size to - calculate filter_size. + it must contain two integers, (filter_size_height, filter_size_width). + Otherwise, filter_size_height = filter_size_width = filter_size. None if + use output size to calculate filter_size. padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. Default: padding = 0. + contain two integers, (padding_height, padding_width). Otherwise, + padding_height = padding_width = padding. Default: padding = 0. stride(int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: stride = 1. + contain two integers, (stride_height, stride_width). Otherwise, + stride_height = stride_width = stride. Default: stride = 1. dilation(int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_H, dilation_W). Otherwise, the - dilation_H = dilation_W = dilation. Default: dilation = 1. + contain two integers, (dilation_height, dilation_width). Otherwise, + dilation_height = dilation_width = dilation. Default: dilation = 1. groups(int): The groups number of the Conv2d transpose layer. Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -4318,18 +4318,19 @@ def conv3d_transpose(input, tuple, it must contain three integers, (image_D, image_H, image_W). This parameter only works when filter_size is None. filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain three integers, (filter_size_D, filter_size_H, filter_size_W). - Otherwise, the filter will be a square. None if use output size to + it must contain three integers, (filter_size_depth, filter_size_height, \ + filter_size_width). Otherwise, filter_size_depth = filter_size_height = \ + filter_size_width = filter_size. None if use output size to calculate filter_size. padding(int|tuple): The padding size. If padding is a tuple, it must - contain three integers, (padding_D, padding_H, padding_W). Otherwise, the - padding_D = padding_H = padding_W = padding. Default: padding = 0. + contain three integers, (padding_depth, padding_height, padding_width). Otherwise, + padding_depth = padding_height = padding_width = padding. Default: padding = 0. stride(int|tuple): The stride size. If stride is a tuple, it must - contain three integers, (stride_D, stride_H, stride_W). Otherwise, the - stride_D = stride_H = stride_W = stride. Default: stride = 1. + contain three integers, (stride_depth, stride_height, stride_width). Otherwise, + stride_depth = stride_height = stride_width = stride. Default: stride = 1. dilation(int|tuple): The dilation size. If dilation is a tuple, it must - contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the - dilation_D = dilation_H = dilation_W = dilation. Default: dilation = 1. + contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, + dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. groups(int): The groups number of the Conv3d transpose layer. Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -10112,6 +10113,10 @@ def pad2d(input, """ helper = LayerHelper('pad2d', **locals()) + + assert mode in ['reflect', 'edge', 'constant' + ], "mode should be one of constant, reflect, edge." + dtype = helper.input_dtype(input_param_name='input') out = helper.create_variable_for_type_inference(dtype) inputs = {'X': input} -- GitLab