diff --git a/python/paddle/vision/models/alexnet.py b/python/paddle/vision/models/alexnet.py index 411a8f01be295a502ddfea4eda9d7206cee5ab67..d483009fdeb1f7b3c277868624c902493d48a755 100644 --- a/python/paddle/vision/models/alexnet.py +++ b/python/paddle/vision/models/alexnet.py @@ -1,4 +1,4 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -175,14 +175,20 @@ def _alexnet(arch, pretrained, **kwargs): def alexnet(pretrained=False, **kwargs): - """AlexNet model + """ + AlexNet model Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False. - + pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False. + **kwargs: Additional keyword arguments. For details, please refer to :ref:`AlexNet `. + + Returns: + the model of alexnet. + Examples: .. code-block:: python - + :name: code-example + import paddle from paddle.vision.models import alexnet # build model @@ -190,5 +196,11 @@ def alexnet(pretrained=False, **kwargs): # build model and load imagenet pretrained weight # model = alexnet(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + # [1, 1000] """ return _alexnet('alexnet', pretrained, **kwargs) diff --git a/python/paddle/vision/models/mobilenetv1.py b/python/paddle/vision/models/mobilenetv1.py index e8e4994a75be4209a0c543f6d4da4bbd42fca174..3cc5e5a20e921197fd7da14572e3269d063d9d59 100644 --- a/python/paddle/vision/models/mobilenetv1.py +++ b/python/paddle/vision/models/mobilenetv1.py @@ -58,14 +58,14 @@ class MobileNetV1(nn.Layer): `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" `_. Args: - scale (float): scale of channels in each layer. Default: 1.0. - num_classes (int): output dim of last fc layer. 
If num_classes <=0, last fc layer + scale (float, optional): scale of channels in each layer. Default: 1.0. + num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. - with_pool (bool): use pool before the last fc layer or not. Default: True. + with_pool (bool, optional): use pool before the last fc layer or not. Default: True. Examples: .. code-block:: python - + :name: code-example1 import paddle from paddle.vision.models import MobileNetV1 @@ -75,6 +75,7 @@ class MobileNetV1(nn.Layer): out = model(x) print(out.shape) + # [1, 1000] """ def __init__(self, scale=1.0, num_classes=1000, with_pool=True): diff --git a/python/paddle/vision/models/mobilenetv2.py b/python/paddle/vision/models/mobilenetv2.py index f9111185de63de68a1c4a92494a80725895f15a5..80ca7105cb456e25d2dd5337c64486e56d4b22b2 100644 --- a/python/paddle/vision/models/mobilenetv2.py +++ b/python/paddle/vision/models/mobilenetv2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -75,14 +75,14 @@ class MobileNetV2(nn.Layer): `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. Args: - scale (float): scale of channels in each layer. Default: 1.0. - num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + scale (float, optional): scale of channels in each layer. Default: 1.0. + num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. - with_pool (bool): use pool before the last fc layer or not. Default: True. + with_pool (bool, optional): use pool before the last fc layer or not. Default: True. Examples: .. 
code-block:: python - + :name: code-example1 import paddle from paddle.vision.models import MobileNetV2 @@ -92,6 +92,7 @@ class MobileNetV2(nn.Layer): out = model(x) print(out.shape) + # [1, 1000] """ def __init__(self, scale=1.0, num_classes=1000, with_pool=True): diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py index dd88d06449374cbecc3215cb0df41665d1786735..c7913ce774287d063d34ffc3f65545065b6aa97b 100644 --- a/python/paddle/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -33,13 +33,15 @@ class VGG(nn.Layer): Args: features (nn.Layer): Vgg features create by function make_layers. - num_classes (int): Output dim of last fc layer. If num_classes <=0, last fc layer + num_classes (int, optional): Output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. - with_pool (bool): Use pool before the last three fc layer or not. Default: True. + with_pool (bool, optional): Use pool before the last three fc layer or not. Default: True. Examples: .. code-block:: python + :name: code-example + import paddle from paddle.vision.models import VGG from paddle.vision.models.vgg import make_layers @@ -49,6 +51,12 @@ class VGG(nn.Layer): vgg11 = VGG(features) + x = paddle.rand([1, 3, 224, 224]) + out = vgg11(x) + + print(out.shape) + # [1, 1000] + """ def __init__(self, features, num_classes=1000, with_pool=True): diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 69fba204dd3144299c8d4bdc7d87772ff40bb1c6..876b80c6abc73391be9383b5d3f204e0484bdcf6 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -951,7 +951,7 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None): boxes_num (Tensor): The number of boxes contained in each picture in the batch. output_size (int|Tuple(int, int)) The pooled output size(H, W), data type is int32. If int, H and W are both equal to output_size. 
- spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their + spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0 name(str, optional): The default value is None. Normally there is no need for user to set this property. @@ -963,12 +963,15 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None): Examples: .. code-block:: python - + :name: code-example1 + import paddle x = paddle.uniform([2, 490, 28, 28], dtype='float32') boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32') boxes_num = paddle.to_tensor([1, 2], dtype='int32') pool_out = paddle.vision.ops.psroi_pool(x, boxes, boxes_num, 7, 1.0) + print(pool_out.shape) + # [3, 10, 7, 7] """ check_type(output_size, 'output_size', (int, tuple, list), 'psroi_pool') @@ -1014,7 +1017,7 @@ class PSRoIPool(Layer): Args: output_size (int|Tuple(int, int)) The pooled output size(H, W), data type is int32. If int, H and W are both equal to output_size. - spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their + spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. Default: 1.0. Shape: @@ -1025,11 +1028,11 @@ class PSRoIPool(Layer): The output_channels equal to C / (pooled_h * pooled_w), where C is the channels of input. Returns: - None + None. Examples: .. 
code-block:: python - + :name: code-example1 import paddle psroi_module = paddle.vision.ops.PSRoIPool(7, 1.0) @@ -1037,7 +1040,7 @@ class PSRoIPool(Layer): boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32') boxes_num = paddle.to_tensor([1, 2], dtype='int32') pool_out = psroi_module(x, boxes, boxes_num) - + print(pool_out.shape) # [3, 10, 7, 7] """ def __init__(self, output_size, spatial_scale=1.0): @@ -1187,7 +1190,7 @@ def roi_align(x, aligned=True, name=None): """ - This operator implements the roi_align layer. + Implement the roi_align layer. Region of Interest (RoI) Align operator (also known as RoI Align) is to perform bilinear interpolation on inputs of nonuniform sizes to obtain fixed-size feature maps (e.g. 7*7), as described in Mask R-CNN. @@ -1211,31 +1214,31 @@ def roi_align(x, the batch, the data type is int32. output_size (int or Tuple[int, int]): The pooled output size(h, w), data type is int32. If int, h and w are both equal to output_size. - spatial_scale (float32): Multiplicative spatial scale factor to translate + spatial_scale (float32, optional): Multiplicative spatial scale factor to translate ROI coords from their input scale to the scale used when pooling. - Default: 1.0 - sampling_ratio (int32): number of sampling points in the interpolation + Default: 1.0. + sampling_ratio (int32, optional): number of sampling points in the interpolation grid used to compute the output value of each pooled output bin. If > 0, then exactly ``sampling_ratio x sampling_ratio`` sampling points per bin are used. If <= 0, then an adaptive number of grid points are used (computed as ``ceil(roi_width / output_width)``, and likewise for height). - Default: -1 - aligned (bool): If False, use the legacy implementation. 
If True, pixel shift the box coordinates it by -0.5 for a better alignment with the two neighboring pixel indices. This version is used in Detectron2. - Default: True + Default: True. name(str, optional): For detailed information, please refer to : ref:`api_guide_Name`. Usually name is no need to set and None by default. Returns: - Tensor: The output of ROIAlignOp is a 4-D tensor with shape (num_boxes, + The output of ROIAlignOp is a 4-D tensor with shape (num_boxes, channels, pooled_h, pooled_w). The data type is float32 or float64. Examples: .. code-block:: python - + :name: code-example1 import paddle from paddle.vision.ops import roi_align @@ -1306,12 +1309,12 @@ class RoIAlign(Layer): when pooling. Default: 1.0 Returns: - align_out (Tensor): The output of ROIAlign operator is a 4-D tensor with + The output of ROIAlign operator is a 4-D tensor with shape (num_boxes, channels, pooled_h, pooled_w). Examples: .. code-block:: python - + :name: code-example1 import paddle from paddle.vision.ops import RoIAlign diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 79c0720f607772441a39220b06152b1023ad19ee..301252a048b7a7498afb83cc9ad7555f7676063a 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -666,8 +666,8 @@ class Normalize(BaseTransform): ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` Args: - mean (int|float|list|tuple): Sequence of means for each channel. - std (int|float|list|tuple): Sequence of standard deviations for each channel. + mean (int|float|list|tuple, optional): Sequence of means for each channel. + std (int|float|list|tuple, optional): Sequence of standard deviations for each channel. data_format (str, optional): Data format of img, should be 'HWC' or 'CHW'. Default: 'CHW'. to_rgb (bool, optional): Whether to convert to rgb. Default: False. @@ -683,20 +683,21 @@ class Normalize(BaseTransform): Examples: .. 
code-block:: python - - import numpy as np - from PIL import Image + :name: code-example + import paddle from paddle.vision.transforms import Normalize - normalize = Normalize(mean=[127.5, 127.5, 127.5], + normalize = Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], data_format='HWC') - fake_img = Image.fromarray((np.random.rand(300, 320, 3) * 255.).astype(np.uint8)) + fake_img = paddle.rand([300,320,3]).numpy() * 255. fake_img = normalize(fake_img) print(fake_img.shape) - print(fake_img.max, fake_img.max) + # (300, 320, 3) + print(fake_img.max(), fake_img.min()) + # 0.99999905 -0.999974 """