diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index 441bc31b93684f94fd1dc36183679f493c03ada0..b83f81b27d1a0745c7a2f3339bc3939eb2f19490 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -28,10 +28,10 @@ def auto_cast(enable=True, custom_white_list=None, custom_black_list=None): Args: enable(bool, optional): Enable auto-mixed-precision or not. Default is True. - custom_white_list(set|list, optional): The custom white_list. It's the set of ops that support + custom_white_list(set|list|tuple, optional): The custom white_list. It's the set of ops that support fp16 calculation and are considered numerically-safe and performance-critical. These ops will be converted to fp16. - custom_black_list(set|list, optional): The custom black_list. The set of ops that support fp16 + custom_black_list(set|list|tuple, optional): The custom black_list. The set of ops that support fp16 calculation and are considered numerically-dangerous and whose effects may also be observed in downstream ops. These ops will not be converted to fp16. diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index f4562924af556f1df832f6f7a4801a0117038675..69a8f8956a8c1591891fdc4b7b8e641d2a8e595c 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -662,7 +662,7 @@ def scatter(tensor, tensor_list=None, src=0, group=None, use_calc_stream=True): Args: tensor (Tensor): The output Tensor. Its data type should be float16, float32, float64, int32 or int64. - tensor_list (list): A list of Tensors to scatter. Every element in the list must be a Tensor whose data type + tensor_list (list|tuple): A list/tuple of Tensors to scatter. Every element in the list/tuple must be a Tensor whose data type should be float16, float32, float64, int32 or int64. Default value is None. src (int): The source rank id. Default value is 0. 
group (Group): The group instance return by new_group or None for global default group. @@ -679,6 +679,8 @@ def scatter(tensor, tensor_list=None, src=0, group=None, use_calc_stream=True): import paddle from paddle.distributed import init_parallel_env + # required: gpu + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) init_parallel_env() if paddle.distributed.ParallelEnv().local_rank == 0: diff --git a/python/paddle/distributed/fleet/base/private_helper_function.py b/python/paddle/distributed/fleet/base/private_helper_function.py index 6b3232b93b22416982d86d80db4530627bb2493a..6af4a9e667528b27aac524534a37b7f747cb4a92 100644 --- a/python/paddle/distributed/fleet/base/private_helper_function.py +++ b/python/paddle/distributed/fleet/base/private_helper_function.py @@ -24,7 +24,7 @@ def wait_server_ready(endpoints): port readiness. Args: - endpoints (list): endpoints string list, like: + endpoints (list|tuple): endpoints string list, like: ["127.0.0.1:8080", "127.0.0.1:8081"] Examples: diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index bf49604a897e5be134eb803d3ac4c4d39aacfb27..782fcb28e991c218a614eb8972ee40191b2bc0ec 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -325,7 +325,7 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options): func (function): The target function is called by spawned process. This function need to be able to pickled, so it must be defined at the top level of a module. - args (tuple, optional): Arguments passed to ``func``. + args (list|tuple, optional): Arguments passed to ``func``. nprocs (int, optional): Number of processed to start. Default: -1. 
when nprocs is -1, the available device will be obtained from the environment variable when the model is executed: If use GPU, diff --git a/python/paddle/fluid/dataloader/dataset.py b/python/paddle/fluid/dataloader/dataset.py index bf3d0a81f99482557a3d4098e98ac7078d9a9321..3578e27cf02af1cabc32ed07e9b72651d7c03e12 100755 --- a/python/paddle/fluid/dataloader/dataset.py +++ b/python/paddle/fluid/dataloader/dataset.py @@ -233,7 +233,7 @@ class TensorDataset(Dataset): each sample by indexing tensors in the 1st dimension. Args: - tensors(list of Tensor): tensors with same shape in the 1st dimension. + tensors(list|tuple): A list/tuple of tensors with the same shape in the 1st dimension. Returns: Dataset: a Dataset instance wrapping tensors. diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 59e22f24f33dde392333b7c87b94d3e3d1a1c322..a280667d03df4d705eae72ef863e6bf6cc10d76c 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -418,7 +418,7 @@ def cuda_places(device_ids=None): [paddle.CUDAPlace(0), paddle.CUDAPlace(1), paddle.CUDAPlace(2)]. Parameters: - device_ids (list or tuple of int, optional): list of GPU device ids. + device_ids (list|tuple, optional): A list/tuple of GPU device ids (int). Returns: list of paddle.CUDAPlace: Created GPU place list. @@ -429,6 +429,8 @@ def cuda_places(device_ids=None): import paddle import paddle.static as static + # required: gpu + paddle.enable_static() cuda_places = static.cuda_places() diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 768248e136b05fc7bb05c61d7bb974086a484305..30baa2aa26cda3be0ea05e1e55ae3c8999b33740 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -1913,7 +1913,7 @@ def load(program, model_path, executor=None, var_list=None): model_path(str): The file prefix store the program executor(Executor, optional): The executor used for initialize the parameter When startup program is not run. 
- var_list(list, optional): The Tensor list to load single model file saved with + var_list(list|tuple, optional): The Tensor list/tuple to load single model file saved with [ save_params, save_persistables, save_vars ]. Default: None @@ -2103,7 +2103,7 @@ def load_program_state(model_path, var_list=None): Args: model_path(str): The file prefix store the program - var_list(list, optional): The Tensor list to load saved with + var_list(list|tuple, optional): The Tensor list/tuple to load saved with [ save_params, save_persistables, save_vars ]. Default: None. The var_list is only used to get name, diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 3e2c06f69cfbd7eabf02da6c6c0d5dc316edb893..a7ec339bf741e508cdc73de3810cf50c0f98b885 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -148,7 +148,7 @@ def create_global_var(shape, This function creates a new tensor variable with value in the global block(block 0). Parameters: - shape (list of int): Shape of the variable + shape (list[int]|tuple[int]): Shape of the variable value (float): The value of the variable. The new created variable will be filled with it. dtype (str): Data type of the variable diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py index cce951374360b4beab73cf3e2eeff4867f77c33a..251a8407035fdcdcb133ef9e14cf2f937e3603c0 100644 --- a/python/paddle/framework/random.py +++ b/python/paddle/framework/random.py @@ -81,7 +81,7 @@ def set_cuda_rng_state(state_list): Sets generator state for all cuda generators Args: - state_list(list): The cuda states to set back to cuda generators. state_list is obtained from get_cuda_rng_state(). + state_list(list|tuple): The cuda states to set back to cuda generators. state_list is obtained from get_cuda_rng_state(). 
Returns: None diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index b939f548e9c01d7be836a321a876d2abac7b74e4..61d1eb0e373341374199b811f198f7e295026ecc 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -182,7 +182,7 @@ class Accuracy(Metric): Encapsulates accuracy metric logic. Args: - topk (int|tuple(int)): Number of top elements to look at + topk (int|list[int]|tuple[int]): Number of top elements to look at for computing accuracy. Default is (1,). name (str, optional): String name of the metric instance. Default is `acc`. diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 5e8dc15cb4a301fbcc0c976656122bcccfeeedfd..1cc8ef6c39b15e348bc76cb014bde82b1c12e9bf 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -207,7 +207,7 @@ def interpolate(x, size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor, its dimensions size should be a 1. scale_factor (float|Tensor|list|tuple|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. @@ -638,7 +638,7 @@ def upsample(x, size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. 
If a Tensor , its dimensions size should be a 1. scale_factor (float|Tensor|list|tuple|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 5263d54045ef1629b4f8d3bd1b26ddc26d9f33f0..a8d6a6cc38df2d8fec42f675bec814c8f7518d34 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -218,7 +218,7 @@ def conv1d(x, weight (Tensor): The convolution kernel with shape [M, C/g, K], where M is the number of output channels, g is the number of groups, K is the kernel's size. bias (Tensor, optional): The bias with shape [M,]. Default: None. - stride (int or tuple, optional): The stride size. If stride is a tuple, it must + stride (int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain one integers, (stride_size). Default: 1. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. 1. a string in ['valid', 'same']. @@ -227,7 +227,7 @@ def conv1d(x, 4. a list[int] or tuple[int] whose length is 2. It has the form [pad_before, pad_after]. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - dilation (int or tuple, optional): The dilation size. If dilation is a tuple, it must + dilation (int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain one integer, (dilation_size). Default: 1. groups (int, optional): The groups number of the conv1d function. 
According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, @@ -250,7 +250,7 @@ def conv1d(x, ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `data_format` is not "NCL" or "NLC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ShapeError: If the input is not 3-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. @@ -451,8 +451,8 @@ def conv2d(x, the number of output channels, g is the number of groups, kH is the filter's height, kW is the filter's width. bias (Tensor, optional): The bias with shape [M,]. - stride (int|tuple): The stride size. It means the stride in convolution. - If stride is a tuple, it must contain two integers, (stride_height, stride_width). + stride (int|list|tuple): The stride size. It means the stride in convolution. + If stride is a list/tuple, it must contain two integers, (stride_height, stride_width). Otherwise, stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple): The padding size. It means the number of zero-paddings on both sides for each dimension.If `padding` is a string, either 'VALID' or @@ -464,8 +464,8 @@ def conv2d(x, when `data_format` is `"NHWC"`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - dilation (int|tuple): The dilation size. It means the spacing between the kernel - points. If dilation is a tuple, it must contain two integers, (dilation_height, + dilation (int|list|tuple): The dilation size. It means the spacing between the kernel + points. 
If dilation is a list/tuple, it must contain two integers, (dilation_height, dilation_width). Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1. groups (int): The groups number of the Conv2D Layer. According to grouped @@ -488,7 +488,7 @@ def conv2d(x, ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ShapeError: If the input is not 4-D Tensor. ShapeError: If the input's dimension size and filter's dimension size not equal. @@ -637,7 +637,7 @@ def conv1d_transpose(x, K is the size of the kernel. bias(Tensor, optional): The bias, a Tensor with shape [M, ]. stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain one integer, `(stride_size)`. + If stride is a list/tuple, it must contain one integer, `(stride_size)`. Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a @@ -645,7 +645,7 @@ def conv1d_transpose(x, If `padding` is a tuple or list, it could be in two forms: `[pad]` or `[pad_left, pad_right]`. Default: padding = 0. output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension. - If it is a tuple, it must contain one integer. Default: 0. + If it is a list/tuple, it must contain one integer. Default: 0. groups(int, optional): The groups number of the conv1d transpose function. 
Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -653,10 +653,10 @@ def conv1d_transpose(x, filters is only connected to the second half of the input channels. Default: groups = 1. dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain one integer, `(dilation_size)`. + If dilation is a list/tuple, it must contain one integer, `(dilation_size)`. Default: dilation = 1. output_size(int|tuple|list, optional): The output image size. If output size is a - tuple, it must contain one integer, `(feature_length)`. None if use + tuple/list, it must contain one integer, `(feature_length)`. None if use filter_size(shape of weight), padding, and stride to calculate output_size. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`. @@ -675,7 +675,7 @@ def conv1d_transpose(x, Raises: ValueError: If `data_format` is a string, but not "NCL" or "NLC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ValueError: If `output_size` and filter_size are None at the same time. ValueError: If `output_padding` is greater than `stride`. @@ -900,7 +900,7 @@ def conv2d_transpose(x, kH is the height of the kernel, and kW is the width of the kernel. bias(Tensor, optional): The bias, a Tensor with shape [M, ]. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain two integers, (stride_height, stride_width). 
+ If stride is a list/tuple, it must contain two integers, (stride_height, stride_width). Otherwise, stride_height = stride_width = stride. Default: stride = 1. padding(str|int|list|tuple, optional): The padding size. It means the number of zero-paddings on both sides for each dimension. If `padding` is a string, either 'VALID' or @@ -921,10 +921,10 @@ def conv2d_transpose(x, filters is only connected to the second half of the input channels. Default: groups = 1. dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width). + If dilation is a list/tuple, it must contain two integers, (dilation_height, dilation_width). Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1. output_size(int|tuple|list, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_height, image_width). None if use + tuple/list, it must contain two integers, (image_height, image_width). None if use filter_size(shape of weight), padding, and stride to calculate output_size. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. @@ -943,7 +943,7 @@ def conv2d_transpose(x, Raises: ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ValueError: If `output_size` and kernel_size are None at the same time. ShapeError: If the input is not 4-D Tensor. 
@@ -1120,8 +1120,8 @@ def conv3d(x, where M is the number of filters(output channels), g is the number of groups, kD, kH, kW are the filter's depth, height and width respectively. bias (Tensor, optional): The bias, a Tensor of shape [M, ]. - stride (int|tuple): The stride size. It means the stride in convolution. If stride is a - tuple, it must contain three integers, (stride_depth, stride_height, stride_width). + stride (int|list|tuple): The stride size. It means the stride in convolution. If stride is a + list/tuple, it must contain three integers, (stride_depth, stride_height, stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple): The padding size. It means the number of zero-paddings on both sides for each dimension. If `padding` is a string, either 'VALID' or @@ -1133,8 +1133,8 @@ def conv3d(x, when `data_format` is `"NDHWC"`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. - dilation (int|tuple): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, + dilation (int|list|tuple): The dilation size. It means the spacing between the kernel points. + If dilation is a list/tuple, it must contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. groups (int): The groups number of the Conv3D Layer. According to grouped @@ -1292,7 +1292,7 @@ def conv3d_transpose(x, kD, kH, kW are the filter's depth, height and width respectively. bias (Tensor, optional): The bias, a Tensor of shape [M, ]. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. 
- If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + If stride is a list/tuple, it must contain three integers, (stride_depth, stride_height, stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings @@ -1314,11 +1314,11 @@ def conv3d_transpose(x, filters is only connected to the second half of the input channels. Default: groups=1 dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, + If dilation is a list/tuple, it must contain three integers, (dilation_depth, dilation_height, dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. Default: dilation = 1. output_size(int|list|tuple, optional): The output image size. If output size is a - tuple, it must contain three integers, (image_depth, image_height, image_width). + list/tuple, it must contain three integers, (image_depth, image_height, image_width). None if use filter_size(shape of weight), padding, and stride to calculate output_size. data_format (str, optional): Specify the data format of the input, and the data format of the output will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. @@ -1338,7 +1338,7 @@ def conv3d_transpose(x, Raises: ValueError: If `data_format` is not "NCDHW" or "NDHWC". ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 + ValueError: If `padding` is a list/tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. ValueError: If `output_size` and kernel_size are None at the same time. 
ShapeError: If the input is not 5-D Tensor. diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index db0a5a5cab3aa778203273aec800a660f382d0e7..8c001793715e511c40557d0865fcc299a6a248a4 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -300,7 +300,7 @@ class Upsample(layers.Layer): size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_w, ) when input is a 3-D Tensor, the shape is (out_h, out_w) when input is a 4-D Tensor and is (out_d, out_h, out_w) when input is a 5-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor , its dimensions size should be a 1. scale_factor (float|Tensor|list|tuple|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. @@ -419,7 +419,7 @@ class UpsamplingNearest2D(layers.Layer): its data format is specified by :attr:`data_format`. size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_h, out_w) when input is a 4-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor , its dimensions size should be a 1. scale_factor (float|int|list|tuple|Tensor|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. @@ -506,7 +506,7 @@ class UpsamplingBilinear2D(layers.Layer): its data format is specified by :attr:`data_format`. size (list|tuple|Tensor|None): Output shape of image resize layer, the shape is (out_h, out_w) when input is a 4-D Tensor. - Default: None. If a list, each element can be an integer or a Tensor of shape: [1]. + Default: None. 
If a list/tuple, each element can be an integer or a Tensor of shape: [1]. If a Tensor , its dimensions size should be a 1. scale_factor (float|int|list|tuple|Tensor|None): The multiplier for the input height or width. At least one of :attr:`size` or :attr:`scale_factor` must be set. diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index b90421c2f8c2940694fbd376395285c5eef14c8a..d6ba04dad04c796b110c2948e9cf5dcefae46afe 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -232,16 +232,16 @@ class Conv1D(_ConvNd): in_channels(int): The number of channels in the input image. out_channels(int): The number of filter. It is as same as the output feature map. - kernel_size (int|tuple|list): The filter size. If kernel_size is a tuple, + kernel_size (int|tuple|list): The filter size. If kernel_size is a tuple/list, it must contain one integer, (kernel_size). - stride (int|tuple|list, optional): The stride size. If stride is a tuple, it must + stride (int|tuple|list, optional): The stride size. If stride is a tuple/list, it must contain one integer, (stride_size). Default: 1. padding(int|str|tuple|list, optional): The size of zeros to be padded. It must be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means the feature map is zero paded by size of `padding` on both sides. 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides. The default value is 0. - dilation (int|tuple|list, optional): The dilation size. If dilation is a tuple, it must + dilation (int|tuple|list, optional): The dilation size. If dilation is a tuple/list, it must contain one integer, (dilation_size). Default: 1. groups (int, optional): The groups number of the conv2d Layer. 
According to grouped convolution in Alex Krizhevsky's Deep CNN paper: when group=2, @@ -410,12 +410,12 @@ class Conv1DTranspose(_ConvNd): in_channels(int): The number of channels in the input image. out_channels(int): The number of the filter. It is as same as the output feature map. - kernel_size(int|tuple|list, optional): The filter size. If kernel_size is a tuple, + kernel_size(int|tuple|list, optional): The filter size. If kernel_size is a tuple/list, it must contain one integers, (kernel_size). None if use output size to calculate kernel_size. Default: None. kernel_size and output_size should not be None at the same time. stride(int|tuple|list, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain one integer, (stride_size). + If stride is a tuple/list, it must contain one integer, (stride_size). Default: stride = 1. padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a @@ -423,7 +423,7 @@ class Conv1DTranspose(_ConvNd): If `padding` is a tuple or list, it could be in two forms: `[pad]` or `[pad_left, pad_right]`. Default: padding = 0. output_padding(int|list|tuple, optional): The count of zeros to be added to tail of each dimension. - If it is a tuple, it must contain one integer. Default: 0. + If it is a tuple/list, it must contain one integer. Default: 0. groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by grouped convolution in Alex Krizhevsky's Deep CNN paper, in which when group=2, the first half of the filters is only connected to the @@ -432,7 +432,7 @@ class Conv1DTranspose(_ConvNd): Default: groups = 1. bias(bool, optional): Whether to use bias. Default: True. dilation(int|tuple|list, optional): The dilation size. It means the spacing between the kernel points. 
- If dilation is a tuple, it must contain one integer, (dilation_size). + If dilation is a tuple/list, it must contain one integer, (dilation_size). Default: dilation = 1. weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights of conv1d_transpose. If it is set to None or one attribute of ParamAttr, conv1d_transpose @@ -451,7 +451,7 @@ class Conv1DTranspose(_ConvNd): Shape: - x(Tensor): 3-D tensor with shape (batch, in_channels, length) when data_format is "NCL" or shape (batch, length, in_channels) when data_format is "NLC". - - output_size(int|tuple|list, optional): The output image size. If output size is a tuple, it must contain one integer, (feature_length). None if use kernel_size, padding, output_padding and stride to calculate output_size. If output_size and kernel_size are specified at the same time, They should follow the formula above. Default: None. output_size and kernel_size should not be None at the same time. + - output_size(int|tuple|list, optional): The output image size. If output size is a tuple/list, it must contain one integer, (feature_length). None if use kernel_size, padding, output_padding and stride to calculate output_size. If output_size and kernel_size are specified at the same time, They should follow the formula above. Default: None. output_size and kernel_size should not be None at the same time. - output(Tensor): 3-D tensor with same shape as input x. Examples: @@ -555,7 +555,7 @@ class Conv2D(_ConvNd): in_channels(int): The number of input channels in the input image. out_channels(int): The number of output channels produced by the convolution. kernel_size(int|list|tuple, optional): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain three integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. 
The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -565,7 +565,7 @@ class Conv2D(_ConvNd): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3D Layer. According to grouped @@ -710,10 +710,10 @@ class Conv2DTranspose(_ConvNd): Parameters: in_channels(int): The number of channels in the input image. out_channels(int): The number of channels produced by the convolution. - kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple, + kernel_size(int|list|tuple): The kernel size. If kernel_size is a list/tuple, it must contain two integers, (kernel_size_H, kernel_size_W). Otherwise, the kernel will be a square. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. 
@@ -725,7 +725,7 @@ class Conv2DTranspose(_ConvNd): The default value is 0. output_padding(int|list|tuple, optional): Additional size added to one side of each dimension in the output shape. Default: 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: 1. groups(int, optional): The groups number of the Conv2D transpose layer. Inspired by @@ -866,7 +866,7 @@ class Conv3D(_ConvNd): in_channels(int): The number of input channels in the input image. out_channels(int): The number of output channels produced by the convolution. kernel_size(int|list|tuple, optional): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain three integers, (stride_D, stride_H, stride_W). Otherwise, the stride_D = stride_H = stride_W = stride. The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -876,7 +876,7 @@ class Conv3D(_ConvNd): 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). The default value is 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. 
If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3D Layer. According to grouped @@ -1037,11 +1037,11 @@ class Conv3DTranspose(_ConvNd): Parameters: in_channels(int): The number of channels in the input image. out_channels(int): The number of channels produced by the convolution. - kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple, + kernel_size(int|list|tuple): The kernel size. If kernel_size is a list/tuple, it must contain three integers, (kernel_size_D, kernel_size_H, kernel_size_W). Otherwise, the kernel will be a square. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. - If stride is a tuple, it must contain three integers, (stride_depth, stride_height, + If stride is a list/tuple, it must contain three integers, (stride_depth, stride_height, stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. The default value is 1. padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. @@ -1053,7 +1053,7 @@ class Conv3DTranspose(_ConvNd): The default value is 0. output_padding(int|list|tuple, optional): Additional size added to one side of each dimension in the output shape. Default: 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. groups(int, optional): The groups number of the Conv3D transpose layer. 
Inspired by @@ -1071,11 +1071,6 @@ class Conv3DTranspose(_ConvNd): If it is set to None or one attribute of ParamAttr, conv3d_transpose will create ParamAttr as bias_attr. If the Initializer of the bias_attr is not set, the bias is initialized zero. The default value is None. - output_size(int|list|tuple, optional): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). None if use - filter_size, padding, and stride to calculate output_size. - if output_size and filter_size are specified at the same time, They - should follow the formula above. Default: None. data_format(str, optional): Data format that specifies the layout of input. It can be "NCDHW" or "NDHWC". Default: "NCDHW". diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 0cefb89340a7c0f847f2c81e48aa473ecfccaa16..964cfa74ebf0881cb7ceeed4c0e37d64f5005f98 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -447,7 +447,7 @@ class LSTMCell(RNNCellBase): Inputs: - **inputs** (Tensor): shape `[batch_size, input_size]`, the input, corresponding to :math:`x_t` in the formula. - - **states** (tuple, optional): a tuple of two tensors, each of shape `[batch_size, hidden_size]`, the previous hidden state, corresponding to :math:`h_{t-1}, c_{t-1}` in the formula. When states is None, zero state is used. Defaults to None. + - **states** (list|tuple, optional): a list/tuple of two tensors, each of shape `[batch_size, hidden_size]`, the previous hidden state, corresponding to :math:`h_{t-1}, c_{t-1}` in the formula. When states is None, zero state is used. Defaults to None. Returns: - **outputs** (Tensor): shape `[batch_size, hidden_size]`, the output, corresponding to :math:`h_{t}` in the formula. @@ -1251,7 +1251,7 @@ class LSTM(RNNBase): Inputs: - **inputs** (Tensor): the input sequence. 
If `time_major` is True, the shape is `[time_steps, batch_size, input_size]`, else, the shape is `[batch_size, time_steps, hidden_size]`. - - **initial_states** (tuple, optional): the initial state, a tuple of (h, c), the shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_state is not given, zero initial states are used. + - **initial_states** (list|tuple, optional): the initial state, a list/tuple of (h, c), the shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_state is not given, zero initial states are used. - **sequence_length** (Tensor, optional): shape `[batch_size]`, dtype: int64 or int32. The valid lengths of input sequences. Defaults to None. If `sequence_length` is not None, the inputs are treated as padded sequences. In each input sequence, elements whos time step index are not less than the valid length are treated as paddings. Returns: diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 5aded4949e2d7a62ed9f63ca5bc89b48202f4c9c..fe70a99ffb518fa17595c8a9a1817adcec457493 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -461,14 +461,14 @@ class TransformerEncoderLayer(Layer): normalization and post-precess includes dropout, residual connection. Otherwise, no pre-process and post-precess includes dropout, residual connection, layer normalization. Default False - weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. - If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for + weight_attr(ParamAttr|list|tuple, optional): To specify the weight parameter property. + If it is a list/tuple, `weight_attr[0]` would be used as `weight_attr` for MHA, and `weight_attr[1]` would be used as `weight_attr` for linear in FFN. Otherwise, MHA and FFN both use it as `weight_attr` to create parameters. Default: None, which means the default weight parameter property is used. 
See usage for details in :code:`ParamAttr` . - bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property. - If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for + bias_attr (ParamAttr|list|tuple|bool, optional): To specify the bias parameter property. + If it is a list/tuple, `bias_attr[0]` would be used as `bias_attr` for MHA, and `bias_attr[1]` would be used as `bias_attr` for linear in FFN. Otherwise, MHA and FFN both use it as `bias_attr` to create parameters. The `False` value means the corresponding layer would not have trainable @@ -747,16 +747,16 @@ class TransformerDecoderLayer(Layer): normalization and post-precess includes dropout, residual connection. Otherwise, no pre-process and post-precess includes dropout, residual connection, layer normalization. Default False - weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. - If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for + weight_attr(ParamAttr|list|tuple, optional): To specify the weight parameter property. + If it is a list/tuple, `weight_attr[0]` would be used as `weight_attr` for self attention, `weight_attr[1]` would be used as `weight_attr` for cross attention, and `weight_attr[2]` would be used as `weight_attr` for linear in FFN. Otherwise, the three sub-layers all uses it as `weight_attr` to create parameters. Default: None, which means the default weight parameter property is used. See usage for details in :ref:`api_paddle_fluid_param_attr_ParamAttr` . - bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property. - If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for + bias_attr (ParamAttr|list|tuple|bool, optional): To specify the bias parameter property. + If it is a list/tuple, `bias_attr[0]` would be used as `bias_attr` for self attention, `bias_attr[1]` would be used as `bias_attr` for cross attention, and `bias_attr[2]` would be used as `bias_attr` for linear in FFN. 
Otherwise, the three sub-layers all uses it as @@ -1129,8 +1129,8 @@ class Transformer(Layer): normalization and post-precess includes dropout, residual connection. Otherwise, no pre-process and post-precess includes dropout, residual connection, layer normalization. Default False - weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. - If it is a tuple, the length of `weight_attr` could be 1, 2 or 3. If it is 3, + weight_attr(ParamAttr|list|tuple, optional): To specify the weight parameter property. + If it is a list/tuple, the length of `weight_attr` could be 1, 2 or 3. If it is 3, `weight_attr[0]` would be used as `weight_attr` for self attention, `weight_attr[1]` would be used as `weight_attr` for cross attention of `TransformerDecoder`, and `weight_attr[2]` would be used as `weight_attr` for linear in FFN. @@ -1142,8 +1142,8 @@ class Transformer(Layer): Default: None, which means the default weight parameter property is used. See usage for details in :code:`ParamAttr` . - bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property. - If it is a tuple, the length of `bias_attr` could be 1, 2 or 3. If it is 3, + bias_attr (ParamAttr|list|tuple|bool, optional): To specify the bias parameter property. + If it is a list/tuple, the length of `bias_attr` could be 1, 2 or 3. If it is 3, `bias_attr[0]` would be used as `bias_attr` for self attention, `bias_attr[1]` would be used as `bias_attr` for cross attention of `TransformerDecoder`, and `bias_attr[2]` would be used as `bias_attr` for linear in FFN. 
diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index 42e2a5851c21bd3218ef319e40db6d446971c6b3..af07d706e135d1e8596d6ef940d4131eeda4e28c 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -36,20 +36,20 @@ class Adadelta(Optimizer): E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\_rate)^2 Args: - learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. + learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001. epsilon (float): a small float number for numeric stability. Default 1.0e-6. rho (float): a floating point value indicating the decay rate. Default 0.95. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + It can be a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
+ If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index d3077949ff0aed02e22c889a15c613d572f0871d..82615c92b7cfe19fda4f4deadbafa55dcc00a7d3 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -43,16 +43,16 @@ class Adagrad(Optimizer): It can be a float value or a ``Variable`` with a float type. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-06. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_paddle_fluid_param_attr_aramAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. 
\ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It can be a float value as coeff of L2 regularization or \ + :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_paddle_fluid_param_attr_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies, ClipGradByGlobalNorm, ClipGradByNorm and ClipGradByValue. Default None, diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index dcedf4fc5020aa1fdb79ea6b48e095f598dc27e8..4904ebb56cc9159408bbbda6bce34bf08e559a43 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -60,16 +60,16 @@ class Adam(Optimizer): The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-08. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect.
\ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It can be a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index 9d5adf0bba508a4c213e9fc9b18bec1e737cc9ac..175d932540deee87e247faaec149770a670ff96b 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -53,16 +53,16 @@ class Adamax(Optimizer): The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-08. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
- If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It can be a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index eb88a48f30320a5d0f632ddfdb2df504b604d31f..899c2957a6a4f88e198deb1bccdaae6971d2069d 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -43,9 +43,9 @@ class AdamW(Adam): Args: learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``. It can be a float value or a LRScheduler. The default value is 0.001. - parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ - This parameter is required in dygraph mode.
\ - The default value is None in static mode, at this time all parameters will be updated. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates. It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 0.9. diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index f269bffc75ed9c07992b9a572da56b309281405c..7da933a9b72798db2606242c02065b66b333812f 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -312,8 +312,8 @@ class PiecewiseDecay(LRScheduler): learning_rate = 0.1 Args: - boundaries(list): A list of steps numbers. The type of element in the list is python int. - values(list): A list of learning rate values that will be picked during different epoch boundaries. + boundaries(list|tuple): A list/tuple of steps numbers. The type of element in the list is python int. + values(list|tuple): A list/tuple of learning rate values that will be picked during different epoch boundaries. The type of element in the list is python float. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index 932a4ad100ec4aea2ded1fa242acc1c280ac0dae..c1dc0e8ddd8af5d7deed154ce791ac25fc219105 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -49,16 +49,16 @@ class Momentum(Optimizer): learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. 
It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001. momentum (float): Momentum factor. The default value is 0.9. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + It can be a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` .
There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index a050852728da96479b76f38cd1c74a3f4c4e6a6f..9425ab1431e70b53c3a27024f0866aca209002b8 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -53,7 +53,7 @@ class Optimizer(object): Args: learning_rate (float|LRScheduler): The learning rate used to update ``Parameter``. It can be a float value or any subclass of ``LRScheduler`` . - parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py index 7146b7d89935c9b45fbcf12726c2225ab4df5814..a2fd40bc0b369965734a24aaa7955155abc21aeb 100644 --- a/python/paddle/optimizer/rmsprop.py +++ b/python/paddle/optimizer/rmsprop.py @@ -78,16 +78,16 @@ class RMSProp(Optimizer): the gradient; if False, by the uncentered second moment. Setting this to True may help with training, but is slightly more expensive in terms of computation and memory. Defaults to False. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ - This parameter is required in dygraph mode. \ - The default value is None in static mode, at this time all parameters will be updated. - weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. 
- If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ + This parameter is required in dygraph mode. \ + The default value is None in static mode, at this time all parameters will be updated. + weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ + It can be a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index fc208519a2e61397717f354776ad1951041ad472..ecac40aec72983306176959e7b6dff0709a8310a 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -30,16 +30,16 @@ class SGD(Optimizer): Parameters: learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``. It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001. - parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``.
\ + parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ - It canbe a float value as coeff of L2 regularization or \ - :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. - If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ - the regularization setting here in optimizer will be ignored for this parameter. \ - Otherwise, the regularization setting here in optimizer will take effect. \ - Default None, meaning there is no regularization. + It can be a float value as coeff of L2 regularization or \ + :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`. + If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \ + the regularization setting here in optimizer will be ignored for this parameter. \ + Otherwise, the regularization setting here in optimizer will take effect. \ + Default None, meaning there is no regularization. grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of some derived class of ``GradientClipBase`` . There are three cliping strategies ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` , diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 88802026db86c7cae879ff451a79fc9ddfc40002..659b7f45b26a7a2a3e75bd2d594f6b370d6ef506 100755 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -86,7 +86,7 @@ def fc(x, out.shape = (1, 2) Args: - x (Tensor|list of Tensor): A tensor or a list of tensor. The number of dimensions + x (Tensor|list[Tensor]|tuple[Tensor]): A tensor or a list/tuple of tensors. The number of dimensions of each tensor is at least 2.
The data type should be float16, float32 or float64. size (int): The number of output units in this layer, which also means the feature size of output tensor. @@ -233,16 +233,16 @@ def deform_conv2d(x, deformable convolution v1. num_filters(int): The number of filter. It is as same as the output image channel. - filter_size (int|tuple): The filter size. If filter_size is a tuple, + filter_size (int|list|tuple): The filter size. If filter_size is a list/tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise, the filter will be a square. - stride (int|tuple, Optional): The stride size. If stride is a tuple, it must + stride (int|list|tuple, Optional): The stride size. If stride is a list/tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: stride = 1. - padding (int|tuple, Optional): The padding size. If padding is a tuple, it must + padding (int|list|tuple, Optional): The padding size. If padding is a list/tuple, it must contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. Default: padding = 0. - dilation (int|tuple, Optional): The dilation size. If dilation is a tuple, it must + dilation (int|list|tuple, Optional): The dilation size. If dilation is a list/tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: dilation = 1. groups (int, Optional): The groups number of the deformable conv layer. According to diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index e4222dcccbd700cbcf21d5f1b1a19d6cd6f4c643..dc811ea0f3fa6d78c9f2d5ab7f5a87809fb2c70b 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -132,7 +132,7 @@ def flip(x, axis, name=None): Args: x (Tensor): A Tensor(or LoDTensor) with shape :math:`[N_1, N_2,..., N_k]` . 
The data type of the input Tensor x should be float32, float64, int32, int64, bool. - axis (list): The axis(axes) to flip on. Negative indices for indexing from the end are accepted. + axis (list|tuple): The axis(axes) to flip on. Negative indices for indexing from the end are accepted. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . @@ -545,7 +545,7 @@ def squeeze(x, axis=None, name=None): Args: x (Tensor): The input Tensor. Supported data type: float32, float64, bool, int8, int32, int64. - axis (int|list|tuple, optional): An integer or list of integers, indicating the dimensions to be squeezed. Default is None. + axis (int|list|tuple, optional): An integer or list/tuple of integers, indicating the dimensions to be squeezed. Default is None. The range of axis is :math:`[-ndim(x), ndim(x))`. If axis is negative, :math:`axis = axis + ndim(x)`. If axis is None, all the dimensions of x of size 1 will be removed. diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 328115ac933a747bc8b473b7c31d84f3a484643e..65f57b4b4e93babc8eb33e26d2e283e70a8d58c7 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -752,7 +752,7 @@ def add_n(inputs, name=None): [14, 16, 18]] Args: - inputs (Tensor|list(Tensor)): A Tensor list. The shape and data type of the list elements should be consistent. + inputs (Tensor|list[Tensor]|tuple[Tensor]): A Tensor or a list/tuple of Tensors. The shape and data type of the list/tuple elements should be consistent. Input can be multi-dimensional Tensor, and data types can be: float32, float64, int32, int64. name(str, optional): The default value is None. Normally there is no need for user to set this property. 
For more information, please refer to :ref:`api_guide_Name` @@ -1082,7 +1082,7 @@ def max(x, axis=None, keepdim=False, name=None): Args: x(Tensor): A tensor, the data type is float32, float64, int32, int64. - axis(list|int, optional): The axis along which the maximum is computed. + axis(int|list|tuple, optional): The axis along which the maximum is computed. If :attr:`None`, compute the maximum over all elements of `x` and return a Tensor with a single element, otherwise must be in the range :math:`[-x.ndim(x), x.ndim(x))`. @@ -1174,7 +1174,7 @@ def min(x, axis=None, keepdim=False, name=None): Args: x(Tensor): A tensor, the data type is float32, float64, int32, int64. - axis(list|int, optional): The axis along which the minimum is computed. + axis(int|list|tuple, optional): The axis along which the minimum is computed. If :attr:`None`, compute the minimum over all elements of `x` and return a Tensor with a single element, otherwise must be in the range :math:`[-x.ndim, x.ndim)`. diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 005e2b123077c88f8108db5196794e50369d170e..47425476a656a9c49ed2af7177474a2f7a5f1af2 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -454,13 +454,13 @@ def deform_conv2d(x, the number of output channels, g is the number of groups, kH is the filter's height, kW is the filter's width. bias (Tensor, optional): The bias with shape [M,]. - stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride (int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. Default: stride = 1. - padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must + padding (int|list|tuple, optional): The padding size. If padding is a list/tuple, it must contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. 
Default: padding = 0. - dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation (int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. Default: dilation = 1. deformable_groups (int): The number of deformable group partitions. @@ -644,13 +644,13 @@ class DeformConv2D(Layer): in_channels(int): The number of input channels in the input image. out_channels(int): The number of output channels produced by the convolution. kernel_size(int|list|tuple): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + stride(int|list|tuple, optional): The stride size. If stride is a list/tuple, it must contain three integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. The default value is 1. - padding (int|list|tuple, optional): The padding size. If padding is a tuple, it must + padding (int|list|tuple, optional): The padding size. If padding is a list/tuple, it must contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. Default: padding = 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + dilation(int|list|tuple, optional): The dilation size. If dilation is a list/tuple, it must contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the dilation_D = dilation_H = dilation_W = dilation. The default value is 1. deformable_groups (int): The number of deformable group partitions. 
diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index c65c2423d131900b62e5ee191288f029ae708776..c0e72877ffcddf1eb44d88904e609a9482f535cd 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -153,8 +153,8 @@ def pad(img, padding, fill=0, padding_mode='constant'): Args: img (PIL.Image|np.array): Image to be padded. padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. fill (float, optional): Pixel fill value for constant fill. If a tuple of diff --git a/python/paddle/vision/transforms/functional_cv2.py b/python/paddle/vision/transforms/functional_cv2.py index d50ba7b23c74a501a65a4004dc745c0f4845954e..99cbfd6dc4f8dd195960b776864bc523bdca2c71 100644 --- a/python/paddle/vision/transforms/functional_cv2.py +++ b/python/paddle/vision/transforms/functional_cv2.py @@ -136,8 +136,8 @@ def pad(img, padding, fill=0, padding_mode='constant'): Args: img (np.array): Image to be padded. padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. 
fill (float, optional): Pixel fill value for constant fill. If a tuple of diff --git a/python/paddle/vision/transforms/functional_pil.py b/python/paddle/vision/transforms/functional_pil.py index 516c28f849915c3546a40bc4a7e962968ce56b23..eee60c5452b2de1235c577b2eabb8de1cfdc1467 100644 --- a/python/paddle/vision/transforms/functional_pil.py +++ b/python/paddle/vision/transforms/functional_pil.py @@ -141,8 +141,8 @@ def pad(img, padding, fill=0, padding_mode='constant'): Args: img (PIL.Image): Image to be padded. padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. fill (float, optional): Pixel fill value for constant fill. If a tuple of diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 921e78cace6b3177100b98e4e5bbbbd63f6ab238..6eeb726fcee7056d5d2cbc3f3a4a62fc743af016 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -86,7 +86,7 @@ class Compose(object): together for a dataset transform. Args: - transforms (list): List of transforms to compose. + transforms (list|tuple): List/Tuple of transforms to compose. Returns: A compose object which is callable, __call__ for this Compose @@ -608,8 +608,8 @@ class Normalize(BaseTransform): ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` Args: - mean (int|float|list): Sequence of means for each channel. - std (int|float|list): Sequence of standard deviations for each channel. + mean (int|float|list|tuple): Sequence of means for each channel. 
+ std (int|float|list|tuple): Sequence of standard deviations for each channel. data_format (str, optional): Data format of img, should be 'HWC' or 'CHW'. Default: 'CHW'. to_rgb (bool, optional): Whether to convert to rgb. Default: False. @@ -1022,11 +1022,11 @@ class Pad(BaseTransform): Args: padding (int|list|tuple): Padding on each border. If a single int is provided this - is used to pad all borders. If tuple of length 2 is provided this is the padding - on left/right and top/bottom respectively. If a tuple of length 4 is provided + is used to pad all borders. If list/tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a list/tuple of length 4 is provided this is the padding for the left, top, right and bottom borders respectively. - fill (int|list|tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + fill (int|list|tuple): Pixel fill value for constant fill. Default is 0. If a list/tuple of length 3, it is used to fill R, G, B channels respectively. This value is only used when the padding_mode is constant padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.