From c5af51ca23a97dfcf20f4eb3ce3b98089892d21d Mon Sep 17 00:00:00 2001 From: mjxs <52824616+kk-2000@users.noreply.github.com> Date: Thu, 15 Dec 2022 10:57:42 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dpaddle.amp.decorate=E7=AD=89A?= =?UTF-8?q?PI=E7=9A=84=E6=96=87=E6=A1=A3=20(#48983)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 涉及到的api有 paddle.amp.decorate paddle.static.npu_places paddle.signal.istft paddle.signal.stft paddle.linalg.eigvalsh paddle.randint_like * change signal.stft * randint_like的low增加optional * ; test=docs_preview * 修改了注解格式; test=docs_preview * 修改了公式格式 * 修改了decorate的models等 * test=document_fix Co-authored-by: Ligoml <39876205+Ligoml@users.noreply.github.com> --- python/paddle/amp/auto_cast.py | 4 +-- python/paddle/fluid/framework.py | 3 +- python/paddle/fluid/optimizer.py | 11 +++--- .../incubate/operators/softmax_mask_fuse.py | 2 +- .../softmax_mask_fuse_upper_triangle.py | 2 +- python/paddle/signal.py | 34 ++++++++----------- python/paddle/tensor/linalg.py | 2 +- python/paddle/tensor/random.py | 8 ++--- 8 files changed, 30 insertions(+), 36 deletions(-) diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py index bfa7bc8e66..b26a585d5b 100644 --- a/python/paddle/amp/auto_cast.py +++ b/python/paddle/amp/auto_cast.py @@ -94,9 +94,9 @@ def decorate( Commonly, it is used together with `auto_cast` to achieve Pure float16/bfloat16 in imperative mode. Args: - models(Layer|list of Layer, optional): The defined models by user, models must be either a single model or a list of models. Default is None. + models(Layer|list of Layer): The defined models by user, models must be either a single model or a list of models. Default is None. optimizers(Optimizer|list of Optimizer, optional): The defined optimizers by user, optimizers must be either a single optimizer or a list of optimizers. Default is None. - level(str, optional): Auto mixed precision level. 
Accepted values are "O1" and "O2": O1 represent mixed precision, the decorator will do nothing; + level(str, optional): Auto mixed precision level. Accepted values are 'O1' and 'O2': O1 represent mixed precision, the decorator will do nothing; O2 represent Pure float16/bfloat16, the decorator will cast all parameters of models to float16/bfloat16, except BatchNorm and LayerNorm. Default is O1(amp) dtype(str, optional): Whether to use 'float16' or 'bfloat16'. Default is 'float16'. master_weight(bool, optinal): For level='O2', whether to use multi-precision during weight updating. If master_weight is None, in O2 level optimizer will use multi-precision. Default is None. diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index f9ac515aeb..4f56306e5e 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -932,7 +932,8 @@ def xpu_places(device_ids=None): def npu_places(device_ids=None): """ - **Note**: + + Note: For multi-card tasks, please use `FLAGS_selected_npus` environment variable to set the visible NPU device. This function creates a list of :code:`paddle.NPUPlace` objects. diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 2bf2c4542b..79f2cde1b1 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -4174,7 +4174,6 @@ class ModelAverage(Optimizer): class ExponentialMovingAverage: r""" - :api_attr: Static Graph Compute the moving average of parameters with exponential decay. Given a parameter :math:`\\theta`, its exponential moving average (EMA) @@ -4182,9 +4181,9 @@ class ExponentialMovingAverage: .. 
math:: - \\text{EMA}_0 & = 0 + \text{EMA}_0 & = 0 - \\text{EMA}_t & = \\text{decay} * \\text{EMA}_{t-1} + (1 - \\text{decay}) * \\theta_t + \text{EMA}_t & = \text{decay} * \text{EMA}_{t-1} + (1 - \text{decay}) * \theta_t The average results calculated by **update()** method will be saved in temporary variables which are created and maintained by the object, and can @@ -4193,12 +4192,12 @@ class ExponentialMovingAverage: **Bias correction**. All EMAs are initialized to :math:`0` and hence they will be zero biased, which can be corrected by divided by a factor - :math:`(1 - \\text{decay}^t)` , i.e., the actual EMAs applied to parameters + :math:`(1 - \text{decay}^t)` , i.e., the actual EMAs applied to parameters when calling **apply()** method would be .. math:: - \\widehat{\\text{EMA}}_t = \\frac{\\text{EMA}_t}{1 - \\text{decay}^t} + \widehat{\text{EMA}}_t = \frac{\text{EMA}_t}{1 - \text{decay}^t} **Decay rate scheduling**. A large decay rate very close to 1 would result in that the averages move very slowly. And a better strategy is to set a @@ -4208,7 +4207,7 @@ class ExponentialMovingAverage: .. math:: - \\min(\\text{decay}, \\frac{1 + \\text{thres_steps}}{10 + \\text{thres_steps}}) + \min(\text{decay}, \frac{1 + \text{thres_steps}}{10 + \text{thres_steps}}) Usually **thres_steps** can be the global training steps. diff --git a/python/paddle/incubate/operators/softmax_mask_fuse.py b/python/paddle/incubate/operators/softmax_mask_fuse.py index 672f4ad545..399f8e9bd9 100644 --- a/python/paddle/incubate/operators/softmax_mask_fuse.py +++ b/python/paddle/incubate/operators/softmax_mask_fuse.py @@ -28,7 +28,7 @@ def softmax_mask_fuse(x, mask, name=None): .. math:: out = softmax(x + mask) - **Note**: + Note: This API only supports GPU. 
Args: diff --git a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py index 936b197151..ffe8d8ac5a 100644 --- a/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py +++ b/python/paddle/incubate/operators/softmax_mask_fuse_upper_triangle.py @@ -29,7 +29,7 @@ def softmax_mask_fuse_upper_triangle(x): .. math:: out = softmax(LowerTriangular(x)) - **Note**: + Note: This API only supports GPU. Args: diff --git a/python/paddle/signal.py b/python/paddle/signal.py index 6ebc08b32d..20a925c0f3 100644 --- a/python/paddle/signal.py +++ b/python/paddle/signal.py @@ -266,18 +266,13 @@ def stft( windows of the input using this formula: .. math:: - X_t[\omega] = \sum_{n = 0}^{N-1}% - \text{window}[n]\ x[t \times H + n]\ % - e^{-{2 \pi j \omega n}/{N}} + X_t[f] = \sum_{n = 0}^{N-1} \text{window}[n]\ x[t \times H + n]\ e^{-{2 \pi j f n}/{N}} Where: - :math:`t`: The :math:`t`-th input window. - - - :math:`\omega`: Frequency :math:`0 \leq \omega < \text{n\_fft}` for `onesided=False`, - or :math:`0 \leq \omega < \lfloor \text{n\_fft} / 2 \rfloor + 1` for `onesided=True`. - + - :math:`f`: Frequency :math:`0 \leq f < \text{n_fft}` for `onesided=False`, + or :math:`0 \leq f < \lfloor \text{n_fft} / 2 \rfloor + 1` for `onesided=True`. - :math:`N`: Value of `n_fft`. - - :math:`H`: Value of `hop_length`. Args: @@ -285,11 +280,11 @@ def stft( shape `[..., seq_length]`. It can be a real-valued or a complex Tensor. n_fft (int): The number of input samples to perform Fourier transform. hop_length (int, optional): Number of steps to advance between adjacent windows - and `0 < hop_length`. Default: `None`(treated as equal to `n_fft//4`) - win_length (int, optional): The size of window. Default: `None`(treated as equal + and `0 < hop_length`. Default: `None` (treated as equal to `n_fft//4`) + win_length (int, optional): The size of window. 
Default: `None` (treated as equal to `n_fft`) window (Tensor, optional): A 1-dimensional tensor of size `win_length`. It will - be center padded to length `n_fft` if `win_length < n_fft`. Default: `None`( + be center padded to length `n_fft` if `win_length < n_fft`. Default: `None` ( treated as a rectangle window with value equal to 1 of size `win_length`). center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\_length` at the center of :math:`t`-th frame. Default: `True`. @@ -438,21 +433,20 @@ def istft( Inverse short-time Fourier transform (ISTFT). Reconstruct time-domain signal from the giving complex input and window tensor when - nonzero overlap-add (NOLA) condition is met: + nonzero overlap-add (NOLA) condition is met: .. math:: - \sum_{t = -\infty}^{\infty}% - \text{window}^2[n - t \times H]\ \neq \ 0, \ \text{for } all \ n + \sum_{t = -\infty}^{\infty} \text{window}^2[n - t \times H]\ \neq \ 0, \ \text{for } all \ n Where: - :math:`t`: The :math:`t`-th input window. - :math:`N`: Value of `n_fft`. - :math:`H`: Value of `hop_length`. - Result of `istft` expected to be the inverse of `paddle.signal.stft`, but it is + Result of `istft` expected to be the inverse of `paddle.signal.stft`, but it is not guaranteed to reconstruct a exactly realizible time-domain signal from a STFT complex tensor which has been modified (via masking or otherwise). Therefore, `istft` - gives the [Griffin-Lim optimal estimate](https://ieeexplore.ieee.org/document/1164317) + gives the `[Griffin-Lim optimal estimate] <https://ieeexplore.ieee.org/document/1164317>`_ (optimal in a least-squares sense) for the corresponding signal. Args: @@ -460,9 +454,9 @@ def istft( Tensor with shape `[..., n_fft, num_frames]`. n_fft (int): The size of Fourier transform. hop_length (int, optional): Number of steps to advance between adjacent windows - from time-domain signal and `0 < hop_length < win_length`. Default: `None`( + from time-domain signal and `0 < hop_length < win_length`. 
Default: `None` ( treated as equal to `n_fft//4`) - win_length (int, optional): The size of window. Default: `None`(treated as equal + win_length (int, optional): The size of window. Default: `None` (treated as equal to `n_fft`) window (Tensor, optional): A 1-dimensional tensor of size `win_length`. It will be center padded to length `n_fft` if `win_length < n_fft`. It should be a @@ -470,7 +464,7 @@ def istft( a rectangle window with value equal to 1 of size `win_length`). center (bool, optional): It means that whether the time-domain signal has been center padded. Default: `True`. - normalized (bool, optional): Control whether to scale the output by `1/sqrt(n_fft)`. + normalized (bool, optional): Control whether to scale the output by :math:`1/sqrt(n_{fft})`. Default: `False` onesided (bool, optional): It means that whether the input STFT tensor is a half of the conjugate symmetry STFT tensor transformed from a real-valued signal @@ -486,7 +480,7 @@ def istft( Returns: A tensor of least squares estimation of the reconstructed signal(s) with shape - `[..., seq_length]` + `[..., seq_length]` Examples: .. code-block:: python diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 2a3ae8001e..d91b63dc9a 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -3261,7 +3261,7 @@ def eigvalsh(x, UPLO='L', name=None): complex Hermitian (conjugate symmetric) or a real symmetric matrix. Args: - x (Tensor): A tensor with shape :math:`[_, M, M]` , The data type of the input Tensor x + x (Tensor): A tensor with shape :math:`[*, M, M]` , where * is zero or greater batch dimension. The data type of the input Tensor x should be one of float32, float64, complex64, complex128. UPLO(str, optional): Lower triangular part of a (‘L’, default) or the upper triangular part (‘U’). name(str, optional): The default value is None. 
Normally there is no need for user to set this diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 3bbf7831e8..1922bdca30 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -878,14 +878,14 @@ def randint_like(x, low=0, high=None, dtype=None, name=None): If ``high`` is None (the default), the range is [0, ``low``). Args: - x (Tensor): The input tensor which specifies shape. The dtype of ``x`` + x (Tensor): The input multi-dimensional tensor which specifies shape. The dtype of ``x`` can be bool, int32, int64, float16, float32, float64. - low (int): The lower bound on the range of random values to generate. + low (int, optional): The lower bound on the range of random values to generate. The ``low`` is included in the range. If ``high`` is None, the range is [0, ``low``). Default is 0. high (int, optional): The upper bound on the range of random values to - generate, the ``high`` is excluded in the range. Default is None - (see above for behavior if high = None). Default is None. + generate, the ``high`` is excluded in the range. Default is None. + If ``high`` is None, the range is [0, ``low``). dtype (str|np.dtype, optional): The data type of the output tensor. Supported data types: bool, int32, int64, float16, float32, float64. If ``dytpe`` is None, the data type is the -- GitLab