Unverified Commit 3815d7aa authored by Leo Chen, committed by GitHub

Upgrade string literals to raw string (#28989)

* upgrade comment string to raw string

* fix string in

* fix string with ' '

* revert update on comments

* upgrade only necessary

* fix sample code checker

* fix comments with '''
Parent 767d0ba2
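For readers unfamiliar with the motivation: in Python 3, a backslash that does not form a recognized escape sequence inside an ordinary string literal (for example `\.` or `\s`) is kept literally but emits a DeprecationWarning when the literal is compiled (Python 3.6+; it becomes a SyntaxWarning and, eventually, an error in newer releases), while a *recognized* escape such as `\t` silently changes the string. Raw strings (`r"..."`) pass every backslash through verbatim, which is why the regex patterns, Windows paths, and math-heavy docstrings below are upgraded. A small illustrative sketch, not part of the patch (the literals are taken from it):

```python
import re

# "\." is an invalid escape sequence: the backslash is kept, but compiling the
# literal emits a DeprecationWarning (run with `python -W error` to see it fail).
plain = "aclImdb/train/pos/.*\.txt$"   # works today, but warns
raw = r"aclImdb/train/pos/.*\.txt$"    # preferred spelling after this patch
assert plain == raw                    # same characters, different literal form

pattern = re.compile(raw)
print(bool(pattern.search("aclImdb/train/pos/0_9.txt")))  # True

# Windows paths are the other common case: "\p" happens to survive,
# but a recognized escape such as "\t" would silently become a tab.
assert r"C:\package\paddlepaddle" == "C:\\package\\paddlepaddle"
assert "C:\temp" != r"C:\temp"         # "\t" turned into a tab character
```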
#!/bin/python
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import platform
from sys import argv
@@ -120,7 +135,7 @@ python setup.py install
self.py_str = ["py27", "py35", "py36", "py37"]
self.pip_end = ".whl --no-deps"
self.pip_prefix_linux = "pip install /package/paddlepaddle"
- self.pip_prefix_windows = "pip install C:\package\paddlepaddle"
+ self.pip_prefix_windows = r"pip install C:\package\paddlepaddle"
self.pip_gpu = "_gpu-"
self.pip_cpu = "-"
self.mac_pip = [
@@ -216,7 +231,7 @@ package:
- matplotlib"""
if not (cuda_str == None):
meta_str = meta_str + cuda_str
blt_str = var.blt_const + blt_var
if (python_str == var.python27):
blt_str = blt_str + """
@@ -224,7 +239,7 @@ package:
else:
meta_str = meta_str + """
- opencv>=3.4.2"""
meta_str = meta_str + var.test + var.about
meta_filename = "meta.yaml"
build_filename = "bld.bat"
...
@@ -116,8 +116,8 @@ def train(word_idx):
:rtype: callable
"""
return reader_creator(
- re.compile("aclImdb/train/pos/.*\.txt$"),
- re.compile("aclImdb/train/neg/.*\.txt$"), word_idx)
+ re.compile(r"aclImdb/train/pos/.*\.txt$"),
+ re.compile(r"aclImdb/train/neg/.*\.txt$"), word_idx)
@deprecated(
@@ -137,8 +137,8 @@ def test(word_idx):
:rtype: callable
"""
return reader_creator(
- re.compile("aclImdb/test/pos/.*\.txt$"),
- re.compile("aclImdb/test/neg/.*\.txt$"), word_idx)
+ re.compile(r"aclImdb/test/pos/.*\.txt$"),
+ re.compile(r"aclImdb/test/neg/.*\.txt$"), word_idx)
@deprecated(
@@ -153,7 +153,7 @@ def word_dict():
:rtype: dict
"""
return build_dict(
- re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
+ re.compile(r"aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
@deprecated(
...
@@ -18,13 +18,13 @@ import paddle.dataset.imdb
import unittest
import re
- TRAIN_POS_PATTERN = re.compile("aclImdb/train/pos/.*\.txt$")
- TRAIN_NEG_PATTERN = re.compile("aclImdb/train/neg/.*\.txt$")
- TRAIN_PATTERN = re.compile("aclImdb/train/.*\.txt$")
- TEST_POS_PATTERN = re.compile("aclImdb/test/pos/.*\.txt$")
- TEST_NEG_PATTERN = re.compile("aclImdb/test/neg/.*\.txt$")
- TEST_PATTERN = re.compile("aclImdb/test/.*\.txt$")
+ TRAIN_POS_PATTERN = re.compile(r"aclImdb/train/pos/.*\.txt$")
+ TRAIN_NEG_PATTERN = re.compile(r"aclImdb/train/neg/.*\.txt$")
+ TRAIN_PATTERN = re.compile(r"aclImdb/train/.*\.txt$")
+ TEST_POS_PATTERN = re.compile(r"aclImdb/test/pos/.*\.txt$")
+ TEST_NEG_PATTERN = re.compile(r"aclImdb/test/neg/.*\.txt$")
+ TEST_PATTERN = re.compile(r"aclImdb/test/.*\.txt$")
class TestIMDB(unittest.TestCase):
...
@@ -862,7 +862,7 @@ class DistributedStrategy(object):
@property
def dgc_configs(self):
- """
+ r"""
Set Deep Gradient Compression training configurations. In general, dgc has serveral configurable
settings that can be configured through a dict.
...
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- """
+ r"""
fleetrun is a module that spawns multiple distributed
process on each training node for gpu training and cpu training.
Usage:
...
@@ -158,13 +158,13 @@ class ParameterServerOptimizer(MetaOptimizerBase):
['vm_stat'], stdout=subprocess.PIPE).communicate()[0]
# Process vm_stat
vmLines = vm.split('\n')
- sep = re.compile(':[\s]+')
+ sep = re.compile(r':[\s]+')
vmStats = {}
for row in range(1, len(vmLines) - 2):
rowText = vmLines[row].strip()
rowElements = sep.split(rowText)
vmStats[(rowElements[0]
- )] = int(rowElements[1].strip('\.')) * 4096
+ )] = int(rowElements[1].strip(r'\.')) * 4096
return vmStats["Pages free"]
elif platform.system() == "Linux":
mems = {}
...
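A side note on the `strip` hunk just above: `str.strip` treats its argument as a set of characters to remove from both ends, so `'\.'` and `r'\.'` strip exactly the same characters (a backslash and a dot); the raw prefix only silences the invalid-escape warning. A tiny hedged example (the sample field value is made up, not real `vm_stat` output):

```python
# str.strip() removes any of the given characters from both ends of the string,
# so "\\." and r"\." are interchangeable; the raw form just avoids the warning.
row_element = "4096."                                  # hypothetical vm_stat field
assert row_element.strip("\\.") == row_element.strip(r"\.") == "4096"
print(int(row_element.strip(r"\.")) * 4096)            # 16777216
```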
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- """
+ r"""
paddle.distributed.launch is a module that spawns multiple distributed
process on each training node for gpu training.
Usage:
...
@@ -166,7 +166,7 @@ class Distribution(object):
class Uniform(Distribution):
- """Uniform distribution with `low` and `high` parameters.
+ r"""Uniform distribution with `low` and `high` parameters.
Mathematical Details
@@ -374,7 +374,7 @@ class Uniform(Distribution):
return elementwise_div((lb * ub), (self.high - self.low), name=name)
def entropy(self):
- """Shannon entropy in nats.
+ r"""Shannon entropy in nats.
The entropy is
@@ -391,7 +391,7 @@ class Uniform(Distribution):
class Normal(Distribution):
- """The Normal distribution with location `loc` and `scale` parameters.
+ r"""The Normal distribution with location `loc` and `scale` parameters.
Mathematical details
@@ -534,7 +534,7 @@ class Normal(Distribution):
return output
def entropy(self):
- """Shannon entropy in nats.
+ r"""Shannon entropy in nats.
The entropy is
@@ -599,7 +599,7 @@ class Normal(Distribution):
name=name)
def kl_divergence(self, other):
- """The KL-divergence between two normal distributions.
+ r"""The KL-divergence between two normal distributions.
The probability density function (pdf) is
@@ -644,7 +644,7 @@ class Normal(Distribution):
class Categorical(Distribution):
- """
+ r"""
Categorical distribution is a discrete probability distribution that
describes the possible results of a random variable that can take on
one of K possible categories, with the probability of each category
...
@@ -40,7 +40,7 @@ class BaseErrorClipAttr(object):
class ErrorClipByValue(BaseErrorClipAttr):
- """
+ r"""
Clips tensor values to the range [min, max].
Given a tensor ``t`` (see Examples below), this operation clips its value \
@@ -241,7 +241,7 @@ class ClipGradByValue(ClipGradBase):
class ClipGradByNorm(ClipGradBase):
- """
+ r"""
Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` .
- If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio.
@@ -343,7 +343,7 @@ class ClipGradByNorm(ClipGradBase):
class ClipGradByGlobalNorm(ClipGradBase):
- """
+ r"""
Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
:math:`t\_list` , and limit it to ``clip_norm`` .
...
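Most of the remaining hunks only turn a docstring's opening `"""` into `r"""`. These docstrings embed reST/LaTeX markup such as `:math:`t\_list`` or `\frac{...}`, and in a non-raw literal `\f` is a *valid* escape (a form feed) while `\_` is an invalid one, so the rendered docs could be silently corrupted or emit warnings. A minimal illustration (the class and wording are hypothetical, not from the patch):

```python
class ClipDocExample(object):
    r"""
    Raw docstrings keep backslash-heavy markup intact, e.g. :math:`t\_list`
    or :math:`\frac{clip\_norm}{global\_norm}`.  Without the leading ``r``,
    ``\f`` would silently become a form-feed character and ``\_`` would emit
    a DeprecationWarning when the module is compiled.
    """

# The backslashes survive verbatim in __doc__:
assert r"t\_list" in ClipDocExample.__doc__
assert "\f" not in ClipDocExample.__doc__
```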
@@ -137,7 +137,7 @@ def var_conv_2d(input,
act=None,
dtype='float32',
name=None):
- """
+ r"""
The var_conv_2d layer calculates the output base on the :attr:`input` with variable length,
row, col, input channel, filter size and strides. Both :attr:`input`, :attr:`row`,
and :attr:`col` are 1-level LodTensor. The convolution operation is same as conv2d layer with
@@ -477,7 +477,7 @@ def fused_embedding_seq_pool(input,
combiner='sum',
param_attr=None,
dtype='float32'):
- """
+ r"""
**Embedding Sequence pool**
This layer is the fusion of lookup table and sequence_pool.
@@ -1442,7 +1442,7 @@ def batch_fc(input, param_size, param_attr, bias_size, bias_attr, act=None):
def _pull_box_extended_sparse(input, size, extend_size=64, dtype='float32'):
- """
+ r"""
**Pull Box Extended Sparse Layer**
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
BoxPS lookup table. The result of this lookup is the embedding of each ID in the
@@ -1640,7 +1640,7 @@ def fused_bn_add_act(x,
moving_variance_name=None,
act=None,
name=None):
- """
+ r"""
This Op performs batch norm on input x, and adds the result to input y. Then
it performs activation on the sum. The data format of inputs must be NHWC
`[batch, in_height, in_width, in_channels]`.
...
@@ -175,7 +175,7 @@ def basic_gru(input,
activation=None,
dtype='float32',
name='basic_gru'):
- """
+ r"""
GRU implementation using basic operator, supports multiple layers and bidirectional gru.
.. math::
@@ -418,7 +418,7 @@ def basic_lstm(input,
forget_bias=1.0,
dtype='float32',
name='basic_lstm'):
- """
+ r"""
LSTM implementation using basic operators, supports multiple layers and bidirectional LSTM.
.. math::
@@ -697,7 +697,7 @@ def basic_lstm(input,
class BasicLSTMUnit(Layer):
- """
+ r"""
****
BasicLSTMUnit class, Using basic operator to build LSTM
The algorithm can be described as the code below.
...
@@ -44,7 +44,7 @@ DEBUG = False
def memory_usage(program, batch_size):
- """
+ r"""
Get the estimate memory usage of program with input batch size.
Args:
...
@@ -64,7 +64,7 @@ class ImperativeQuantAware(object):
act_preprocess_layer=None,
weight_quantize_layer=None,
act_quantize_layer=None):
- """
+ r"""
The constructor for ImperativeQuantAware.
Args:
...
@@ -30,7 +30,7 @@ __all__ = [
class FakeQuantMovingAverage(layers.Layer):
- """
+ r"""
FakeQuantMovingAverage layer does the moving_average_abs_max quant and then dequant.
Its computational formula is described as below:
@@ -128,7 +128,7 @@ class FakeQuantMovingAverage(layers.Layer):
class FakeQuantAbsMax(layers.Layer):
- """
+ r"""
FakeQuantAbsMax layer does the abs_max quant and then dequant.
Its computational formula is described as below:
@@ -545,7 +545,7 @@ class QuantizedLinear(layers.Layer):
class MovingAverageAbsMaxScale(layers.Layer):
def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
- """
+ r"""
MovingAverageMaxScale layer is used to calculating the output quantization scale of Layer.
Its computational formula is described as below:
...
@@ -37,7 +37,7 @@ class QuantInt8MkldnnPass(object):
"""
def __init__(self, _scope=None, _place=None):
- """
+ r"""
Args:
scope(fluid.Scope): scope is used to initialize the new parameters.
place(fluid.CPUPlace): place is used to initialize the new parameters.
...
@@ -239,7 +239,7 @@ class QuantizationTransformPass(object):
act_preprocess_func=None,
optimizer_func=None,
executor=None):
- """
+ r"""
Constructor.
Args:
...
@@ -33,7 +33,7 @@ _logger = get_logger(
class HDFSClient(object):
- """
+ r"""
A tool of HDFS
Args:
@@ -376,7 +376,7 @@ class HDFSClient(object):
_logger.info("HDFS list path: {} successfully".format(hdfs_path))
ret_lines = []
- regex = re.compile('\s+')
+ regex = re.compile(r'\s+')
out_lines = output.strip().split("\n")
for line in out_lines:
re_line = regex.split(line)
@@ -418,7 +418,7 @@ class HDFSClient(object):
_logger.info("HDFS list all files: {} successfully".format(
hdfs_path))
lines = []
- regex = re.compile('\s+')
+ regex = re.compile(r'\s+')
out_lines = output.strip().split("\n")
for line in out_lines:
re_line = regex.split(line)
...
@@ -224,7 +224,7 @@ def less_than_ver(a, b):
import operator
def to_list(s):
- s = re.sub('(\.0+)+$', '', s)
+ s = re.sub(r'(\.0+)+$', '', s)
return [int(x) for x in s.split('.')]
return operator.lt(to_list(a), to_list(b))
...
@@ -101,10 +101,11 @@ class _DatasetKind(object):
ITER = 1
@staticmethod
- def create_fetcher(kind, dataset, auto_collate_batch, collate_fn, drop_last):
+ def create_fetcher(kind, dataset, auto_collate_batch, collate_fn,
+ drop_last):
if kind == _DatasetKind.MAP:
- return _MapDatasetFetcher(dataset, auto_collate_batch,
- collate_fn, drop_last)
+ return _MapDatasetFetcher(dataset, auto_collate_batch, collate_fn,
+ drop_last)
elif kind == _DatasetKind.ITER:
return _IterableDatasetFetcher(dataset, auto_collate_batch,
collate_fn, drop_last)
@@ -240,7 +241,8 @@ class _DataLoaderIterBase(object):
if self._dataset_kind == _DatasetKind.MAP:
self._sampler_iter = iter(list(range(len(self._dataset))))
else:
- self._sampler_iter = iter(_InfiniteIterableSampler(self._dataset, 1))
+ self._sampler_iter = iter(
+ _InfiniteIterableSampler(self._dataset, 1))
self._collate_fn = loader.collate_fn
# LoDTensorBlockingQueue instance for create_py_reader and a thread
@@ -380,8 +382,8 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
# NOTE(chenweihang): _worker_loop must be top level method to be pickled
def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event,
- auto_collate_batch, collate_fn, init_fn, worker_id, num_workers,
- use_shared_memory):
+ auto_collate_batch, collate_fn, init_fn, worker_id,
+ num_workers, use_shared_memory):
try:
# NOTE: [ mmap files clear ] When the child process exits unexpectedly,
# some shared memory objects may have been applied for but have not yet
@@ -400,8 +402,8 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event,
try:
if init_fn is not None:
init_fn(worker_id)
- fetcher = _DatasetKind.create_fetcher(dataset_kind, dataset,
- auto_collate_batch, collate_fn, True)
+ fetcher = _DatasetKind.create_fetcher(
+ dataset_kind, dataset, auto_collate_batch, collate_fn, True)
except:
init_exception = Exception("init_fn failed in worker {}: " \
"{}".format(worker_id, sys.exc_info()))
...
@@ -22,7 +22,7 @@ from google.protobuf import text_format
class DownpourSGD(object):
- """
+ r"""
Distributed optimizer of downpour stochastic gradient descent
Standard implementation of Google's Downpour SGD
in Large Scale Distributed Deep Networks
...
@@ -52,7 +52,7 @@ class DownpourServer(Server):
def add_sparse_table(self, table_id, learning_rate, slot_key_vars,
slot_value_var):
- """
+ r"""
Args:
table_id(int): id of sparse params table
learning_rate(float): the learning rate used to update parameters. \
@@ -84,7 +84,7 @@ class DownpourServer(Server):
table.accessor.downpour_accessor_param.delete_threshold = 0.8
def add_dense_table(self, table_id, learning_rate, param_var, grad_var):
- """
+ r"""
Args:
table_id(int): id of sparse params table
learning_rate(float): the learning rate used to update parameters. \
@@ -135,7 +135,7 @@ class DownpourWorker(Worker):
def add_sparse_table(self, table_id, learning_rate, slot_key_vars,
slot_value_vars):
- """
+ r"""
Args:
table_id(int): id of sparse params table
learning_rate(float): the learning rate used to update parameters. \
@@ -153,7 +153,7 @@ class DownpourWorker(Worker):
[var.name + "@GRAD" for var in slot_value_vars])
def add_dense_table(self, table_id, learning_rate, param_vars, grad_vars):
- """
+ r"""
Args:
table_id(int): id of sparse params table
learning_rate(float): the learning rate used to update parameters. \
...
@@ -593,7 +593,7 @@ def grad(outputs,
@framework.dygraph_only
def to_variable(value, name=None, zero_copy=None, dtype=None):
- """
+ r"""
:api_attr: imperative
The API will create a ``Variable`` or ``ComplexVariable`` object from
...
@@ -183,7 +183,7 @@ class PiecewiseDecay(LearningRateDecay):
class NaturalExpDecay(LearningRateDecay):
- """
+ r"""
:api_attr: imperative
Applies natural exponential decay to the initial learning rate.
@@ -266,7 +266,7 @@ class NaturalExpDecay(LearningRateDecay):
class ExponentialDecay(LearningRateDecay):
- """
+ r"""
:api_attr: imperative
Applies exponential decay to the learning rate.
@@ -348,7 +348,7 @@ class ExponentialDecay(LearningRateDecay):
class InverseTimeDecay(LearningRateDecay):
- """
+ r"""
:api_attr: imperative
Applies inverse time decay to the initial learning rate.
@@ -426,7 +426,7 @@ class InverseTimeDecay(LearningRateDecay):
class PolynomialDecay(LearningRateDecay):
- """
+ r"""
:api_attr: imperative
Applies polynomial decay to the initial learning rate.
@@ -520,7 +520,7 @@ class PolynomialDecay(LearningRateDecay):
class CosineDecay(LearningRateDecay):
- """
+ r"""
:api_attr: imperative
Applies cosine decay to the learning rate.
@@ -578,7 +578,7 @@ class CosineDecay(LearningRateDecay):
class NoamDecay(LearningRateDecay):
- """
+ r"""
:api_attr: imperative
Applies Noam decay to the initial learning rate.
...
@@ -42,7 +42,7 @@ __all__ = [
class Conv2D(layers.Layer):
- """
+ r"""
This interface is used to construct a callable object of the ``Conv2D`` class.
For more details, refer to code examples.
The convolution2D layer calculates the output based on the input, filter
@@ -282,7 +282,7 @@ class Conv2D(layers.Layer):
class Conv3D(layers.Layer):
- """
+ r"""
**Convlution3D Layer**
The convolution3D layer calculates the output based on the input, filter
@@ -484,7 +484,7 @@ class Conv3D(layers.Layer):
class Conv3DTranspose(layers.Layer):
- """
+ r"""
**Convlution3D transpose layer**
The convolution3D transpose layer calculates the output based on the input,
@@ -701,7 +701,7 @@ class Conv3DTranspose(layers.Layer):
class Pool2D(layers.Layer):
- """
+ r"""
This interface is used to construct a callable object of the ``Pool2D`` class.
For more details, refer to code examples.
@@ -1009,7 +1009,7 @@ class Linear(layers.Layer):
class InstanceNorm(layers.Layer):
- """
+ r"""
This interface is used to construct a callable object of the ``InstanceNorm`` class.
For more details, refer to code examples.
@@ -1143,7 +1143,7 @@ class InstanceNorm(layers.Layer):
class BatchNorm(layers.Layer):
- """
+ r"""
:alias_main: paddle.nn.BatchNorm
:alias: paddle.nn.BatchNorm,paddle.nn.layer.BatchNorm,paddle.nn.layer.norm.BatchNorm
:old_api: paddle.fluid.dygraph.BatchNorm
@@ -1492,7 +1492,7 @@ class Dropout(layers.Layer):
class Embedding(layers.Layer):
- """
+ r"""
:alias_main: paddle.nn.Embedding
:alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
:old_api: paddle.fluid.dygraph.Embedding
@@ -1652,7 +1652,7 @@ class Embedding(layers.Layer):
class LayerNorm(layers.Layer):
- """
+ r"""
:alias_main: paddle.nn.LayerNorm
:alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm
:old_api: paddle.fluid.dygraph.LayerNorm
@@ -2242,7 +2242,7 @@ class NCE(layers.Layer):
class PRelu(layers.Layer):
- """
+ r"""
This interface is used to construct a callable object of the ``PRelu`` class.
For more details, refer to code examples.
It implements three activation methods of the ``PRelu`` activation function.
@@ -2350,7 +2350,7 @@ class PRelu(layers.Layer):
class BilinearTensorProduct(layers.Layer):
- """
+ r"""
**Add Bilinear Tensor Product Layer**
@@ -2467,7 +2467,7 @@ class BilinearTensorProduct(layers.Layer):
class Conv2DTranspose(layers.Layer):
- """
+ r"""
This interface is used to construct a callable object of the ``Conv2DTranspose`` class.
For more details, refer to code examples.
The convolution2D transpose layer calculates the output based on the input,
@@ -2979,7 +2979,7 @@ class GroupNorm(layers.Layer):
class SpectralNorm(layers.Layer):
- """
+ r"""
:alias_main: paddle.nn.SpectralNorm
:alias: paddle.nn.SpectralNorm,paddle.nn.layer.SpectralNorm,paddle.nn.layer.norm.SpectralNorm
:old_api: paddle.fluid.dygraph.SpectralNorm
...
@@ -20,7 +20,7 @@ __all__ = ['LSTMCell', 'GRUCell']
class LSTMCell(Layer):
- """
+ r"""
LSTMCell implementation using basic operators.
There are two LSTMCell version, the default one is compatible with CUDNN LSTM implementation.
The algorithm can be described as the equations below.
@@ -236,7 +236,7 @@ class LSTMCell(Layer):
class GRUCell(Layer):
- """
+ r"""
GRU implementation using basic operators.
There are two GRUCell version, the default one is compatible with CUDNN GRU implementation.
The algorithm can be described as the equations below.
...
@@ -2255,7 +2255,7 @@ class Operator(object):
return self.desc.type()
def input(self, name):
- """
+ r"""
Get the input arguments according to the input parameter name.
Args:
@@ -2306,7 +2306,7 @@ class Operator(object):
return self.desc.output_arg_names()
def output(self, name):
- """
+ r"""
Get output arguments by the output parameter name.
Args:
...
@@ -527,7 +527,7 @@ class DownpourWorker(Worker):
def add_dense_table(self, table_id, learning_rate, param_vars, grad_vars,
dense_start_table_id, sparse_table_names):
- """
+ r"""
Args:
table_id(int): id of sparse params table
learning_rate(float): the learning rate used to update parameters. \
...
@@ -153,7 +153,7 @@ class FleetUtil(object):
stat_pos="_generated_var_2",
stat_neg="_generated_var_3",
print_prefix=""):
- """
+ r"""
Print global auc of all distributed workers.
Args:
@@ -1073,7 +1073,7 @@ class FleetUtil(object):
hadoop_fs_name,
hadoop_fs_ugi,
hadoop_home="$HADOOP_HOME"):
- """
+ r"""
get last saved base xbox info from xbox_base_done.txt
Args:
@@ -1118,7 +1118,7 @@ class FleetUtil(object):
hadoop_fs_name,
hadoop_fs_ugi,
hadoop_home="$HADOOP_HOME"):
- """
+ r"""
get last saved xbox info from xbox_patch_done.txt
Args:
@@ -1164,7 +1164,7 @@ class FleetUtil(object):
hadoop_fs_name,
hadoop_fs_ugi,
hadoop_home="$HADOOP_HOME"):
- """
+ r"""
get last saved model info from donefile.txt
Args:
@@ -1279,7 +1279,7 @@ class FleetUtil(object):
q_name="q",
pos_ins_num_name="pos",
total_ins_num_name="total"):
- """
+ r"""
get global metrics, including auc, bucket_error, mae, rmse,
actual_ctr, predicted_ctr, copc, mean_predict_qvalue, total_ins_num.
@@ -1469,7 +1469,7 @@ class FleetUtil(object):
pos_ins_num_name="pos",
total_ins_num_name="total",
print_prefix=""):
- """
+ r"""
print global metrics, including auc, bucket_error, mae, rmse,
actual_ctr, predicted_ctr, copc, mean_predict_qvalue, total_ins_num.
...
@@ -459,7 +459,7 @@ class TruncatedNormalInitializer(Initializer):
class XavierInitializer(Initializer):
- """
+ r"""
This class implements the Xavier weight initializer from the paper
`Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
@@ -595,7 +595,7 @@ class XavierInitializer(Initializer):
class MSRAInitializer(Initializer):
- """Implements the MSRA initializer a.k.a. Kaiming Initializer
+ r"""Implements the MSRA initializer a.k.a. Kaiming Initializer
This class implements the weight initialization from the paper
`Delving Deep into Rectifiers: Surpassing Human-Level Performance on
...
@@ -137,7 +137,7 @@ def embedding(input,
padding_idx=None,
param_attr=None,
dtype='float32'):
- """
+ r"""
:api_attr: Static Graph
The operator is used to lookup embeddings vector of ids provided by :attr:`input` .
...
@@ -59,7 +59,7 @@ class LayerHelperBase(object):
return cls.__dtype
def to_variable(self, value, name=None):
- """
+ r"""
The API will create a ``Variable`` object from numpy\.ndarray or Variable object.
Parameters:
...
@@ -3012,7 +3012,7 @@ class DynamicRNN(object):
self.mem_link = []
def step_input(self, x, level=0):
- """
+ r"""
This function is used to set sequence x as DynamicRNN's input.
The maximum sequence length in x determines the number of time steps
the RNN unit will be executed. DynamicRNN can take multiple inputs.
@@ -3144,7 +3144,7 @@ class DynamicRNN(object):
return array_read(array=input_array, i=self.step_idx)
def static_input(self, x):
- """
+ r"""
This function is used to set x as DynamicRNN's static input. It is optional.
- Case 1, set static input with LoD
@@ -3348,7 +3348,7 @@ class DynamicRNN(object):
value=0.0,
need_reorder=False,
dtype='float32'):
- """
+ r"""
Create a memory Variable for DynamicRNN to deliver data cross time steps.
It can be initialized by an existing Tensor or a constant Tensor of given
dtype and shape.
...
@@ -77,7 +77,7 @@ def retinanet_target_assign(bbox_pred,
num_classes=1,
positive_overlap=0.5,
negative_overlap=0.4):
- """
+ r"""
**Target Assign Layer for the detector RetinaNet.**
This OP finds out positive and negative samples from all anchors
@@ -471,7 +471,7 @@ def rpn_target_assign(bbox_pred,
def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25):
- """
+ r"""
:alias_main: paddle.nn.functional.sigmoid_focal_loss
:alias: paddle.nn.functional.sigmoid_focal_loss,paddle.nn.functional.loss.sigmoid_focal_loss
:old_api: paddle.fluid.layers.sigmoid_focal_loss
@@ -821,7 +821,7 @@ def box_coder(prior_box,
box_normalized=True,
name=None,
axis=0):
- """
+ r"""
**Box Coder Layer**
@@ -1523,7 +1523,7 @@ def ssd_loss(location,
mining_type='max_negative',
normalize=True,
sample_size=None):
- """
+ r"""
:alias_main: paddle.nn.functional.ssd_loss
:alias: paddle.nn.functional.ssd_loss,paddle.nn.functional.loss.ssd_loss
:old_api: paddle.fluid.layers.ssd_loss
@@ -1930,7 +1930,7 @@ def density_prior_box(input,
offset=0.5,
flatten_to_2d=False,
name=None):
- """
+ r"""
This op generates density prior boxes for SSD(Single Shot MultiBox Detector)
algorithm. Each position of the input produce N prior boxes, N is
@@ -2741,7 +2741,7 @@ def generate_proposal_labels(rpn_rois,
def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois,
labels_int32, num_classes, resolution):
- """
+ r"""
**Generate Mask Labels for Mask-RCNN**
@@ -3671,7 +3671,7 @@ def distribute_fpn_proposals(fpn_rois,
refer_scale,
rois_num=None,
name=None):
- """
+ r"""
**This op only takes LoDTensor as input.** In Feature Pyramid Networks
(FPN) models, it is needed to distribute all proposals into different FPN
...
@@ -113,7 +113,7 @@ class Distribution(object):
class Uniform(Distribution):
- """Uniform distribution with `low` and `high` parameters.
+ r"""Uniform distribution with `low` and `high` parameters.
Mathematical Details
@@ -258,7 +258,7 @@ class Uniform(Distribution):
class Normal(Distribution):
- """The Normal distribution with location `loc` and `scale` parameters.
+ r"""The Normal distribution with location `loc` and `scale` parameters.
Mathematical details
@@ -423,7 +423,7 @@ class Normal(Distribution):
class Categorical(Distribution):
- """
+ r"""
Categorical distribution is a discrete probability distribution that
describes the possible results of a random variable that can take on
one of K possible categories, with the probability of each category
@@ -529,7 +529,7 @@ class Categorical(Distribution):
class MultivariateNormalDiag(Distribution):
- """
+ r"""
A multivariate normal (also called Gaussian) distribution parameterized by a mean vector
and a covariance matrix.
...
@@ -440,7 +440,7 @@ Applies piecewise decay to the initial learning rate.
def cosine_decay(learning_rate, step_each_epoch, epochs):
- """
+ r"""
Applies cosine decay to the learning rate.
...
@@ -57,7 +57,7 @@ def center_loss(input,
alpha,
param_attr,
update_center=True):
- """
+ r"""
:api_attr: Static Graph
**Center loss Cost layer**
@@ -151,7 +151,7 @@ def center_loss(input,
def bpr_loss(input, label, name=None):
- """
+ r"""
**Bayesian Personalized Ranking Loss Operator**
@@ -203,7 +203,7 @@ def bpr_loss(input, label, name=None):
def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
- """
+ r"""
:alias_main: paddle.nn.functional.cross_entropy
:alias: paddle.nn.functional.cross_entropy,paddle.nn.functional.loss.cross_entropy
:old_api: paddle.fluid.layers.cross_entropy
@@ -300,7 +300,7 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex):
def square_error_cost(input, label):
- """
+ r"""
This op accepts input predictions and target label and returns the
squared error cost.
@@ -1185,7 +1185,7 @@ def softmax_with_cross_entropy(logits,
numeric_stable_mode=True,
return_softmax=False,
axis=-1):
- """
+ r"""
:alias_main: paddle.nn.functional.softmax_with_cross_entropy
:alias: paddle.nn.functional.softmax_with_cross_entropy,paddle.nn.functional.loss.softmax_with_cross_entropy
:old_api: paddle.fluid.layers.softmax_with_cross_entropy
@@ -1312,7 +1312,7 @@ def softmax_with_cross_entropy(logits,
def rank_loss(label, left, right, name=None):
- """
+ r"""
This operator implements the sort loss layer in the RankNet model. RankNet is a pairwise ranking model
with a training sample consisting of a pair of documents (A and B), The label (P)
@@ -1375,7 +1375,7 @@ def rank_loss(label, left, right, name=None):
def margin_rank_loss(label, left, right, margin=0.1, name=None):
- """
+ r"""
Margin Ranking Loss Layer for ranking problem,
which compares left score and right score passed in.
The ranking loss can be defined as following equation:
@@ -1551,7 +1551,7 @@ def teacher_student_sigmoid_loss(input,
def huber_loss(input, label, delta):
- """
+ r"""
This operator computes the Huber loss between input and label.
Huber loss is commonly used in regression tasks. Compared to square_error_cost, Huber loss is more robust and less sensitivity to outliers.
@@ -1681,7 +1681,7 @@ from .control_flow import equal
def npair_loss(anchor, positive, labels, l2_reg=0.002):
- '''
+ r'''
Read `Improved Deep Metric Learning with Multi class N pair Loss Objective\
<http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/\
...
@@ -114,7 +114,7 @@ def auc(input,
num_thresholds=2**12 - 1,
topk=1,
slide_steps=1):
- """
+ r"""
**Area Under the Curve (AUC) Layer**
This implementation computes the AUC according to forward output and label.
...
@@ -215,7 +215,7 @@ def fc(input,
bias_attr=None,
act=None,
name=None):
- """
+ r"""
:api_attr: Static Graph
**Fully Connected Layer**
@@ -377,7 +377,7 @@ def embedding(input,
padding_idx=None,
param_attr=None,
dtype='float32'):
- """
+ r"""
:api_attr: Static Graph
**WARING:** This OP will be deprecated in a future release. This OP requires the
@@ -530,7 +530,7 @@ def _pull_sparse(input,
padding_id=0,
dtype='float32',
scale_sparse_grad=True):
- """
+ r"""
**Pull Fleet Sparse Layer**
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
@@ -601,7 +601,7 @@ def _pull_sparse_v2(input,
padding_id=0,
dtype='float32',
scale_sparse_grad=True):
- """
+ r"""
**Pull Fleet Sparse Layer**
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
@@ -664,7 +664,7 @@ def _pull_sparse_v2(input,
def _pull_box_sparse(input, size, dtype='float32'):
- """
+ r"""
**Pull Box Sparse Layer**
This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
@@ -1050,7 +1050,7 @@ def chunk_eval(input,
num_chunk_types,
excluded_chunk_types=None,
seq_length=None):
- """
+ r"""
This operator computes the precision, recall and F1-score for chunk detection.
It is often used in sequence tagging tasks, such as Named Entity Recognition(NER).
@@ -1199,7 +1199,7 @@ def chunk_eval(input,
@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax")
def softmax(input, use_cudnn=False, name=None, axis=-1):
- """
+ r"""
This operator implements the softmax layer. The calculation process is as follows:
1. The dimension :attr:`axis` of the ``input`` will be permuted to the last.
@@ -1339,7 +1339,7 @@ def conv2d(input,
act=None,
name=None,
data_format="NCHW"):
- """
+ r"""
:api_attr: Static Graph
The convolution2D layer calculates the output based on the input, filter
@@ -1618,7 +1618,7 @@ def conv3d(input,
act=None,
name=None,
data_format="NCDHW"):
- """
+ r"""
:api_attr: Static Graph
The convolution3D layer calculates the output based on the input, filter
@@ -2325,7 +2325,7 @@ def adaptive_pool2d(input,
pool_type="max",
require_index=False,
name=None):
- """
+ r"""
This operation calculates the output based on the input, pool_size,
pool_type parameters. Input(X) and output(Out) are in NCHW format, where N is batch
@@ -2471,7 +2471,7 @@ def adaptive_pool3d(input,
pool_type="max",
require_index=False,
name=None):
- """
+ r"""
This operation calculates the output based on the input, pool_size,
pool_type parameters. Input(X) and output(Out) are in NCDHW format, where N is batch
@@ -2638,7 +2638,7 @@ def batch_norm(input,
moving_variance_name=None,
do_model_average_for_mean_and_var=True,
use_global_stats=False):
- """
+ r"""
:api_attr: Static Graph
**Batch Normalization Layer**
@@ -2902,7 +2902,7 @@ def inplace_abn(input,
do_model_average_for_mean_and_var=True,
use_global_stats=False,
act_alpha=1.0):
- """
+ r"""
**In-place Activation Batch Normalization Layer**
This layer calculates batch normalization and activation with in-place memory.
@@ -3096,7 +3096,7 @@ def instance_norm(input,
param_attr=None,
bias_attr=None,
name=None):
- """
+ r"""
:api_attr: Static Graph
**Instance Normalization Layer**
@@ -3231,7 +3231,7 @@ def data_norm(input,
sync_stats=False,
summary_decay_rate=0.9999999,
enable_scale_and_shift=False):
- """
+ r"""
:api_attr: Static Graph
**Data Normalization Layer**
@@ -3416,7 +3416,7 @@ def layer_norm(input,
bias_attr=None,
act=None,
name=None):
- """
+ r"""
:api_attr: Static Graph
**Layer Normalization Layer**
@@ -3646,7 +3646,7 @@ def group_norm(input,
@templatedoc()
def spectral_norm(weight, dim=0, power_iters=1, eps=1e-12, name=None):
- """
+ r"""
:api_attr: Static Graph
**Spectral Normalization Layer**
@@ -3765,7 +3765,7 @@ def conv2d_transpose(input,
act=None,
name=None,
data_format='NCHW'):
- """
+ r"""
:api_attr: Static Graph
The convolution2D transpose layer calculates the output based on the input,
@@ -4057,7 +4057,7 @@ def conv3d_transpose(input,
act=None,
name=None,
data_format='NCDHW'):
- """
+ r"""
:api_attr: Static Graph
The convolution3D transpose layer calculates the output based on the input,
@@ -4961,7 +4961,7 @@ def split(input, num_or_sections, dim=-1, name=None):
def l2_normalize(x, axis, epsilon=1e-12, name=None): def l2_normalize(x, axis, epsilon=1e-12, name=None):
""" r"""
This op normalizes `x` along dimension `axis` using an L2 This op normalizes `x` along dimension `axis` using an L2
norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
...@@ -5286,7 +5286,7 @@ def ctc_greedy_decoder(input, ...@@ -5286,7 +5286,7 @@ def ctc_greedy_decoder(input,
input_length=None, input_length=None,
padding_value=0, padding_value=0,
name=None): name=None):
""" r"""
This op is used to decode sequences by greedy policy by the following steps: This op is used to decode sequences by greedy policy by the following steps:
1. Get the indexes of maximum value for each row in input. a.k.a. 1. Get the indexes of maximum value for each row in input. a.k.a.
...@@ -5538,7 +5538,7 @@ def im2sequence(input, ...@@ -5538,7 +5538,7 @@ def im2sequence(input,
input_image_size=None, input_image_size=None,
out_stride=1, out_stride=1,
name=None): name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Extracts image patches from the input tensor to form a tensor of shape Extracts image patches from the input tensor to form a tensor of shape
...@@ -6046,7 +6046,7 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): ...@@ -6046,7 +6046,7 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
""" r"""
:alias_main: paddle.reshape :alias_main: paddle.reshape
:alias: paddle.reshape,paddle.tensor.reshape,paddle.tensor.manipulation.reshape :alias: paddle.reshape,paddle.tensor.reshape,paddle.tensor.manipulation.reshape
...@@ -6535,7 +6535,7 @@ def lod_append(x, level): ...@@ -6535,7 +6535,7 @@ def lod_append(x, level):
def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None, def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None,
data_format='NCHW'): data_format='NCHW'):
""" r"""
:alias_main: paddle.nn.functional.lrn :alias_main: paddle.nn.functional.lrn
:alias: paddle.nn.functional.lrn,paddle.nn.functional.norm.lrn :alias: paddle.nn.functional.lrn,paddle.nn.functional.norm.lrn
:old_api: paddle.fluid.layers.lrn :old_api: paddle.fluid.layers.lrn
...@@ -6625,7 +6625,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None, ...@@ -6625,7 +6625,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None,
def pad(x, paddings, pad_value=0., name=None): def pad(x, paddings, pad_value=0., name=None):
""" r"""
:alias_main: paddle.nn.functional.pad :alias_main: paddle.nn.functional.pad
:alias: paddle.nn.functional.pad,paddle.nn.functional.common.pad :alias: paddle.nn.functional.pad,paddle.nn.functional.common.pad
:old_api: paddle.fluid.layers.pad :old_api: paddle.fluid.layers.pad
...@@ -6695,7 +6695,7 @@ def pad(x, paddings, pad_value=0., name=None): ...@@ -6695,7 +6695,7 @@ def pad(x, paddings, pad_value=0., name=None):
def pad_constant_like(x, y, pad_value=0., name=None): def pad_constant_like(x, y, pad_value=0., name=None):
""" r"""
Pad :attr:`y` with :attr:`pad_value`, the number of values padded to Pad :attr:`y` with :attr:`pad_value`, the number of values padded to
the edges of each axis is specified by the difference of the shape the edges of each axis is specified by the difference of the shape
of :attr:`x` and :attr:`y` . ((0, shape_x_0 - shape_y_0), ... (0, shape_x_n - shape_y_n)) of :attr:`x` and :attr:`y` . ((0, shape_x_0 - shape_y_0), ... (0, shape_x_n - shape_y_n))
...@@ -6794,7 +6794,7 @@ def label_smooth(label, ...@@ -6794,7 +6794,7 @@ def label_smooth(label,
epsilon=0.1, epsilon=0.1,
dtype="float32", dtype="float32",
name=None): name=None):
""" r"""
:alias_main: paddle.nn.functional.label_smooth :alias_main: paddle.nn.functional.label_smooth
:alias: paddle.nn.functional.label_smooth,paddle.nn.functional.common.label_smooth :alias: paddle.nn.functional.label_smooth,paddle.nn.functional.common.label_smooth
:old_api: paddle.fluid.layers.label_smooth :old_api: paddle.fluid.layers.label_smooth
...@@ -7067,7 +7067,7 @@ def roi_align(input, ...@@ -7067,7 +7067,7 @@ def roi_align(input,
def dice_loss(input, label, epsilon=0.00001, name=None): def dice_loss(input, label, epsilon=0.00001, name=None):
""" r"""
Dice loss for comparing the similarity between the input predictions and the label. Dice loss for comparing the similarity between the input predictions and the label.
This implementation is for binary classification, where the input is sigmoid This implementation is for binary classification, where the input is sigmoid
...@@ -8500,7 +8500,7 @@ def scatter(input, index, updates, name=None, overwrite=True): ...@@ -8500,7 +8500,7 @@ def scatter(input, index, updates, name=None, overwrite=True):
def scatter_nd_add(ref, index, updates, name=None): def scatter_nd_add(ref, index, updates, name=None):
""" r"""
**Scatter_nd_add Layer** **Scatter_nd_add Layer**
Output is obtained by applying sparse addition to a single value Output is obtained by applying sparse addition to a single value
...@@ -8686,7 +8686,7 @@ def random_crop(x, shape, seed=None): ...@@ -8686,7 +8686,7 @@ def random_crop(x, shape, seed=None):
def log(x, name=None): def log(x, name=None):
""" r"""
Calculates the natural log of the given input tensor, element-wise. Calculates the natural log of the given input tensor, element-wise.
.. math:: .. math::
...@@ -8768,7 +8768,7 @@ def relu(x, name=None): ...@@ -8768,7 +8768,7 @@ def relu(x, name=None):
@deprecated(since="2.0.0", update_to="paddle.nn.functional.selu") @deprecated(since="2.0.0", update_to="paddle.nn.functional.selu")
def selu(x, scale=None, alpha=None, name=None): def selu(x, scale=None, alpha=None, name=None):
""" r"""
Selu Operator. Selu Operator.
...@@ -8836,7 +8836,7 @@ def selu(x, scale=None, alpha=None, name=None): ...@@ -8836,7 +8836,7 @@ def selu(x, scale=None, alpha=None, name=None):
def mean_iou(input, label, num_classes): def mean_iou(input, label, num_classes):
""" r"""
Mean Intersection-Over-Union is a common evaluation metric for Mean Intersection-Over-Union is a common evaluation metric for
semantic image segmentation, which first computes the IOU for each semantic image segmentation, which first computes the IOU for each
semantic class and then computes the average over classes. semantic class and then computes the average over classes.
...@@ -9640,7 +9640,7 @@ def hard_sigmoid(x, slope=0.2, offset=0.5, name=None): ...@@ -9640,7 +9640,7 @@ def hard_sigmoid(x, slope=0.2, offset=0.5, name=None):
@templatedoc() @templatedoc()
def swish(x, beta=1.0, name=None): def swish(x, beta=1.0, name=None):
""" r"""
:alias_main: paddle.nn.functional.swish :alias_main: paddle.nn.functional.swish
:alias: paddle.nn.functional.swish,paddle.nn.functional.activation.swish :alias: paddle.nn.functional.swish,paddle.nn.functional.activation.swish
:old_api: paddle.fluid.layers.swish :old_api: paddle.fluid.layers.swish
...@@ -9725,7 +9725,7 @@ def swish(x, beta=1.0, name=None): ...@@ -9725,7 +9725,7 @@ def swish(x, beta=1.0, name=None):
@deprecated(since="2.0.0", update_to="paddle.static.nn.prelu") @deprecated(since="2.0.0", update_to="paddle.static.nn.prelu")
def prelu(x, mode, param_attr=None, name=None): def prelu(x, mode, param_attr=None, name=None):
""" r"""
prelu activation. prelu activation.
.. math:: .. math::
...@@ -9883,7 +9883,7 @@ def leaky_relu(x, alpha=0.02, name=None): ...@@ -9883,7 +9883,7 @@ def leaky_relu(x, alpha=0.02, name=None):
def soft_relu(x, threshold=40.0, name=None): def soft_relu(x, threshold=40.0, name=None):
""" r"""
SoftRelu Activation Operator. SoftRelu Activation Operator.
...@@ -9932,7 +9932,7 @@ def soft_relu(x, threshold=40.0, name=None): ...@@ -9932,7 +9932,7 @@ def soft_relu(x, threshold=40.0, name=None):
def flatten(x, axis=1, name=None): def flatten(x, axis=1, name=None):
""" r"""
**Flatten op** **Flatten op**
Flatten the input tensor into a 2D matrix. Flatten the input tensor into a 2D matrix.
...@@ -12153,7 +12153,7 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True): ...@@ -12153,7 +12153,7 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
def logical_and(x, y, out=None, name=None): def logical_and(x, y, out=None, name=None):
""" r"""
``logical_and`` operator computes element-wise logical AND on ``x`` and ``y``, and returns ``out``. ``x``, ``y`` and ``out`` are N-dim boolean ``Tensor``. ``logical_and`` operator computes element-wise logical AND on ``x`` and ``y``, and returns ``out``. ``x``, ``y`` and ``out`` are N-dim boolean ``Tensor``.
Each element of ``out`` is calculated by Each element of ``out`` is calculated by
...@@ -12230,7 +12230,7 @@ def logical_or(x, y, out=None, name=None): ...@@ -12230,7 +12230,7 @@ def logical_or(x, y, out=None, name=None):
def logical_xor(x, y, out=None, name=None): def logical_xor(x, y, out=None, name=None):
""" r"""
``logical_xor`` operator computes element-wise logical XOR on ``x`` and ``y``, and returns ``out``. ``x``, ``y`` and ``out`` are N-dim boolean ``Tensor``. ``logical_xor`` operator computes element-wise logical XOR on ``x`` and ``y``, and returns ``out``. ``x``, ``y`` and ``out`` are N-dim boolean ``Tensor``.
Each element of ``out`` is calculated by Each element of ``out`` is calculated by
...@@ -12565,7 +12565,7 @@ def maxout(x, groups, name=None, axis=1): ...@@ -12565,7 +12565,7 @@ def maxout(x, groups, name=None, axis=1):
def space_to_depth(x, blocksize, name=None): def space_to_depth(x, blocksize, name=None):
""" r"""
Gives a blocksize to space_to_depth the input LoDtensor with Layout: [batch, channel, height, width] Gives a blocksize to space_to_depth the input LoDtensor with Layout: [batch, channel, height, width]
...@@ -12753,7 +12753,7 @@ def affine_channel(x, ...@@ -12753,7 +12753,7 @@ def affine_channel(x,
def similarity_focus(input, axis, indexes, name=None): def similarity_focus(input, axis, indexes, name=None):
""" r"""
SimilarityFocus Operator SimilarityFocus Operator
Generate a similarity focus mask with the same shape of input using the following method: Generate a similarity focus mask with the same shape of input using the following method:
...@@ -13034,7 +13034,7 @@ def grid_sampler(x, grid, name=None): ...@@ -13034,7 +13034,7 @@ def grid_sampler(x, grid, name=None):
def log_loss(input, label, epsilon=1e-4, name=None): def log_loss(input, label, epsilon=1e-4, name=None):
""" r"""
**Negative Log Loss Layer** **Negative Log Loss Layer**
...@@ -13086,7 +13086,7 @@ def log_loss(input, label, epsilon=1e-4, name=None): ...@@ -13086,7 +13086,7 @@ def log_loss(input, label, epsilon=1e-4, name=None):
def add_position_encoding(input, alpha, beta, name=None): def add_position_encoding(input, alpha, beta, name=None):
""" r"""
This operator performs weighted sum of input feature at each position This operator performs weighted sum of input feature at each position
(position in the sequence) and the corresponding position encoding. (position in the sequence) and the corresponding position encoding.
...@@ -13160,7 +13160,7 @@ def bilinear_tensor_product(x, ...@@ -13160,7 +13160,7 @@ def bilinear_tensor_product(x,
name=None, name=None,
param_attr=None, param_attr=None,
bias_attr=None): bias_attr=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Bilinear Tensor Product Layer** **Bilinear Tensor Product Layer**
...@@ -13987,7 +13987,7 @@ def fsp_matrix(x, y): ...@@ -13987,7 +13987,7 @@ def fsp_matrix(x, y):
def continuous_value_model(input, cvm, use_cvm=True): def continuous_value_model(input, cvm, use_cvm=True):
""" r"""
**continuous_value_model layers** **continuous_value_model layers**
...@@ -14092,7 +14092,7 @@ def where(condition): ...@@ -14092,7 +14092,7 @@ def where(condition):
@deprecated(since="2.0.0", update_to="paddle.sign") @deprecated(since="2.0.0", update_to="paddle.sign")
def sign(x): def sign(x):
""" r"""
This OP returns sign of every element in `x`: 1 for positive, -1 for negative and 0 for zero. This OP returns sign of every element in `x`: 1 for positive, -1 for negative and 0 for zero.
Args: Args:
...@@ -14125,7 +14125,7 @@ def sign(x): ...@@ -14125,7 +14125,7 @@ def sign(x):
def unique(x, dtype='int32'): def unique(x, dtype='int32'):
""" r"""
Return a unique tensor for `x` and an index tensor pointing to this unique tensor. Return a unique tensor for `x` and an index tensor pointing to this unique tensor.
Args: Args:
...@@ -14164,7 +14164,7 @@ def unique(x, dtype='int32'): ...@@ -14164,7 +14164,7 @@ def unique(x, dtype='int32'):
def unique_with_counts(x, dtype='int32'): def unique_with_counts(x, dtype='int32'):
""" r"""
This OP return a unique tensor for `x` , and count tensor that the count of unique result in raw input, \ This OP return a unique tensor for `x` , and count tensor that the count of unique result in raw input, \
and an index tensor pointing to this unique tensor. and an index tensor pointing to this unique tensor.
...@@ -14236,7 +14236,7 @@ def deformable_conv(input, ...@@ -14236,7 +14236,7 @@ def deformable_conv(input,
bias_attr=None, bias_attr=None,
modulated=True, modulated=True,
name=None): name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Deformable Convolution op** **Deformable Convolution op**
...@@ -14453,7 +14453,7 @@ def deformable_conv(input, ...@@ -14453,7 +14453,7 @@ def deformable_conv(input,
def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None):
""" r"""
This op returns a col buffer of sliding local blocks of input x, also known This op returns a col buffer of sliding local blocks of input x, also known
as im2col for batched 2D image tensors. For each block under the convolution filter, as im2col for batched 2D image tensors. For each block under the convolution filter,
...@@ -14590,7 +14590,7 @@ def deformable_roi_pooling(input, ...@@ -14590,7 +14590,7 @@ def deformable_roi_pooling(input,
trans_std=0.1, trans_std=0.1,
position_sensitive=False, position_sensitive=False,
name=None): name=None):
""" r"""
Deformable ROI Pooling Layer Deformable ROI Pooling Layer
...@@ -14821,7 +14821,7 @@ def shard_index(input, index_num, nshards, shard_id, ignore_value=-1): ...@@ -14821,7 +14821,7 @@ def shard_index(input, index_num, nshards, shard_id, ignore_value=-1):
@templatedoc() @templatedoc()
def hard_swish(x, threshold=6.0, scale=6.0, offset=3.0, name=None): def hard_swish(x, threshold=6.0, scale=6.0, offset=3.0, name=None):
""" r"""
This operator implements the hard_swish activation function. This operator implements the hard_swish activation function.
Hard_swish is proposed in MobileNetV3, and performs better in computational stability and efficiency compared to swish function. Hard_swish is proposed in MobileNetV3, and performs better in computational stability and efficiency compared to swish function.
For more details please refer to: https://arxiv.org/pdf/1905.02244.pdf For more details please refer to: https://arxiv.org/pdf/1905.02244.pdf
...@@ -14890,7 +14890,7 @@ def hard_swish(x, threshold=6.0, scale=6.0, offset=3.0, name=None): ...@@ -14890,7 +14890,7 @@ def hard_swish(x, threshold=6.0, scale=6.0, offset=3.0, name=None):
@templatedoc() @templatedoc()
def mish(x, threshold=20, name=None): def mish(x, threshold=20, name=None):
""" r"""
This operator implements the mish activation function. This operator implements the mish activation function.
Refer to `Mish: A Self Regularized Non-Monotonic Neural Refer to `Mish: A Self Regularized Non-Monotonic Neural
Activation Function <https://arxiv.org/abs/1908.08681>`_ Activation Function <https://arxiv.org/abs/1908.08681>`_
...@@ -14964,7 +14964,7 @@ def mish(x, threshold=20, name=None): ...@@ -14964,7 +14964,7 @@ def mish(x, threshold=20, name=None):
def gather_tree(ids, parents): def gather_tree(ids, parents):
""" r"""
To be used after beam search. After beam search, we get selected ids at To be used after beam search. After beam search, we get selected ids at
each time step and the corresponding parents in the search tree. Both ids each time step and the corresponding parents in the search tree. Both ids
and parents have the layout :attr:`[max_time, batch_size, beam_size]`. Then and parents have the layout :attr:`[max_time, batch_size, beam_size]`. Then
......
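The hunks above only add an ``r`` prefix to docstrings in fluid/layers/nn.py; the docstring bodies themselves are unchanged. A minimal, hypothetical sketch of why the prefix matters for docstrings that carry LaTeX-style math (the function below is invented for illustration and is not part of this change):

# Hypothetical example, not taken from the diff: a docstring with Sphinx math.
def scaled_value(x):
    r"""
    Compute :math:`out = \frac{x}{\sqrt{2}}`.

    In a regular string literal, ``\f`` is the form-feed escape, so
    ``\frac`` would silently turn into a form-feed character followed by
    ``rac``, while ``\s`` is an unrecognized escape that CPython 3.6+
    warns about (``DeprecationWarning: invalid escape sequence``).  The
    raw string keeps the LaTeX markup exactly as written.
    """
    return x / (2 ** 0.5)

print(scaled_value.__doc__)

The raw prefix is the usual way to keep ``:math:`` markup intact without doubling every backslash in the documentation.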
...@@ -413,7 +413,7 @@ def softshrink(x, alpha=None): ...@@ -413,7 +413,7 @@ def softshrink(x, alpha=None):
return _softshrink_(**kwargs) return _softshrink_(**kwargs)
softshrink.__doc__ = """ softshrink.__doc__ = r"""
:alias_main: paddle.nn.functional.softshrink :alias_main: paddle.nn.functional.softshrink
:alias: paddle.nn.functional.softshrink,paddle.nn.functional.activation.softshrink :alias: paddle.nn.functional.softshrink,paddle.nn.functional.activation.softshrink
:old_api: paddle.fluid.layers.softshrink :old_api: paddle.fluid.layers.softshrink
...@@ -530,7 +530,7 @@ def thresholded_relu(x, threshold=None): ...@@ -530,7 +530,7 @@ def thresholded_relu(x, threshold=None):
return _thresholded_relu_(**kwargs) return _thresholded_relu_(**kwargs)
thresholded_relu.__doc__ = """ thresholded_relu.__doc__ = r"""
:alias_main: paddle.nn.functional.thresholded_relu :alias_main: paddle.nn.functional.thresholded_relu
:alias: paddle.nn.functional.thresholded_relu,paddle.nn.functional.activation.thresholded_relu :alias: paddle.nn.functional.thresholded_relu,paddle.nn.functional.activation.thresholded_relu
:old_api: paddle.fluid.layers.thresholded_relu :old_api: paddle.fluid.layers.thresholded_relu
...@@ -617,7 +617,7 @@ def gelu(x, approximate=False): ...@@ -617,7 +617,7 @@ def gelu(x, approximate=False):
return _gelu_(**kwargs) return _gelu_(**kwargs)
gelu.__doc__ = """ gelu.__doc__ = r"""
:strong:`GeLU Activation Operator` :strong:`GeLU Activation Operator`
For more details, see [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415). For more details, see [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415).
...@@ -701,7 +701,7 @@ def erf(x, name=None): ...@@ -701,7 +701,7 @@ def erf(x, name=None):
return _erf_(**kwargs) return _erf_(**kwargs)
erf.__doc__ = """ erf.__doc__ = r"""
:strong:`Erf Operator` :strong:`Erf Operator`
For more details, see [Error function](https://en.wikipedia.org/wiki/Error_function). For more details, see [Error function](https://en.wikipedia.org/wiki/Error_function).
......
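The ops.py hunks above change module-level ``__doc__ = r"""..."""`` assignments rather than inline docstrings. A hedged sketch of the same pattern, with a made-up wrapper standing in for the generated op:

import math

# Illustrative only: the wrapper name below is invented, not from the diff.
# When documentation is attached to a generated function after the fact,
# the assigned literal needs the same ``r`` prefix as an inline docstring.
def _erf_like(x):
    return math.erf(x)

_erf_like.__doc__ = r"""
:strong:`Erf-like operator (illustrative only)`

Equation:
    .. math::
        out = \text{erf}(x)

Without the ``r`` prefix, ``\t`` in ``\text`` would silently become a tab
character inside the attached documentation.
"""

print(_erf_like.__doc__)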
...@@ -67,7 +67,7 @@ class RNNCell(object): ...@@ -67,7 +67,7 @@ class RNNCell(object):
""" """
def call(self, inputs, states, **kwargs): def call(self, inputs, states, **kwargs):
""" r"""
Every cell must implement this method to do the calculations mapping the Every cell must implement this method to do the calculations mapping the
inputs and states to the output and new states. inputs and states to the output and new states.
...@@ -97,7 +97,7 @@ class RNNCell(object): ...@@ -97,7 +97,7 @@ class RNNCell(object):
dtype='float32', dtype='float32',
init_value=0, init_value=0,
batch_dim_idx=0): batch_dim_idx=0):
""" r"""
Generate initialized states according to provided shape, data type and Generate initialized states according to provided shape, data type and
value. value.
...@@ -225,7 +225,7 @@ class RNNCell(object): ...@@ -225,7 +225,7 @@ class RNNCell(object):
class GRUCell(RNNCell): class GRUCell(RNNCell):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Gated Recurrent Unit cell. It is a wrapper for Gated Recurrent Unit cell. It is a wrapper for
...@@ -287,7 +287,7 @@ class GRUCell(RNNCell): ...@@ -287,7 +287,7 @@ class GRUCell(RNNCell):
activation, dtype) activation, dtype)
def call(self, inputs, states): def call(self, inputs, states):
""" r"""
Perform calculations of GRU. Perform calculations of GRU.
Parameters: Parameters:
...@@ -323,7 +323,7 @@ class GRUCell(RNNCell): ...@@ -323,7 +323,7 @@ class GRUCell(RNNCell):
class LSTMCell(RNNCell): class LSTMCell(RNNCell):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Long-Short Term Memory cell. It is a wrapper for Long-Short Term Memory cell. It is a wrapper for
...@@ -390,7 +390,7 @@ class LSTMCell(RNNCell): ...@@ -390,7 +390,7 @@ class LSTMCell(RNNCell):
activation, forget_bias, dtype) activation, forget_bias, dtype)
def call(self, inputs, states): def call(self, inputs, states):
""" r"""
Perform calculations of LSTM. Perform calculations of LSTM.
Parameters: Parameters:
...@@ -782,7 +782,7 @@ class Decoder(object): ...@@ -782,7 +782,7 @@ class Decoder(object):
""" """
def initialize(self, inits): def initialize(self, inits):
""" r"""
Called once before the decoding iterations. Called once before the decoding iterations.
Parameters: Parameters:
...@@ -797,7 +797,7 @@ class Decoder(object): ...@@ -797,7 +797,7 @@ class Decoder(object):
raise NotImplementedError raise NotImplementedError
def step(self, time, inputs, states, **kwargs): def step(self, time, inputs, states, **kwargs):
""" r"""
Called per step of decoding. Called per step of decoding.
Parameters: Parameters:
...@@ -818,7 +818,7 @@ class Decoder(object): ...@@ -818,7 +818,7 @@ class Decoder(object):
raise NotImplementedError raise NotImplementedError
def finalize(self, outputs, final_states, sequence_lengths): def finalize(self, outputs, final_states, sequence_lengths):
""" r"""
Called once after the decoding iterations if implemented. Called once after the decoding iterations if implemented.
Parameters: Parameters:
...@@ -931,7 +931,7 @@ class BeamSearchDecoder(Decoder): ...@@ -931,7 +931,7 @@ class BeamSearchDecoder(Decoder):
@staticmethod @staticmethod
def tile_beam_merge_with_batch(x, beam_size): def tile_beam_merge_with_batch(x, beam_size):
""" r"""
Tile the batch dimension of a tensor. Specifically, this function takes Tile the batch dimension of a tensor. Specifically, this function takes
a tensor t shaped `[batch_size, s0, s1, ...]` composed of minibatch a tensor t shaped `[batch_size, s0, s1, ...]` composed of minibatch
entries `t[0], ..., t[batch_size - 1]` and tiles it to have a shape entries `t[0], ..., t[batch_size - 1]` and tiles it to have a shape
...@@ -966,7 +966,7 @@ class BeamSearchDecoder(Decoder): ...@@ -966,7 +966,7 @@ class BeamSearchDecoder(Decoder):
return x return x
def _split_batch_beams(self, x): def _split_batch_beams(self, x):
""" r"""
Reshape a tensor with shape `[batch_size * beam_size, ...]` to a new Reshape a tensor with shape `[batch_size * beam_size, ...]` to a new
tensor with shape `[batch_size, beam_size, ...]`. tensor with shape `[batch_size, beam_size, ...]`.
...@@ -983,7 +983,7 @@ class BeamSearchDecoder(Decoder): ...@@ -983,7 +983,7 @@ class BeamSearchDecoder(Decoder):
return nn.reshape(x, shape=[-1, self.beam_size] + list(x.shape[1:])) return nn.reshape(x, shape=[-1, self.beam_size] + list(x.shape[1:]))
def _merge_batch_beams(self, x): def _merge_batch_beams(self, x):
""" r"""
Reshape a tensor with shape `[batch_size, beam_size, ...]` to a new Reshape a tensor with shape `[batch_size, beam_size, ...]` to a new
tensor with shape `[batch_size * beam_size, ...]`. tensor with shape `[batch_size * beam_size, ...]`.
...@@ -1000,7 +1000,7 @@ class BeamSearchDecoder(Decoder): ...@@ -1000,7 +1000,7 @@ class BeamSearchDecoder(Decoder):
return nn.reshape(x, shape=[-1] + list(x.shape[2:])) return nn.reshape(x, shape=[-1] + list(x.shape[2:]))
def _expand_to_beam_size(self, x): def _expand_to_beam_size(self, x):
""" r"""
This function takes a tensor t shaped `[batch_size, s0, s1, ...]` composed This function takes a tensor t shaped `[batch_size, s0, s1, ...]` composed
of minibatch entries `t[0], ..., t[batch_size - 1]` and tiles it to have a of minibatch entries `t[0], ..., t[batch_size - 1]` and tiles it to have a
shape `[batch_size, beam_size, s0, s1, ...]` composed of minibatch entries shape `[batch_size, beam_size, s0, s1, ...]` composed of minibatch entries
...@@ -1023,7 +1023,7 @@ class BeamSearchDecoder(Decoder): ...@@ -1023,7 +1023,7 @@ class BeamSearchDecoder(Decoder):
return x return x
def _mask_probs(self, probs, finished): def _mask_probs(self, probs, finished):
""" r"""
Mask log probabilities. It forces finished beams to allocate all probability Mask log probabilities. It forces finished beams to allocate all probability
mass to eos and unfinished beams to remain unchanged. mass to eos and unfinished beams to remain unchanged.
...@@ -1052,7 +1052,7 @@ class BeamSearchDecoder(Decoder): ...@@ -1052,7 +1052,7 @@ class BeamSearchDecoder(Decoder):
return probs return probs
def _gather(self, x, indices, batch_size): def _gather(self, x, indices, batch_size):
""" r"""
Gather from the tensor `x` using `indices`. Gather from the tensor `x` using `indices`.
Parameters: Parameters:
...@@ -1104,7 +1104,7 @@ class BeamSearchDecoder(Decoder): ...@@ -1104,7 +1104,7 @@ class BeamSearchDecoder(Decoder):
pass pass
def initialize(self, initial_cell_states): def initialize(self, initial_cell_states):
""" r"""
Initialize the BeamSearchDecoder. Initialize the BeamSearchDecoder.
Parameters: Parameters:
...@@ -1162,7 +1162,7 @@ class BeamSearchDecoder(Decoder): ...@@ -1162,7 +1162,7 @@ class BeamSearchDecoder(Decoder):
init_lengths), init_finished init_lengths), init_finished
def _beam_search_step(self, time, logits, next_cell_states, beam_state): def _beam_search_step(self, time, logits, next_cell_states, beam_state):
""" r"""
Calculate scores and select candidate token ids. Calculate scores and select candidate token ids.
Parameters: Parameters:
...@@ -1235,7 +1235,7 @@ class BeamSearchDecoder(Decoder): ...@@ -1235,7 +1235,7 @@ class BeamSearchDecoder(Decoder):
return beam_search_output, beam_search_state return beam_search_output, beam_search_state
def step(self, time, inputs, states, **kwargs): def step(self, time, inputs, states, **kwargs):
""" r"""
Perform a beam search decoding step, which uses `cell` to get probabilities, Perform a beam search decoding step, which uses `cell` to get probabilities,
and follows a beam search step to calculate scores and select candidate and follows a beam search step to calculate scores and select candidate
token ids. token ids.
...@@ -1287,7 +1287,7 @@ class BeamSearchDecoder(Decoder): ...@@ -1287,7 +1287,7 @@ class BeamSearchDecoder(Decoder):
return (beam_search_output, beam_search_state, next_inputs, finished) return (beam_search_output, beam_search_state, next_inputs, finished)
def finalize(self, outputs, final_states, sequence_lengths): def finalize(self, outputs, final_states, sequence_lengths):
""" r"""
Use `gather_tree` to backtrace along the beam search tree and construct Use `gather_tree` to backtrace along the beam search tree and construct
the full predicted sequences. the full predicted sequences.
...@@ -1572,7 +1572,7 @@ def dynamic_decode(decoder, ...@@ -1572,7 +1572,7 @@ def dynamic_decode(decoder,
is_test=False, is_test=False,
return_length=False, return_length=False,
**kwargs): **kwargs):
""" r"""
Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
Tensor indicating finished status contains all True values or the number of Tensor indicating finished status contains all True values or the number of
decoding step reaches to :attr:`max_step_num`. decoding step reaches to :attr:`max_step_num`.
...@@ -1664,7 +1664,7 @@ class DecodeHelper(object): ...@@ -1664,7 +1664,7 @@ class DecodeHelper(object):
""" """
def initialize(self): def initialize(self):
""" r"""
DecodeHelper initialization to produce inputs for the first decoding step DecodeHelper initialization to produce inputs for the first decoding step
and give the initial status telling whether each sequence in the batch and give the initial status telling whether each sequence in the batch
is finished. It is the partial of the initialization of `BasicDecoder`. is finished. It is the partial of the initialization of `BasicDecoder`.
...@@ -1698,7 +1698,7 @@ class DecodeHelper(object): ...@@ -1698,7 +1698,7 @@ class DecodeHelper(object):
pass pass
def next_inputs(self, time, outputs, states, sample_ids): def next_inputs(self, time, outputs, states, sample_ids):
""" r"""
Produce the inputs and states for next time step and give status telling Produce the inputs and states for next time step and give status telling
whether each minibatch entry is finished. It is called after `sample` in whether each minibatch entry is finished. It is called after `sample` in
`BasicDecoder.step`. It is the partial of `BasicDecoder.step`. `BasicDecoder.step`. It is the partial of `BasicDecoder.step`.
...@@ -1787,7 +1787,7 @@ class TrainingHelper(DecodeHelper): ...@@ -1787,7 +1787,7 @@ class TrainingHelper(DecodeHelper):
self.inputs) self.inputs)
def initialize(self): def initialize(self):
""" r"""
TrainingHelper initialization produces inputs for the first decoding TrainingHelper initialization produces inputs for the first decoding
step by slicing at the first time step of full sequence inputs, and it step by slicing at the first time step of full sequence inputs, and it
gives initial status telling whether each sequence in the batch is gives initial status telling whether each sequence in the batch is
...@@ -1809,7 +1809,7 @@ class TrainingHelper(DecodeHelper): ...@@ -1809,7 +1809,7 @@ class TrainingHelper(DecodeHelper):
return init_inputs, init_finished return init_inputs, init_finished
def sample(self, time, outputs, states): def sample(self, time, outputs, states):
""" r"""
Perform sampling by using `argmax` according to the `outputs`. Mostly Perform sampling by using `argmax` according to the `outputs`. Mostly
the sampled ids would not be used since the inputs for next decoding the sampled ids would not be used since the inputs for next decoding
step would be got by slicing. step would be got by slicing.
...@@ -1832,7 +1832,7 @@ class TrainingHelper(DecodeHelper): ...@@ -1832,7 +1832,7 @@ class TrainingHelper(DecodeHelper):
return sample_ids return sample_ids
def next_inputs(self, time, outputs, states, sample_ids): def next_inputs(self, time, outputs, states, sample_ids):
""" r"""
Generate inputs for the next decoding step by slicing at corresponding Generate inputs for the next decoding step by slicing at corresponding
step of the full sequence inputs. Simultaneously, produce the states step of the full sequence inputs. Simultaneously, produce the states
for next time step by directly using the input `states` and emit status for next time step by directly using the input `states` and emit status
...@@ -1909,7 +1909,7 @@ class GreedyEmbeddingHelper(DecodeHelper): ...@@ -1909,7 +1909,7 @@ class GreedyEmbeddingHelper(DecodeHelper):
""" """
def __init__(self, embedding_fn, start_tokens, end_token): def __init__(self, embedding_fn, start_tokens, end_token):
""" r"""
Constructor of GreedyEmbeddingHelper. Constructor of GreedyEmbeddingHelper.
Parameters: Parameters:
...@@ -1934,7 +1934,7 @@ class GreedyEmbeddingHelper(DecodeHelper): ...@@ -1934,7 +1934,7 @@ class GreedyEmbeddingHelper(DecodeHelper):
shape=[1], dtype="int64", value=end_token) shape=[1], dtype="int64", value=end_token)
def initialize(self): def initialize(self):
""" r"""
GreedyEmbeddingHelper initialization produces inputs for the first decoding GreedyEmbeddingHelper initialization produces inputs for the first decoding
step by using `start_tokens` of the constructor, and gives initial step by using `start_tokens` of the constructor, and gives initial
status telling whether each sequence in the batch is finished. status telling whether each sequence in the batch is finished.
...@@ -1957,7 +1957,7 @@ class GreedyEmbeddingHelper(DecodeHelper): ...@@ -1957,7 +1957,7 @@ class GreedyEmbeddingHelper(DecodeHelper):
return init_inputs, init_finished return init_inputs, init_finished
def sample(self, time, outputs, states): def sample(self, time, outputs, states):
""" r"""
Perform sampling by using `argmax` according to the `outputs`. Perform sampling by using `argmax` according to the `outputs`.
Parameters: Parameters:
...@@ -1978,7 +1978,7 @@ class GreedyEmbeddingHelper(DecodeHelper): ...@@ -1978,7 +1978,7 @@ class GreedyEmbeddingHelper(DecodeHelper):
return sample_ids return sample_ids
def next_inputs(self, time, outputs, states, sample_ids): def next_inputs(self, time, outputs, states, sample_ids):
""" r"""
Generate inputs for the next decoding step by applying `embedding_fn` Generate inputs for the next decoding step by applying `embedding_fn`
to `sample_ids`. Simultaneously, produce the states for next time step to `sample_ids`. Simultaneously, produce the states for next time step
by directly using the input `states` and emit status telling whether by directly using the input `states` and emit status telling whether
...@@ -2046,7 +2046,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper): ...@@ -2046,7 +2046,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper):
end_token, end_token,
softmax_temperature=None, softmax_temperature=None,
seed=None): seed=None):
""" r"""
Constructor of SampleEmbeddingHelper. Constructor of SampleEmbeddingHelper.
Parameters: Parameters:
...@@ -2080,7 +2080,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper): ...@@ -2080,7 +2080,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper):
self.seed = seed self.seed = seed
def sample(self, time, outputs, states): def sample(self, time, outputs, states):
""" r"""
Perform sampling from a categorical distribution, and the distribution Perform sampling from a categorical distribution, and the distribution
is computed by `softmax(outputs/softmax_temperature)`. is computed by `softmax(outputs/softmax_temperature)`.
...@@ -2165,7 +2165,7 @@ class BasicDecoder(Decoder): ...@@ -2165,7 +2165,7 @@ class BasicDecoder(Decoder):
self.output_fn = output_fn self.output_fn = output_fn
def initialize(self, initial_cell_states): def initialize(self, initial_cell_states):
""" r"""
BasicDecoder initialization includes helper initialization and cell BasicDecoder initialization includes helper initialization and cell
initialization, and cell initialization uses `initial_cell_states` as initialization, and cell initialization uses `initial_cell_states` as
the result directly. the result directly.
...@@ -2195,7 +2195,7 @@ class BasicDecoder(Decoder): ...@@ -2195,7 +2195,7 @@ class BasicDecoder(Decoder):
pass pass
def step(self, time, inputs, states, **kwargs): def step(self, time, inputs, states, **kwargs):
""" r"""
Perform one decoding step as following steps: Perform one decoding step as following steps:
1. Perform `cell_outputs, cell_states = cell.call(inputs, states)` 1. Perform `cell_outputs, cell_states = cell.call(inputs, states)`
...@@ -2258,7 +2258,7 @@ def dynamic_lstm(input, ...@@ -2258,7 +2258,7 @@ def dynamic_lstm(input,
candidate_activation='tanh', candidate_activation='tanh',
dtype='float32', dtype='float32',
name=None): name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Note**: **Note**:
...@@ -2430,7 +2430,7 @@ def lstm(input, ...@@ -2430,7 +2430,7 @@ def lstm(input,
name=None, name=None,
default_initializer=None, default_initializer=None,
seed=-1): seed=-1):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Note**: **Note**:
...@@ -2612,7 +2612,7 @@ def dynamic_lstmp(input, ...@@ -2612,7 +2612,7 @@ def dynamic_lstmp(input,
c_0=None, c_0=None,
cell_clip=None, cell_clip=None,
proj_clip=None): proj_clip=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Note**: **Note**:
...@@ -2823,7 +2823,7 @@ def dynamic_gru(input, ...@@ -2823,7 +2823,7 @@ def dynamic_gru(input,
candidate_activation='tanh', candidate_activation='tanh',
h_0=None, h_0=None,
origin_mode=False): origin_mode=False):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Note: The input type of this must be LoDTensor. If the input type to be **Note: The input type of this must be LoDTensor. If the input type to be
...@@ -2985,7 +2985,7 @@ def gru_unit(input, ...@@ -2985,7 +2985,7 @@ def gru_unit(input,
activation='tanh', activation='tanh',
gate_activation='sigmoid', gate_activation='sigmoid',
origin_mode=False): origin_mode=False):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Gated Recurrent Unit (GRU) RNN cell. This operator performs GRU calculations for Gated Recurrent Unit (GRU) RNN cell. This operator performs GRU calculations for
...@@ -3143,7 +3143,7 @@ def beam_search(pre_ids, ...@@ -3143,7 +3143,7 @@ def beam_search(pre_ids,
is_accumulated=True, is_accumulated=True,
name=None, name=None,
return_parent_idx=False): return_parent_idx=False):
""" r"""
Beam search is a classical algorithm for selecting candidate words in a Beam search is a classical algorithm for selecting candidate words in a
machine translation task. machine translation task.
...@@ -3293,7 +3293,7 @@ def beam_search(pre_ids, ...@@ -3293,7 +3293,7 @@ def beam_search(pre_ids,
def beam_search_decode(ids, scores, beam_size, end_id, name=None): def beam_search_decode(ids, scores, beam_size, end_id, name=None):
""" r"""
This operator is used after beam search has completed. It constructs the This operator is used after beam search has completed. It constructs the
full predicted sequences for each sample by walking back along the search full predicted sequences for each sample by walking back along the search
...@@ -3378,7 +3378,7 @@ def lstm_unit(x_t, ...@@ -3378,7 +3378,7 @@ def lstm_unit(x_t,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
name=None): name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Long-Short Term Memory (LSTM) RNN cell. This operator performs LSTM calculations for Long-Short Term Memory (LSTM) RNN cell. This operator performs LSTM calculations for
......
...@@ -51,7 +51,7 @@ def sequence_conv(input, ...@@ -51,7 +51,7 @@ def sequence_conv(input,
param_attr=None, param_attr=None,
act=None, act=None,
name=None): name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Notes: The Op only receives LoDTensor as input. If your input is Tensor, please use conv2d Op.(fluid.layers.** :ref:`api_fluid_layers_conv2d` ). **Notes: The Op only receives LoDTensor as input. If your input is Tensor, please use conv2d Op.(fluid.layers.** :ref:`api_fluid_layers_conv2d` ).
...@@ -175,7 +175,7 @@ def sequence_conv(input, ...@@ -175,7 +175,7 @@ def sequence_conv(input,
def sequence_softmax(input, use_cudnn=False, name=None): def sequence_softmax(input, use_cudnn=False, name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Note**: **Note**:
...@@ -259,7 +259,7 @@ def sequence_softmax(input, use_cudnn=False, name=None): ...@@ -259,7 +259,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
def sequence_pool(input, pool_type, is_test=False, pad_value=0.0): def sequence_pool(input, pool_type, is_test=False, pad_value=0.0):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
**Notes: The Op only receives LoDTensor as input. If your input is Tensor, please use pool2d Op.(fluid.layers.** :ref:`api_fluid_layers_pool2d` ). **Notes: The Op only receives LoDTensor as input. If your input is Tensor, please use pool2d Op.(fluid.layers.** :ref:`api_fluid_layers_pool2d` ).
...@@ -636,7 +636,7 @@ def sequence_slice(input, offset, length, name=None): ...@@ -636,7 +636,7 @@ def sequence_slice(input, offset, length, name=None):
def sequence_expand(x, y, ref_level=-1, name=None): def sequence_expand(x, y, ref_level=-1, name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Sequence Expand Layer. This layer will expand the input variable ``x`` \ Sequence Expand Layer. This layer will expand the input variable ``x`` \
...@@ -772,7 +772,7 @@ def sequence_expand(x, y, ref_level=-1, name=None): ...@@ -772,7 +772,7 @@ def sequence_expand(x, y, ref_level=-1, name=None):
def sequence_expand_as(x, y, name=None): def sequence_expand_as(x, y, name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Sequence Expand As Layer. This OP will expand the input variable ``x`` \ Sequence Expand As Layer. This OP will expand the input variable ``x`` \
...@@ -892,7 +892,7 @@ def sequence_expand_as(x, y, name=None): ...@@ -892,7 +892,7 @@ def sequence_expand_as(x, y, name=None):
def sequence_pad(x, pad_value, maxlen=None, name=None): def sequence_pad(x, pad_value, maxlen=None, name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
This layer padding the sequences in a same batch to a common length (according \ This layer padding the sequences in a same batch to a common length (according \
...@@ -1233,7 +1233,7 @@ def sequence_scatter(input, index, updates, name=None): ...@@ -1233,7 +1233,7 @@ def sequence_scatter(input, index, updates, name=None):
def sequence_enumerate(input, win_size, pad_value=0, name=None): def sequence_enumerate(input, win_size, pad_value=0, name=None):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Generate a new sequence for the input index sequence with \ Generate a new sequence for the input index sequence with \
...@@ -1301,7 +1301,7 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None): ...@@ -1301,7 +1301,7 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None):
def sequence_mask(x, maxlen=None, dtype='int64', name=None): def sequence_mask(x, maxlen=None, dtype='int64', name=None):
""" r"""
**SequenceMask Layer** **SequenceMask Layer**
This layer outputs a mask according to the input :code:`x` and This layer outputs a mask according to the input :code:`x` and
......
...@@ -343,7 +343,7 @@ def concat(input, axis=0, name=None): ...@@ -343,7 +343,7 @@ def concat(input, axis=0, name=None):
def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False): def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False):
""" r"""
This function concatenates or stacks all tensors in the input LoDTensorArray This function concatenates or stacks all tensors in the input LoDTensorArray
along the axis mentioned and returns that as the output. along the axis mentioned and returns that as the output.
...@@ -452,7 +452,7 @@ def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False): ...@@ -452,7 +452,7 @@ def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False):
def sums(input, out=None): def sums(input, out=None):
""" r"""
This function computes the sum of multiple input Tensors elementwisely. This function computes the sum of multiple input Tensors elementwisely.
- Case 1, sum of 3 Tensors - Case 1, sum of 3 Tensors
...@@ -1391,7 +1391,7 @@ def range(start, end, step, dtype, name=None): ...@@ -1391,7 +1391,7 @@ def range(start, end, step, dtype, name=None):
def linspace(start, stop, num, dtype=None, name=None): def linspace(start, stop, num, dtype=None, name=None):
""" r"""
This OP return fixed number of evenly spaced values within a given interval. This OP return fixed number of evenly spaced values within a given interval.
Args: Args:
...@@ -1527,7 +1527,7 @@ def zeros_like(x, out=None): ...@@ -1527,7 +1527,7 @@ def zeros_like(x, out=None):
@deprecated(since="2.0.0", update_to="paddle.diag") @deprecated(since="2.0.0", update_to="paddle.diag")
def diag(diagonal): def diag(diagonal):
""" r"""
:alias_main: paddle.diag :alias_main: paddle.diag
:alias: paddle.diag,paddle.tensor.diag,paddle.tensor.creation.diag :alias: paddle.diag,paddle.tensor.diag,paddle.tensor.creation.diag
:old_api: paddle.fluid.layers.diag :old_api: paddle.fluid.layers.diag
......
...@@ -475,7 +475,7 @@ class Accuracy(MetricBase): ...@@ -475,7 +475,7 @@ class Accuracy(MetricBase):
self.weight = .0 self.weight = .0
def update(self, value, weight): def update(self, value, weight):
""" r"""
This function takes the minibatch states (value, weight) as input, This function takes the minibatch states (value, weight) as input,
to accumulate and update the corresponding status of the Accuracy object. The update method is as follows: to accumulate and update the corresponding status of the Accuracy object. The update method is as follows:
...@@ -561,7 +561,7 @@ class ChunkEvaluator(MetricBase): ...@@ -561,7 +561,7 @@ class ChunkEvaluator(MetricBase):
self.num_correct_chunks = 0 self.num_correct_chunks = 0
def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks): def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks):
""" r"""
This function takes (num_infer_chunks, num_label_chunks, num_correct_chunks) as input, This function takes (num_infer_chunks, num_label_chunks, num_correct_chunks) as input,
to accumulate and update the corresponding status of the ChunkEvaluator object. The update method is as follows: to accumulate and update the corresponding status of the ChunkEvaluator object. The update method is as follows:
......
...@@ -42,7 +42,7 @@ def simple_img_conv_pool(input, ...@@ -42,7 +42,7 @@ def simple_img_conv_pool(input,
bias_attr=None, bias_attr=None,
act=None, act=None,
use_cudnn=True): use_cudnn=True):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
The simple_img_conv_pool api is composed of :ref:`api_fluid_layers_conv2d` and :ref:`api_fluid_layers_pool2d` . The simple_img_conv_pool api is composed of :ref:`api_fluid_layers_conv2d` and :ref:`api_fluid_layers_pool2d` .
...@@ -333,7 +333,7 @@ def sequence_conv_pool(input, ...@@ -333,7 +333,7 @@ def sequence_conv_pool(input,
def glu(input, dim=-1): def glu(input, dim=-1):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
The Gated Linear Units(GLU) composed by :ref:`api_fluid_layers_split` , The Gated Linear Units(GLU) composed by :ref:`api_fluid_layers_split` ,
...@@ -384,7 +384,7 @@ def scaled_dot_product_attention(queries, ...@@ -384,7 +384,7 @@ def scaled_dot_product_attention(queries,
values, values,
num_heads=1, num_heads=1,
dropout_rate=0.): dropout_rate=0.):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
This interface Multi-Head Attention using scaled dot product. This interface Multi-Head Attention using scaled dot product.
......
...@@ -954,7 +954,7 @@ class Optimizer(object): ...@@ -954,7 +954,7 @@ class Optimizer(object):
class SGDOptimizer(Optimizer): class SGDOptimizer(Optimizer):
""" r"""
Optimizer of the stochastic gradient descent algorithm. Optimizer of the stochastic gradient descent algorithm.
.. math:: .. math::
...@@ -1048,7 +1048,7 @@ class SGDOptimizer(Optimizer): ...@@ -1048,7 +1048,7 @@ class SGDOptimizer(Optimizer):
class MomentumOptimizer(Optimizer): class MomentumOptimizer(Optimizer):
""" r"""
Simple Momentum optimizer with velocity state Simple Momentum optimizer with velocity state
...@@ -1183,7 +1183,7 @@ class MomentumOptimizer(Optimizer): ...@@ -1183,7 +1183,7 @@ class MomentumOptimizer(Optimizer):
class DGCMomentumOptimizer(Optimizer): class DGCMomentumOptimizer(Optimizer):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887 DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887
...@@ -1603,7 +1603,7 @@ class DGCMomentumOptimizer(Optimizer): ...@@ -1603,7 +1603,7 @@ class DGCMomentumOptimizer(Optimizer):
class LarsMomentumOptimizer(Optimizer): class LarsMomentumOptimizer(Optimizer):
""" r"""
Momentum optimizer with LARS support Momentum optimizer with LARS support
The update equations are as follows: The update equations are as follows:
...@@ -1735,7 +1735,7 @@ class LarsMomentumOptimizer(Optimizer): ...@@ -1735,7 +1735,7 @@ class LarsMomentumOptimizer(Optimizer):
class AdagradOptimizer(Optimizer): class AdagradOptimizer(Optimizer):
""" r"""
The Adaptive Gradient optimizer (Adagrad for short) can adaptively assign The Adaptive Gradient optimizer (Adagrad for short) can adaptively assign
different learning rates to individual parameters. different learning rates to individual parameters.
...@@ -1851,7 +1851,7 @@ class AdagradOptimizer(Optimizer): ...@@ -1851,7 +1851,7 @@ class AdagradOptimizer(Optimizer):
class AdamOptimizer(Optimizer): class AdamOptimizer(Optimizer):
""" r"""
The Adam optimizer uses an optimization described at the end The Adam optimizer uses an optimization described at the end
of section 2 of `Adam paper <https://arxiv.org/abs/1412.6980>`_ , of section 2 of `Adam paper <https://arxiv.org/abs/1412.6980>`_ ,
it can dynamically adjusts the learning rate of each parameter using it can dynamically adjusts the learning rate of each parameter using
...@@ -2117,7 +2117,7 @@ class AdamOptimizer(Optimizer): ...@@ -2117,7 +2117,7 @@ class AdamOptimizer(Optimizer):
class AdamaxOptimizer(Optimizer): class AdamaxOptimizer(Optimizer):
""" r"""
The Adamax optimizer is implemented based on the Adamax Optimization The Adamax optimizer is implemented based on the Adamax Optimization
in Section 7 of `Adam paper <https://arxiv.org/abs/1412.6980>`_. in Section 7 of `Adam paper <https://arxiv.org/abs/1412.6980>`_.
The Adamax algorithm is a variant of the Adam algorithm based on the infinite norm, The Adamax algorithm is a variant of the Adam algorithm based on the infinite norm,
...@@ -2289,7 +2289,7 @@ class AdamaxOptimizer(Optimizer): ...@@ -2289,7 +2289,7 @@ class AdamaxOptimizer(Optimizer):
class DpsgdOptimizer(Optimizer): class DpsgdOptimizer(Optimizer):
""" r"""
We implement the Dpsgd optimizer according to CCS16 paper - We implement the Dpsgd optimizer according to CCS16 paper -
Deep Learning with Differential Privacy. Deep Learning with Differential Privacy.
...@@ -2384,7 +2384,7 @@ class DpsgdOptimizer(Optimizer): ...@@ -2384,7 +2384,7 @@ class DpsgdOptimizer(Optimizer):
class DecayedAdagradOptimizer(Optimizer): class DecayedAdagradOptimizer(Optimizer):
""" r"""
The Decayed Adagrad optimizer can be seen as an Adagrad algorithm that introduces The Decayed Adagrad optimizer can be seen as an Adagrad algorithm that introduces
the decay rate to solve the problem of a sharp drop in the learning rate the decay rate to solve the problem of a sharp drop in the learning rate
during model training when using the AdagradOptimizer. during model training when using the AdagradOptimizer.
...@@ -2494,7 +2494,7 @@ class DecayedAdagradOptimizer(Optimizer): ...@@ -2494,7 +2494,7 @@ class DecayedAdagradOptimizer(Optimizer):
class AdadeltaOptimizer(Optimizer): class AdadeltaOptimizer(Optimizer):
""" r"""
**Notes: This API does not support sparse parameter optimization.** **Notes: This API does not support sparse parameter optimization.**
Adadelta Optimizer. Please refer to this for details: Adadelta Optimizer. Please refer to this for details:
...@@ -2613,7 +2613,7 @@ class AdadeltaOptimizer(Optimizer): ...@@ -2613,7 +2613,7 @@ class AdadeltaOptimizer(Optimizer):
class RMSPropOptimizer(Optimizer): class RMSPropOptimizer(Optimizer):
""" r"""
Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning
rate method. The original slides proposed RMSProp: Slide 29 of rate method. The original slides proposed RMSProp: Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf . http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf .
...@@ -2801,7 +2801,7 @@ class RMSPropOptimizer(Optimizer): ...@@ -2801,7 +2801,7 @@ class RMSPropOptimizer(Optimizer):
class FtrlOptimizer(Optimizer): class FtrlOptimizer(Optimizer):
""" r"""
FTRL (Follow The Regularized Leader) Optimizer. FTRL (Follow The Regularized Leader) Optimizer.
The paper that proposed Follow The Regularized Leader (FTRL): The paper that proposed Follow The Regularized Leader (FTRL):
...@@ -2960,7 +2960,7 @@ class FtrlOptimizer(Optimizer): ...@@ -2960,7 +2960,7 @@ class FtrlOptimizer(Optimizer):
class LambOptimizer(AdamOptimizer): class LambOptimizer(AdamOptimizer):
""" r"""
LAMB (Layer-wise Adaptive Moments optimizer for Batching training) Optimizer. LAMB (Layer-wise Adaptive Moments optimizer for Batching training) Optimizer.
LAMB Optimizer is designed to scale up the batch size of training without losing LAMB Optimizer is designed to scale up the batch size of training without losing
...@@ -3132,7 +3132,7 @@ Lamb = LambOptimizer ...@@ -3132,7 +3132,7 @@ Lamb = LambOptimizer
class ModelAverage(Optimizer): class ModelAverage(Optimizer):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
The ModelAverage optimizer accumulates specific continuous historical parameters The ModelAverage optimizer accumulates specific continuous historical parameters
...@@ -3441,7 +3441,7 @@ class ModelAverage(Optimizer): ...@@ -3441,7 +3441,7 @@ class ModelAverage(Optimizer):
class ExponentialMovingAverage(object): class ExponentialMovingAverage(object):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Compute the moving average of parameters with exponential decay. Compute the moving average of parameters with exponential decay.
...@@ -4795,7 +4795,7 @@ class RecomputeOptimizer(Optimizer): ...@@ -4795,7 +4795,7 @@ class RecomputeOptimizer(Optimizer):
class LookaheadOptimizer(object): class LookaheadOptimizer(object):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
This implements the Lookahead optimizer of the This implements the Lookahead optimizer of the
......
...@@ -210,7 +210,7 @@ class ParamAttr(object): ...@@ -210,7 +210,7 @@ class ParamAttr(object):
class WeightNormParamAttr(ParamAttr): class WeightNormParamAttr(ParamAttr):
""" r"""
:api_attr: Static Graph :api_attr: Static Graph
Note: Note:
......
...@@ -1325,7 +1325,7 @@ class GeneratorLoader(DataLoaderBase): ...@@ -1325,7 +1325,7 @@ class GeneratorLoader(DataLoaderBase):
class PyReader(DataLoaderBase): class PyReader(DataLoaderBase):
""" r"""
Create a reader object for data feeding in Python. Create a reader object for data feeding in Python.
Data would be prefetched using Python thread and be pushed Data would be prefetched using Python thread and be pushed
into a queue asynchronously. Data in the queue would be extracted into a queue asynchronously. Data in the queue would be extracted
......
...@@ -63,7 +63,7 @@ def _create_regularization_of_grad(param, grad, regularization=None): ...@@ -63,7 +63,7 @@ def _create_regularization_of_grad(param, grad, regularization=None):
def append_regularization_ops(parameters_and_grads, regularization=None): def append_regularization_ops(parameters_and_grads, regularization=None):
"""Create and add backward regularization Operators r"""Create and add backward regularization Operators
Creates and adds backward regularization operators in the BlockDesc. Creates and adds backward regularization operators in the BlockDesc.
This will add gradients of the regularizer function to the gradients This will add gradients of the regularizer function to the gradients
...@@ -132,7 +132,7 @@ class WeightDecayRegularizer(object): ...@@ -132,7 +132,7 @@ class WeightDecayRegularizer(object):
class L2DecayRegularizer(WeightDecayRegularizer): class L2DecayRegularizer(WeightDecayRegularizer):
""" r"""
Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting. Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting.
It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ).
...@@ -239,7 +239,7 @@ class L2DecayRegularizer(WeightDecayRegularizer): ...@@ -239,7 +239,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
class L1DecayRegularizer(WeightDecayRegularizer): class L1DecayRegularizer(WeightDecayRegularizer):
""" r"""
Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse. Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse.
It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ).
......
...@@ -204,8 +204,8 @@ def train(word_idx): ...@@ -204,8 +204,8 @@ def train(word_idx):
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
re.compile("train/pos/.*\.txt$"), re.compile(r"train/pos/.*\.txt$"),
re.compile("train/neg/.*\.txt$"), word_idx) re.compile(r"train/neg/.*\.txt$"), word_idx)
def test(word_idx): def test(word_idx):
...@@ -221,8 +221,8 @@ def test(word_idx): ...@@ -221,8 +221,8 @@ def test(word_idx):
:rtype: callable :rtype: callable
""" """
return reader_creator( return reader_creator(
re.compile("test/pos/.*\.txt$"), re.compile(r"test/pos/.*\.txt$"),
re.compile("test/neg/.*\.txt$"), word_idx) re.compile(r"test/neg/.*\.txt$"), word_idx)
if __name__ == "__main__": if __name__ == "__main__":
......
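Since this hunk is representative of the whole change, a short illustration of why the raw-string form is preferred: `\.` is not a recognized escape, so the ordinary literal only works because Python keeps it verbatim, and newer interpreters warn about it at byte-compile time; the raw string denotes the identical pattern without the warning. The sample file name below is made up.

import re

# The two literals denote the same characters; only the non-raw single-backslash
# form triggers "DeprecationWarning: invalid escape sequence '\.'" when compiled.
assert r"train/pos/.*\.txt$" == "train/pos/.*\\.txt$"

pattern = re.compile(r"train/pos/.*\.txt$")
assert pattern.match("train/pos/12_9.txt") is not None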
...@@ -230,7 +230,7 @@ class SoftsignLayer(object): ...@@ -230,7 +230,7 @@ class SoftsignLayer(object):
class FC(Layer): class FC(Layer):
""" r"""
This interface is used to construct a callable object of the ``FC`` class. This interface is used to construct a callable object of the ``FC`` class.
For more details, refer to code examples. For more details, refer to code examples.
It creates a fully connected layer in the network. It can take It creates a fully connected layer in the network. It can take
......
...@@ -227,7 +227,7 @@ class SoftsignLayer(object): ...@@ -227,7 +227,7 @@ class SoftsignLayer(object):
class FC(paddle.nn.Layer): class FC(paddle.nn.Layer):
""" r"""
This interface is used to construct a callable object of the ``FC`` class. This interface is used to construct a callable object of the ``FC`` class.
For more details, refer to code examples. For more details, refer to code examples.
It creates a fully connected layer in the network. It can take It creates a fully connected layer in the network. It can take
......
...@@ -235,7 +235,7 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase): ...@@ -235,7 +235,7 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase):
class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1):
''' r'''
Test RNNOp Test RNNOp
equation: equation:
h_t = \sigma (W x_t + U h_{t-1}) h_t = \sigma (W x_t + U h_{t-1})
......
...@@ -31,7 +31,8 @@ class TestFullOp(unittest.TestCase): ...@@ -31,7 +31,8 @@ class TestFullOp(unittest.TestCase):
train_program = Program() train_program = Program()
with program_guard(train_program, startup_program): with program_guard(train_program, startup_program):
fill_value = 2.0 fill_value = 2.0
input = paddle.fluid.data(name='input', dtype='float32', shape=[2, 3]) input = paddle.fluid.data(
name='input', dtype='float32', shape=[2, 3])
output = paddle.full_like(input, fill_value) output = paddle.full_like(input, fill_value)
output_dtype = paddle.full_like(input, fill_value, dtype='float32') output_dtype = paddle.full_like(input, fill_value, dtype='float32')
......
...@@ -25,7 +25,7 @@ from paddle.fluid import compiler, Program, program_guard ...@@ -25,7 +25,7 @@ from paddle.fluid import compiler, Program, program_guard
class TestLRNOp(OpTest): class TestLRNOp(OpTest):
def get_input(self): def get_input(self):
''' TODO(gongweibao): why it's grad diff is so large? r''' TODO(gongweibao): why it's grad diff is so large?
x = np.ndarray( x = np.ndarray(
shape=(self.N, self.C, self.H, self.W), dtype=float, order='C') shape=(self.N, self.C, self.H, self.W), dtype=float, order='C')
for m in range(0, self.N): for m in range(0, self.N):
......
...@@ -232,7 +232,7 @@ class RecurrentOpTest1(unittest.TestCase): ...@@ -232,7 +232,7 @@ class RecurrentOpTest1(unittest.TestCase):
class RecurrentOpTest2(RecurrentOpTest1): class RecurrentOpTest2(RecurrentOpTest1):
''' r'''
Test RNNOp Test RNNOp
equation: equation:
h_t = \sigma (W x_t + U h_{t-1}) h_t = \sigma (W x_t + U h_{t-1})
...@@ -469,7 +469,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): ...@@ -469,7 +469,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1):
class RecurrentOpSubBlockTest(RecurrentOpTest1): class RecurrentOpSubBlockTest(RecurrentOpTest1):
''' r'''
Test RNNOp with subblock variable Test RNNOp with subblock variable
equation: equation:
y_ = emb * w1 y_ = emb * w1
...@@ -608,7 +608,7 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): ...@@ -608,7 +608,7 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1):
class RecurrentOpStopGradientTest(RecurrentOpTest1): class RecurrentOpStopGradientTest(RecurrentOpTest1):
""" r"""
Test RNNOp with stop_gradient = True Test RNNOp with stop_gradient = True
equation: equation:
h_t = \sigma (W x_t + U h_{t-1}) h_t = \sigma (W x_t + U h_{t-1})
......
...@@ -79,7 +79,7 @@ class TestErrors(unittest.TestCase): ...@@ -79,7 +79,7 @@ class TestErrors(unittest.TestCase):
self.assertRaises(TypeError, test_input_type_1) self.assertRaises(TypeError, test_input_type_1)
# The value of params must be in format '\d+(\.\d+){0,3}', like '1.5.2.0', '1.6' ... # The value of params must be in format r'\d+(\.\d+){0,3}', like '1.5.2.0', '1.6' ...
def test_input_value_1(): def test_input_value_1():
fluid.require_version('string') fluid.require_version('string')
......
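A quick sanity check of that version-string format; the candidate strings below are illustrative:

import re

version_format = re.compile(r'\d+(\.\d+){0,3}')

for candidate in ('1.6', '1.5.2.0', 'string'):
    print(candidate, bool(version_format.fullmatch(candidate)))
# prints: 1.6 True, 1.5.2.0 True, string False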
...@@ -35,7 +35,7 @@ def _is_numpy_(var): ...@@ -35,7 +35,7 @@ def _is_numpy_(var):
@six.add_metaclass(abc.ABCMeta) @six.add_metaclass(abc.ABCMeta)
class Metric(object): class Metric(object):
""" r"""
Base class for metric, encapsulates metric logic and APIs Base class for metric, encapsulates metric logic and APIs
Usage: Usage:
......
...@@ -58,7 +58,7 @@ import paddle ...@@ -58,7 +58,7 @@ import paddle
def elu(x, alpha=1.0, name=None): def elu(x, alpha=1.0, name=None):
""" r"""
elu activation. elu activation.
.. math:: .. math::
...@@ -101,7 +101,7 @@ def elu(x, alpha=1.0, name=None): ...@@ -101,7 +101,7 @@ def elu(x, alpha=1.0, name=None):
def gelu(x, approximate=False, name=None): def gelu(x, approximate=False, name=None):
""" r"""
gelu activation. gelu activation.
if approximate is True if approximate is True
...@@ -155,7 +155,7 @@ def gelu(x, approximate=False, name=None): ...@@ -155,7 +155,7 @@ def gelu(x, approximate=False, name=None):
def hardshrink(x, threshold=0.5, name=None): def hardshrink(x, threshold=0.5, name=None):
""" r"""
hard shrinkage activation hard shrinkage activation
.. math:: .. math::
...@@ -204,7 +204,7 @@ def hardshrink(x, threshold=0.5, name=None): ...@@ -204,7 +204,7 @@ def hardshrink(x, threshold=0.5, name=None):
def hardtanh(x, min=-1.0, max=1.0, name=None): def hardtanh(x, min=-1.0, max=1.0, name=None):
""" r"""
hardtanh activation hardtanh activation
.. math:: .. math::
...@@ -254,7 +254,7 @@ def hardtanh(x, min=-1.0, max=1.0, name=None): ...@@ -254,7 +254,7 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
def hardsigmoid(x, name=None): def hardsigmoid(x, name=None):
""" r"""
hardsigmoid activation. hardsigmoid activation.
A 3-part piecewise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391), A 3-part piecewise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391),
...@@ -308,7 +308,7 @@ def hardsigmoid(x, name=None): ...@@ -308,7 +308,7 @@ def hardsigmoid(x, name=None):
def hardswish(x, name=None): def hardswish(x, name=None):
""" r"""
hardswish activation hardswish activation
hardswish is proposed in MobileNetV3, and performs better in computational stability hardswish is proposed in MobileNetV3, and performs better in computational stability
...@@ -357,7 +357,7 @@ def hardswish(x, name=None): ...@@ -357,7 +357,7 @@ def hardswish(x, name=None):
def leaky_relu(x, negative_slope=0.01, name=None): def leaky_relu(x, negative_slope=0.01, name=None):
""" r"""
leaky_relu activation leaky_relu activation
.. math:: .. math::
...@@ -515,7 +515,7 @@ def relu(x, name=None): ...@@ -515,7 +515,7 @@ def relu(x, name=None):
def log_sigmoid(x, name=None): def log_sigmoid(x, name=None):
""" r"""
log_sigmoid activation. log_sigmoid activation.
.. math:: .. math::
...@@ -552,7 +552,7 @@ def log_sigmoid(x, name=None): ...@@ -552,7 +552,7 @@ def log_sigmoid(x, name=None):
def maxout(x, groups, axis=1, name=None): def maxout(x, groups, axis=1, name=None):
""" r"""
maxout activation. maxout activation.
Assumed the input shape is (N, Ci, H, W). Assumed the input shape is (N, Ci, H, W).
...@@ -671,7 +671,7 @@ def selu(x, ...@@ -671,7 +671,7 @@ def selu(x,
scale=1.0507009873554804934193349852946, scale=1.0507009873554804934193349852946,
alpha=1.6732632423543772848170429916717, alpha=1.6732632423543772848170429916717,
name=None): name=None):
""" r"""
selu activation selu activation
.. math:: .. math::
...@@ -726,7 +726,7 @@ def selu(x, ...@@ -726,7 +726,7 @@ def selu(x,
def softmax(x, axis=-1, dtype=None, name=None): def softmax(x, axis=-1, dtype=None, name=None):
""" r"""
This operator implements the softmax layer. The calculation process is as follows: This operator implements the softmax layer. The calculation process is as follows:
1. The dimension :attr:`axis` of ``x`` will be permuted to the last. 1. The dimension :attr:`axis` of ``x`` will be permuted to the last.
...@@ -880,7 +880,7 @@ def softmax(x, axis=-1, dtype=None, name=None): ...@@ -880,7 +880,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
def softplus(x, beta=1, threshold=20, name=None): def softplus(x, beta=1, threshold=20, name=None):
""" r"""
softplus activation softplus activation
.. math:: .. math::
...@@ -925,7 +925,7 @@ def softplus(x, beta=1, threshold=20, name=None): ...@@ -925,7 +925,7 @@ def softplus(x, beta=1, threshold=20, name=None):
def softshrink(x, threshold=0.5, name=None): def softshrink(x, threshold=0.5, name=None):
""" r"""
softshrink activation softshrink activation
.. math:: .. math::
...@@ -976,7 +976,7 @@ def softshrink(x, threshold=0.5, name=None): ...@@ -976,7 +976,7 @@ def softshrink(x, threshold=0.5, name=None):
def softsign(x, name=None): def softsign(x, name=None):
""" r"""
softsign activation softsign activation
.. math:: .. math::
...@@ -1013,7 +1013,7 @@ def softsign(x, name=None): ...@@ -1013,7 +1013,7 @@ def softsign(x, name=None):
def swish(x, name=None): def swish(x, name=None):
""" r"""
swish activation. swish activation.
.. math:: .. math::
...@@ -1091,7 +1091,7 @@ def tanhshrink(x, name=None): ...@@ -1091,7 +1091,7 @@ def tanhshrink(x, name=None):
def thresholded_relu(x, threshold=1.0, name=None): def thresholded_relu(x, threshold=1.0, name=None):
""" r"""
thresholded relu activation. thresholded relu activation.
.. math:: .. math::
...@@ -1137,7 +1137,7 @@ def thresholded_relu(x, threshold=1.0, name=None): ...@@ -1137,7 +1137,7 @@ def thresholded_relu(x, threshold=1.0, name=None):
def log_softmax(x, axis=-1, dtype=None, name=None): def log_softmax(x, axis=-1, dtype=None, name=None):
""" r"""
This operator implements the log_softmax layer. The calculation process is This operator implements the log_softmax layer. The calculation process is
as follows: as follows:
......
...@@ -1413,7 +1413,7 @@ def cosine_similarity(x1, x2, axis=1, eps=1e-8): ...@@ -1413,7 +1413,7 @@ def cosine_similarity(x1, x2, axis=1, eps=1e-8):
def linear(x, weight, bias=None, name=None): def linear(x, weight, bias=None, name=None):
""" r"""
Fully-connected linear transformation operator. For each input :math:`X` , Fully-connected linear transformation operator. For each input :math:`X` ,
the equation is: the equation is:
...@@ -1500,7 +1500,7 @@ def linear(x, weight, bias=None, name=None): ...@@ -1500,7 +1500,7 @@ def linear(x, weight, bias=None, name=None):
def label_smooth(label, prior_dist=None, epsilon=0.1, name=None): def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
""" r"""
Label smoothing is a mechanism to regularize the classifier layer and is called Label smoothing is a mechanism to regularize the classifier layer and is called
label-smoothing regularization (LSR). label-smoothing regularization (LSR).
......
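The usual LSR recipe behind this description mixes the one-hot target with a prior distribution (uniform when prior_dist is not given); a NumPy sketch with made-up numbers, not Paddle's kernel:

import numpy as np

epsilon, num_classes = 0.1, 4
onehot = np.eye(num_classes)[[2]]                 # one sample whose label is class 2
uniform_prior = np.full(num_classes, 1.0 / num_classes)

smoothed = (1 - epsilon) * onehot + epsilon * uniform_prior
print(smoothed)                                   # [[0.025 0.025 0.925 0.025]]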
...@@ -166,7 +166,7 @@ def conv1d(x, ...@@ -166,7 +166,7 @@ def conv1d(x,
groups=1, groups=1,
data_format='NCL', data_format='NCL',
name=None): name=None):
""" r"""
The convolution1D layer calculates the output based on the input, filter The convolution1D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and and strides, paddings, dilations, groups parameters. Input and
Output are in NCL format, where N is batch size, C is the number of Output are in NCL format, where N is batch size, C is the number of
...@@ -392,7 +392,7 @@ def conv2d(x, ...@@ -392,7 +392,7 @@ def conv2d(x,
groups=1, groups=1,
data_format="NCHW", data_format="NCHW",
name=None): name=None):
""" r"""
The convolution2D layer calculates the output based on the input, filter The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and and strides, paddings, dilations, groups parameters. Input and
...@@ -568,7 +568,7 @@ def conv1d_transpose(x, ...@@ -568,7 +568,7 @@ def conv1d_transpose(x,
output_size=None, output_size=None,
data_format="NCL", data_format="NCL",
name=None): name=None):
""" r"""
The 1-D convolution transpose layer calculates the output based on the input, The 1-D convolution transpose layer calculates the output based on the input,
filter, and dilation, stride, padding. Input(Input) and output(Output) filter, and dilation, stride, padding. Input(Input) and output(Output)
are in 'NCL' format or 'NLC' where N is batch size, C is the number of channels, are in 'NCL' format or 'NLC' where N is batch size, C is the number of channels,
...@@ -828,7 +828,7 @@ def conv2d_transpose(x, ...@@ -828,7 +828,7 @@ def conv2d_transpose(x,
output_size=None, output_size=None,
data_format='NCHW', data_format='NCHW',
name=None): name=None):
""" r"""
The convolution2D transpose layer calculates the output based on the input, The convolution2D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output) filter, and dilations, strides, paddings. Input(Input) and output(Output)
...@@ -1068,7 +1068,7 @@ def conv3d(x, ...@@ -1068,7 +1068,7 @@ def conv3d(x,
groups=1, groups=1,
data_format="NCDHW", data_format="NCDHW",
name=None): name=None):
""" r"""
The convolution3D layer calculates the output based on the input, filter The convolution3D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input(Input) and and strides, paddings, dilations, groups parameters. Input(Input) and
...@@ -1233,7 +1233,7 @@ def conv3d_transpose(x, ...@@ -1233,7 +1233,7 @@ def conv3d_transpose(x,
output_size=None, output_size=None,
data_format='NCDHW', data_format='NCDHW',
name=None): name=None):
""" r"""
The convolution3d transpose layer calculates the output based on the input, The convolution3d transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output) filter, and dilations, strides, paddings. Input(Input) and output(Output)
are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels, are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels,
......
...@@ -14,10 +14,7 @@ ...@@ -14,10 +14,7 @@
# TODO: define the extension functions # TODO: define the extension functions
__all__ = [ __all__ = ['diag_embed', 'row_conv']
'diag_embed',
'row_conv'
]
import numpy as np import numpy as np
from ...fluid.data_feeder import check_dtype from ...fluid.data_feeder import check_dtype
......
...@@ -111,7 +111,7 @@ def one_hot(x, num_classes, name=None): ...@@ -111,7 +111,7 @@ def one_hot(x, num_classes, name=None):
def embedding(x, weight, padding_idx=None, sparse=False, name=None): def embedding(x, weight, padding_idx=None, sparse=False, name=None):
""" r"""
The operator is used to lookup embeddings vector of ids provided by :attr:`x` . The operator is used to lookup embeddings vector of ids provided by :attr:`x` .
The shape of output Tensor is generated by appending the last dimension of the input Tensor shape The shape of output Tensor is generated by appending the last dimension of the input Tensor shape
......
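Functionally the lookup amounts to row indexing into the weight matrix; a NumPy sketch with hypothetical shapes:

import numpy as np

weight = np.random.rand(10, 3)       # vocabulary of 10 ids, embedding size 3
ids = np.array([[1, 4], [7, 0]])     # input of shape (2, 2)

out = weight[ids]                    # shape (2, 2, 3): the input shape plus the embedding size
print(out.shape)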
...@@ -184,7 +184,7 @@ def binary_cross_entropy_with_logits(logit, ...@@ -184,7 +184,7 @@ def binary_cross_entropy_with_logits(logit,
reduction='mean', reduction='mean',
pos_weight=None, pos_weight=None,
name=None): name=None):
""" r"""
This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits``
layer and some reduce operations. layer and some reduce operations.
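For orientation, the numerically stable form this sigmoid-plus-BCE combination is commonly evaluated with; the tensors below are made-up NumPy arrays, not Paddle's implementation:

import numpy as np

logit = np.array([2.0, -1.0, 0.5])
label = np.array([1.0, 0.0, 1.0])

# stable sigmoid cross entropy: max(x, 0) - x*y + log(1 + exp(-|x|))
loss = np.maximum(logit, 0) - logit * label + np.log1p(np.exp(-np.abs(logit)))
print(loss.mean())          # 'mean' reduction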
...@@ -461,7 +461,7 @@ def hsigmoid_loss(input, ...@@ -461,7 +461,7 @@ def hsigmoid_loss(input,
def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None): def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None):
""" r"""
This operator calculates smooth_l1_loss. Creates a criterion that uses a squared This operator calculates smooth_l1_loss. Creates a criterion that uses a squared
term if the absolute element-wise error falls below 1 and an L1 term otherwise. term if the absolute element-wise error falls below 1 and an L1 term otherwise.
In some cases it can prevent exploding gradients and it is more robust and less In some cases it can prevent exploding gradients and it is more robust and less
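The piecewise rule described here, sketched in NumPy with the threshold fixed at 1 for simplicity (the actual API additionally exposes a delta parameter):

import numpy as np

def smooth_l1(diff):
    absdiff = np.abs(diff)
    # squared term below 1, L1 term above it
    return np.where(absdiff < 1.0, 0.5 * absdiff ** 2, absdiff - 0.5)

print(smooth_l1(np.array([0.2, -3.0])))   # [0.02 2.5 ]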
...@@ -544,7 +544,7 @@ def margin_ranking_loss(input, ...@@ -544,7 +544,7 @@ def margin_ranking_loss(input,
margin=0.0, margin=0.0,
reduction='mean', reduction='mean',
name=None): name=None):
""" r"""
This op calculates the margin rank loss between the input, other and label, using the math function as follows. This op calculates the margin rank loss between the input, other and label, using the math function as follows.
...@@ -646,7 +646,7 @@ def margin_ranking_loss(input, ...@@ -646,7 +646,7 @@ def margin_ranking_loss(input,
def l1_loss(input, label, reduction='mean', name=None): def l1_loss(input, label, reduction='mean', name=None):
""" r"""
This operator computes the L1 Loss of Tensor ``input`` and ``label`` as follows. This operator computes the L1 Loss of Tensor ``input`` and ``label`` as follows.
If `reduction` set to ``'none'``, the loss is: If `reduction` set to ``'none'``, the loss is:
...@@ -840,7 +840,7 @@ def nll_loss(input, ...@@ -840,7 +840,7 @@ def nll_loss(input,
def kl_div(input, label, reduction='mean', name=None): def kl_div(input, label, reduction='mean', name=None):
""" r"""
This operator calculates the Kullback-Leibler divergence loss This operator calculates the Kullback-Leibler divergence loss
between Input(X) and Input(Target). Notes that Input(X) is the between Input(X) and Input(Target). Notes that Input(X) is the
log-probability and Input(Target) is the probability. log-probability and Input(Target) is the probability.
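Element-wise, with the input already a log-probability as noted above, the quantity being reduced is roughly the following (values are made up):

import numpy as np

target = np.array([0.2, 0.5, 0.3])            # probabilities
input_logprob = np.log([0.25, 0.45, 0.30])    # log-probabilities

pointwise = target * (np.log(target) - input_logprob)
print(pointwise.sum())                        # 'sum'-style reduction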
...@@ -947,7 +947,7 @@ def kl_div(input, label, reduction='mean', name=None): ...@@ -947,7 +947,7 @@ def kl_div(input, label, reduction='mean', name=None):
def mse_loss(input, label, reduction='mean', name=None): def mse_loss(input, label, reduction='mean', name=None):
""" r"""
This op accepts input predictions and label and returns the mean square error. This op accepts input predictions and label and returns the mean square error.
If :attr:`reduction` is set to ``'none'``, loss is calculated as: If :attr:`reduction` is set to ``'none'``, loss is calculated as:
...@@ -1121,7 +1121,7 @@ def cross_entropy(input, ...@@ -1121,7 +1121,7 @@ def cross_entropy(input,
weight=None, weight=None,
ignore_index=-100, ignore_index=-100,
reduction='mean'): reduction='mean'):
""" r"""
This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``, This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``,
and ``NLLLoss`` together. and ``NLLLoss`` together.
...@@ -1252,7 +1252,7 @@ def sigmoid_focal_loss(logit, ...@@ -1252,7 +1252,7 @@ def sigmoid_focal_loss(logit,
gamma=2.0, gamma=2.0,
reduction='sum', reduction='sum',
name=None): name=None):
""" r"""
`Focal Loss <https://arxiv.org/abs/1708.02002>`_ is proposed to address the `Focal Loss <https://arxiv.org/abs/1708.02002>`_ is proposed to address the
foreground-background class imbalance for classification tasks. It down-weights foreground-background class imbalance for classification tasks. It down-weights
easily-classified examples and thus focuses training on hard examples. For example, easily-classified examples and thus focuses training on hard examples. For example,
......
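The per-example focal term is usually written as below; alpha and gamma are the customary defaults, and the whole snippet is a sketch of the idea, not necessarily Paddle's exact formulation:

import numpy as np

def focal_term(logit, label, alpha=0.25, gamma=2.0):
    p = 1.0 / (1.0 + np.exp(-logit))              # sigmoid probability
    pt = np.where(label == 1, p, 1 - p)           # probability assigned to the true class
    at = np.where(label == 1, alpha, 1 - alpha)
    return -at * (1 - pt) ** gamma * np.log(pt)   # easy examples get down-weighted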
...@@ -35,7 +35,7 @@ __all__ = [ ...@@ -35,7 +35,7 @@ __all__ = [
def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
""" r"""
This op normalizes ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes This op normalizes ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes
.. math:: .. math::
...@@ -412,7 +412,7 @@ def local_response_norm(x, ...@@ -412,7 +412,7 @@ def local_response_norm(x,
k=1., k=1.,
data_format="NCHW", data_format="NCHW",
name=None): name=None):
""" r"""
Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions. Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions.
For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_ For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_
......
...@@ -54,11 +54,7 @@ import numpy as np ...@@ -54,11 +54,7 @@ import numpy as np
# from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS # from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS
# from ...fluid.layers import shuffle_channel #DEFINE_ALIAS # from ...fluid.layers import shuffle_channel #DEFINE_ALIAS
__all__ = [ __all__ = ['affine_grid', 'grid_sample', 'pixel_shuffle']
'affine_grid',
'grid_sample',
'pixel_shuffle'
]
def affine_grid(theta, out_shape, align_corners=True, name=None): def affine_grid(theta, out_shape, align_corners=True, name=None):
......
...@@ -19,7 +19,7 @@ __all__ = ['KaimingUniform', 'KaimingNormal'] ...@@ -19,7 +19,7 @@ __all__ = ['KaimingUniform', 'KaimingNormal']
class KaimingNormal(MSRAInitializer): class KaimingNormal(MSRAInitializer):
"""Implements the Kaiming Normal initializer r"""Implements the Kaiming Normal initializer
This class implements the weight initialization from the paper This class implements the weight initialization from the paper
`Delving Deep into Rectifiers: Surpassing Human-Level Performance on `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
...@@ -62,7 +62,7 @@ class KaimingNormal(MSRAInitializer): ...@@ -62,7 +62,7 @@ class KaimingNormal(MSRAInitializer):
class KaimingUniform(MSRAInitializer): class KaimingUniform(MSRAInitializer):
"""Implements the Kaiming Uniform initializer r"""Implements the Kaiming Uniform initializer
This class implements the weight initialization from the paper This class implements the weight initialization from the paper
`Delving Deep into Rectifiers: Surpassing Human-Level Performance on `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
......
...@@ -18,7 +18,7 @@ __all__ = ['XavierNormal', 'XavierUniform'] ...@@ -18,7 +18,7 @@ __all__ = ['XavierNormal', 'XavierUniform']
class XavierNormal(XavierInitializer): class XavierNormal(XavierInitializer):
""" r"""
This class implements the Xavier weight initializer from the paper This class implements the Xavier weight initializer from the paper
`Understanding the difficulty of training deep feedforward neural `Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_ networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
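The Glorot/Xavier normal rule ties the standard deviation to fan-in and fan-out; sketched below with made-up fan sizes, not the initializer's actual code path:

import numpy as np

fan_in, fan_out = 128, 64
std = np.sqrt(2.0 / (fan_in + fan_out))           # Glorot scaling
w = np.random.normal(loc=0.0, scale=std, size=(fan_in, fan_out))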
...@@ -71,7 +71,7 @@ class XavierNormal(XavierInitializer): ...@@ -71,7 +71,7 @@ class XavierNormal(XavierInitializer):
class XavierUniform(XavierInitializer): class XavierUniform(XavierInitializer):
""" r"""
This class implements the Xavier weight initializer from the paper This class implements the Xavier weight initializer from the paper
`Understanding the difficulty of training deep feedforward neural `Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_ networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
......
...@@ -50,7 +50,7 @@ from .. import functional as F ...@@ -50,7 +50,7 @@ from .. import functional as F
class ELU(layers.Layer): class ELU(layers.Layer):
""" r"""
ELU Activation. ELU Activation.
.. math:: .. math::
...@@ -88,7 +88,7 @@ class ELU(layers.Layer): ...@@ -88,7 +88,7 @@ class ELU(layers.Layer):
class GELU(layers.Layer): class GELU(layers.Layer):
""" r"""
GELU Activation. GELU Activation.
If approximate is True If approximate is True
...@@ -137,7 +137,7 @@ class GELU(layers.Layer): ...@@ -137,7 +137,7 @@ class GELU(layers.Layer):
class Hardshrink(layers.Layer): class Hardshrink(layers.Layer):
""" r"""
Hardshrink Activation Hardshrink Activation
.. math:: .. math::
...@@ -181,7 +181,7 @@ class Hardshrink(layers.Layer): ...@@ -181,7 +181,7 @@ class Hardshrink(layers.Layer):
class Hardswish(layers.Layer): class Hardswish(layers.Layer):
""" r"""
Hardswish activation Hardswish activation
Hardswish is proposed in MobileNetV3, and performs better in computational stability Hardswish is proposed in MobileNetV3, and performs better in computational stability
...@@ -227,7 +227,7 @@ class Hardswish(layers.Layer): ...@@ -227,7 +227,7 @@ class Hardswish(layers.Layer):
class Tanh(layers.Layer): class Tanh(layers.Layer):
""" r"""
Tanh Activation. Tanh Activation.
.. math:: .. math::
...@@ -264,7 +264,7 @@ class Tanh(layers.Layer): ...@@ -264,7 +264,7 @@ class Tanh(layers.Layer):
class Hardtanh(layers.Layer): class Hardtanh(layers.Layer):
""" r"""
Hardtanh Activation Hardtanh Activation
.. math:: .. math::
...@@ -442,7 +442,7 @@ class ReLU6(layers.Layer): ...@@ -442,7 +442,7 @@ class ReLU6(layers.Layer):
class SELU(layers.Layer): class SELU(layers.Layer):
""" r"""
SELU Activation SELU Activation
.. math:: .. math::
...@@ -488,7 +488,7 @@ class SELU(layers.Layer): ...@@ -488,7 +488,7 @@ class SELU(layers.Layer):
class LeakyReLU(layers.Layer): class LeakyReLU(layers.Layer):
""" r"""
Leaky ReLU Activation. Leaky ReLU Activation.
.. math:: .. math::
...@@ -574,7 +574,7 @@ class Sigmoid(layers.Layer): ...@@ -574,7 +574,7 @@ class Sigmoid(layers.Layer):
class Hardsigmoid(layers.Layer): class Hardsigmoid(layers.Layer):
""" r"""
This interface is used to construct a callable object of the ``Hardsigmoid`` class. This interface is used to construct a callable object of the ``Hardsigmoid`` class.
This layer calculates the `hardsigmoid` of input x. This layer calculates the `hardsigmoid` of input x.
...@@ -621,7 +621,7 @@ class Hardsigmoid(layers.Layer): ...@@ -621,7 +621,7 @@ class Hardsigmoid(layers.Layer):
class Softplus(layers.Layer): class Softplus(layers.Layer):
""" r"""
Softplus Activation Softplus Activation
.. math:: .. math::
...@@ -661,7 +661,7 @@ class Softplus(layers.Layer): ...@@ -661,7 +661,7 @@ class Softplus(layers.Layer):
class Softshrink(layers.Layer): class Softshrink(layers.Layer):
""" r"""
Softshrink Activation Softshrink Activation
.. math:: .. math::
...@@ -702,7 +702,7 @@ class Softshrink(layers.Layer): ...@@ -702,7 +702,7 @@ class Softshrink(layers.Layer):
class Softsign(layers.Layer): class Softsign(layers.Layer):
""" r"""
Softsign Activation Softsign Activation
.. math:: .. math::
...@@ -737,7 +737,7 @@ class Softsign(layers.Layer): ...@@ -737,7 +737,7 @@ class Softsign(layers.Layer):
class Swish(layers.Layer): class Swish(layers.Layer):
""" r"""
Swish Activation. Swish Activation.
.. math:: .. math::
...@@ -807,7 +807,7 @@ class Tanhshrink(layers.Layer): ...@@ -807,7 +807,7 @@ class Tanhshrink(layers.Layer):
class ThresholdedReLU(layers.Layer): class ThresholdedReLU(layers.Layer):
""" r"""
Thresholded ReLU Activation Thresholded ReLU Activation
.. math:: .. math::
...@@ -847,7 +847,7 @@ class ThresholdedReLU(layers.Layer): ...@@ -847,7 +847,7 @@ class ThresholdedReLU(layers.Layer):
class LogSigmoid(layers.Layer): class LogSigmoid(layers.Layer):
""" r"""
LogSigmoid Activation. LogSigmoid Activation.
.. math:: .. math::
...@@ -882,7 +882,7 @@ class LogSigmoid(layers.Layer): ...@@ -882,7 +882,7 @@ class LogSigmoid(layers.Layer):
class Softmax(layers.Layer): class Softmax(layers.Layer):
""" r"""
Softmax Activation. Softmax Activation.
This operator implements the softmax layer. The calculation process is as follows: This operator implements the softmax layer. The calculation process is as follows:
...@@ -1005,7 +1005,7 @@ class Softmax(layers.Layer): ...@@ -1005,7 +1005,7 @@ class Softmax(layers.Layer):
class LogSoftmax(layers.Layer): class LogSoftmax(layers.Layer):
""" r"""
This operator implements the log_softmax layer. The calculation process is as follows: This operator implements the log_softmax layer. The calculation process is as follows:
.. math:: .. math::
...@@ -1059,7 +1059,7 @@ class LogSoftmax(layers.Layer): ...@@ -1059,7 +1059,7 @@ class LogSoftmax(layers.Layer):
class Maxout(layers.Layer): class Maxout(layers.Layer):
""" r"""
Maxout Activation. Maxout Activation.
Assumed the input shape is (N, Ci, H, W). Assumed the input shape is (N, Ci, H, W).
......
...@@ -40,7 +40,7 @@ __all__ = [ ...@@ -40,7 +40,7 @@ __all__ = [
class Linear(layers.Layer): class Linear(layers.Layer):
""" r"""
Fully-connected linear transformation layer. For each input :math:`X` , Fully-connected linear transformation layer. For each input :math:`X` ,
the equation is: the equation is:
...@@ -381,7 +381,7 @@ class Upsample(layers.Layer): ...@@ -381,7 +381,7 @@ class Upsample(layers.Layer):
class Bilinear(layers.Layer): class Bilinear(layers.Layer):
""" r"""
This layer performs a bilinear transformation on two inputs. This layer performs a bilinear transformation on two inputs.
...@@ -988,7 +988,7 @@ class CosineSimilarity(layers.Layer): ...@@ -988,7 +988,7 @@ class CosineSimilarity(layers.Layer):
class Embedding(layers.Layer): class Embedding(layers.Layer):
""" r"""
**Embedding Layer** **Embedding Layer**
This interface is used to construct a callable object of the ``Embedding`` class. This interface is used to construct a callable object of the ``Embedding`` class.
......
...@@ -141,7 +141,7 @@ class _ConvNd(layers.Layer): ...@@ -141,7 +141,7 @@ class _ConvNd(layers.Layer):
class Conv1D(_ConvNd): class Conv1D(_ConvNd):
""" r"""
This interface is used to construct a callable object of the ``Conv1D`` class. This interface is used to construct a callable object of the ``Conv1D`` class.
For more details, refer to code examples. For more details, refer to code examples.
The convolution1D layer calculates the output based on the input, filter The convolution1D layer calculates the output based on the input, filter
...@@ -294,7 +294,7 @@ class Conv1D(_ConvNd): ...@@ -294,7 +294,7 @@ class Conv1D(_ConvNd):
class Conv1DTranspose(_ConvNd): class Conv1DTranspose(_ConvNd):
""" r"""
This interface is used to construct a callable object of the ``Conv1DTranspose`` class. This interface is used to construct a callable object of the ``Conv1DTranspose`` class.
For more details, refer to code examples. For more details, refer to code examples.
The 1-D convolution transpose layer calculates the output based on the input, The 1-D convolution transpose layer calculates the output based on the input,
...@@ -469,7 +469,7 @@ class Conv1DTranspose(_ConvNd): ...@@ -469,7 +469,7 @@ class Conv1DTranspose(_ConvNd):
class Conv2D(_ConvNd): class Conv2D(_ConvNd):
""" r"""
This interface is used to construct a callable object of the ``Conv2D`` class. This interface is used to construct a callable object of the ``Conv2D`` class.
For more details, refer to code examples. For more details, refer to code examples.
The convolution2D layer calculates the output based on the input, filter The convolution2D layer calculates the output based on the input, filter
...@@ -626,7 +626,7 @@ class Conv2D(_ConvNd): ...@@ -626,7 +626,7 @@ class Conv2D(_ConvNd):
class Conv2DTranspose(_ConvNd): class Conv2DTranspose(_ConvNd):
""" r"""
This interface is used to construct a callable object of the ``Conv2DTranspose`` class. This interface is used to construct a callable object of the ``Conv2DTranspose`` class.
For more details, refer to code examples. For more details, refer to code examples.
The convolution2D transpose layer calculates the output based on the input, The convolution2D transpose layer calculates the output based on the input,
...@@ -786,7 +786,7 @@ class Conv2DTranspose(_ConvNd): ...@@ -786,7 +786,7 @@ class Conv2DTranspose(_ConvNd):
class Conv3D(_ConvNd): class Conv3D(_ConvNd):
""" r"""
**Convolution3d Layer** **Convolution3d Layer**
The convolution3d layer calculates the output based on the input, filter The convolution3d layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input(Input) and and strides, paddings, dilations, groups parameters. Input(Input) and
...@@ -943,7 +943,7 @@ class Conv3D(_ConvNd): ...@@ -943,7 +943,7 @@ class Conv3D(_ConvNd):
class Conv3DTranspose(_ConvNd): class Conv3DTranspose(_ConvNd):
""" r"""
**Convolution3D transpose layer** **Convolution3D transpose layer**
The convolution3D transpose layer calculates the output based on the input, The convolution3D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output) filter, and dilations, strides, paddings. Input(Input) and output(Output)
......
...@@ -24,7 +24,7 @@ from ...fluid.layer_helper import LayerHelper ...@@ -24,7 +24,7 @@ from ...fluid.layer_helper import LayerHelper
class PairwiseDistance(layers.Layer): class PairwiseDistance(layers.Layer):
""" r"""
This operator computes the pairwise distance between two vectors. The This operator computes the pairwise distance between two vectors. The
distance is calculated by p-order norm: distance is calculated by p-order norm:
......
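The p-order norm referred to above, applied to the element-wise difference; a NumPy sketch in which epsilon is only there for numerical safety and the parameter defaults are illustrative:

import numpy as np

def pairwise_distance(x, y, p=2.0, epsilon=1e-6):
    diff = x - y + epsilon
    return np.power(np.abs(diff), p).sum(axis=-1) ** (1.0 / p)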
...@@ -36,7 +36,7 @@ __all__ = [ ...@@ -36,7 +36,7 @@ __all__ = [
class BCEWithLogitsLoss(fluid.dygraph.Layer): class BCEWithLogitsLoss(fluid.dygraph.Layer):
""" r"""
This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits``
layer and some reduce operations. layer and some reduce operations.
...@@ -141,7 +141,7 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer): ...@@ -141,7 +141,7 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer):
class CrossEntropyLoss(fluid.dygraph.Layer): class CrossEntropyLoss(fluid.dygraph.Layer):
""" r"""
:alias_main: paddle.nn.CrossEntropyLoss :alias_main: paddle.nn.CrossEntropyLoss
:alias: paddle.nn.CrossEntropyLoss,paddle.nn.layer.CrossEntropyLoss,paddle.nn.layer.loss.CrossEntropyLoss :alias: paddle.nn.CrossEntropyLoss,paddle.nn.layer.CrossEntropyLoss,paddle.nn.layer.loss.CrossEntropyLoss
...@@ -375,7 +375,7 @@ class HSigmoidLoss(fluid.dygraph.Layer): ...@@ -375,7 +375,7 @@ class HSigmoidLoss(fluid.dygraph.Layer):
class MSELoss(fluid.dygraph.layers.Layer): class MSELoss(fluid.dygraph.layers.Layer):
""" r"""
**Mean Square Error Loss** **Mean Square Error Loss**
Computes the mean square error (squared L2 norm) of given input and label. Computes the mean square error (squared L2 norm) of given input and label.
...@@ -454,7 +454,7 @@ class MSELoss(fluid.dygraph.layers.Layer): ...@@ -454,7 +454,7 @@ class MSELoss(fluid.dygraph.layers.Layer):
class L1Loss(fluid.dygraph.Layer): class L1Loss(fluid.dygraph.Layer):
""" r"""
This interface is used to construct a callable object of the ``L1Loss`` class. This interface is used to construct a callable object of the ``L1Loss`` class.
The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows. The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows.
...@@ -622,7 +622,7 @@ class BCELoss(fluid.dygraph.Layer): ...@@ -622,7 +622,7 @@ class BCELoss(fluid.dygraph.Layer):
class NLLLoss(fluid.dygraph.Layer): class NLLLoss(fluid.dygraph.Layer):
""" r"""
This class accepts input and target label and returns negative log likelihood This class accepts input and target label and returns negative log likelihood
cross error. It is useful to train a classification problem with C classes. cross error. It is useful to train a classification problem with C classes.
...@@ -733,7 +733,7 @@ class NLLLoss(fluid.dygraph.Layer): ...@@ -733,7 +733,7 @@ class NLLLoss(fluid.dygraph.Layer):
class KLDivLoss(fluid.dygraph.Layer): class KLDivLoss(fluid.dygraph.Layer):
""" r"""
This interface calculates the Kullback-Leibler divergence loss This interface calculates the Kullback-Leibler divergence loss
between Input(X) and Input(Target). Notes that Input(X) is the between Input(X) and Input(Target). Notes that Input(X) is the
log-probability and Input(Target) is the probability. log-probability and Input(Target) is the probability.
...@@ -806,7 +806,7 @@ class KLDivLoss(fluid.dygraph.Layer): ...@@ -806,7 +806,7 @@ class KLDivLoss(fluid.dygraph.Layer):
class MarginRankingLoss(fluid.dygraph.Layer): class MarginRankingLoss(fluid.dygraph.Layer):
""" r"""
This interface is used to construct a callable object of the ``MarginRankingLoss`` class. This interface is used to construct a callable object of the ``MarginRankingLoss`` class.
The MarginRankingLoss layer calculates the margin rank loss between the input, other and label The MarginRankingLoss layer calculates the margin rank loss between the input, other and label
...@@ -958,7 +958,7 @@ class CTCLoss(fluid.dygraph.Layer): ...@@ -958,7 +958,7 @@ class CTCLoss(fluid.dygraph.Layer):
class SmoothL1Loss(fluid.dygraph.Layer): class SmoothL1Loss(fluid.dygraph.Layer):
""" r"""
This operator calculates smooth_l1_loss. Creates a criterion that uses a squared This operator calculates smooth_l1_loss. Creates a criterion that uses a squared
term if the absolute element-wise error falls below 1 and an L1 term otherwise. term if the absolute element-wise error falls below 1 and an L1 term otherwise.
In some cases it can prevent exploding gradients and it is more robust and less In some cases it can prevent exploding gradients and it is more robust and less
......
...@@ -109,7 +109,7 @@ class _InstanceNormBase(layers.Layer): ...@@ -109,7 +109,7 @@ class _InstanceNormBase(layers.Layer):
class InstanceNorm1D(_InstanceNormBase): class InstanceNorm1D(_InstanceNormBase):
""" r"""
Applies Instance Normalization over a 3D input (a mini-batch of 1D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization . Applies Instance Normalization over a 3D input (a mini-batch of 1D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization .
DataLayout: NCL `[batch, in_channels, length]` DataLayout: NCL `[batch, in_channels, length]`
...@@ -181,7 +181,7 @@ class InstanceNorm1D(_InstanceNormBase): ...@@ -181,7 +181,7 @@ class InstanceNorm1D(_InstanceNormBase):
class InstanceNorm2D(_InstanceNormBase): class InstanceNorm2D(_InstanceNormBase):
""" r"""
Applies Instance Normalization over a 4D input (a mini-batch of 2D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization . Applies Instance Normalization over a 4D input (a mini-batch of 2D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization .
DataLayout: NCHW `[batch, in_channels, in_height, in_width]` DataLayout: NCHW `[batch, in_channels, in_height, in_width]`
...@@ -252,7 +252,7 @@ class InstanceNorm2D(_InstanceNormBase): ...@@ -252,7 +252,7 @@ class InstanceNorm2D(_InstanceNormBase):
class InstanceNorm3D(_InstanceNormBase): class InstanceNorm3D(_InstanceNormBase):
""" r"""
Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization . Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization .
DataLayout: NCHW `[batch, in_channels, D, in_height, in_width]` DataLayout: NCHW `[batch, in_channels, D, in_height, in_width]`
...@@ -437,7 +437,7 @@ class GroupNorm(layers.Layer): ...@@ -437,7 +437,7 @@ class GroupNorm(layers.Layer):
class LayerNorm(layers.Layer): class LayerNorm(layers.Layer):
""" r"""
:alias_main: paddle.nn.LayerNorm :alias_main: paddle.nn.LayerNorm
:alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm
:old_api: paddle.fluid.dygraph.LayerNorm :old_api: paddle.fluid.dygraph.LayerNorm
...@@ -649,7 +649,7 @@ class _BatchNormBase(layers.Layer): ...@@ -649,7 +649,7 @@ class _BatchNormBase(layers.Layer):
class BatchNorm1D(_BatchNormBase): class BatchNorm1D(_BatchNormBase):
""" r"""
Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D inputs with additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D inputs with additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
When track_running_stats = False, the :math:`\\mu_{\\beta}` When track_running_stats = False, the :math:`\\mu_{\\beta}`
...@@ -740,7 +740,7 @@ class BatchNorm1D(_BatchNormBase): ...@@ -740,7 +740,7 @@ class BatchNorm1D(_BatchNormBase):
class BatchNorm2D(_BatchNormBase): class BatchNorm2D(_BatchNormBase):
""" r"""
Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs with additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
When track_running_stats = False, the :math:`\\mu_{\\beta}` When track_running_stats = False, the :math:`\\mu_{\\beta}`
...@@ -829,7 +829,7 @@ class BatchNorm2D(_BatchNormBase): ...@@ -829,7 +829,7 @@ class BatchNorm2D(_BatchNormBase):
class BatchNorm3D(_BatchNormBase): class BatchNorm3D(_BatchNormBase):
""" r"""
Applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. Applies Batch Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
When track_running_stats = False, the :math:`\\mu_{\\beta}` When track_running_stats = False, the :math:`\\mu_{\\beta}`
...@@ -919,7 +919,7 @@ class BatchNorm3D(_BatchNormBase): ...@@ -919,7 +919,7 @@ class BatchNorm3D(_BatchNormBase):
class SyncBatchNorm(_BatchNormBase): class SyncBatchNorm(_BatchNormBase):
""" r"""
This interface is used to construct a callable object of the ``SyncBatchNorm`` class. This interface is used to construct a callable object of the ``SyncBatchNorm`` class.
It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can
be used as a normalizer function for other operations, such as conv2d and fully connected be used as a normalizer function for other operations, such as conv2d and fully connected
......
...@@ -120,7 +120,7 @@ class AvgPool1D(layers.Layer): ...@@ -120,7 +120,7 @@ class AvgPool1D(layers.Layer):
class AvgPool2D(layers.Layer): class AvgPool2D(layers.Layer):
""" r"""
This operation applies 2D average pooling over input features based on the input, This operation applies 2D average pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels, in NCHW format, where N is batch size, C is the number of channels,
...@@ -401,7 +401,7 @@ class MaxPool1D(layers.Layer): ...@@ -401,7 +401,7 @@ class MaxPool1D(layers.Layer):
class MaxPool2D(layers.Layer): class MaxPool2D(layers.Layer):
""" r"""
This operation applies 2D max pooling over input feature based on the input, This operation applies 2D max pooling over input feature based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels, in NCHW format, where N is batch size, C is the number of channels,
...@@ -595,7 +595,7 @@ class MaxPool3D(layers.Layer): ...@@ -595,7 +595,7 @@ class MaxPool3D(layers.Layer):
class AdaptiveAvgPool1D(layers.Layer): class AdaptiveAvgPool1D(layers.Layer):
""" r"""
This operation applies a 1D adaptive average pooling over an input signal composed This operation applies a 1D adaptive average pooling over an input signal composed
of several input planes, based on the input, output_size, return_mask parameters. of several input planes, based on the input, output_size, return_mask parameters.
...@@ -663,7 +663,7 @@ class AdaptiveAvgPool1D(layers.Layer): ...@@ -663,7 +663,7 @@ class AdaptiveAvgPool1D(layers.Layer):
class AdaptiveAvgPool2D(layers.Layer): class AdaptiveAvgPool2D(layers.Layer):
""" r"""
This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size. of the output tensor are determined by the parameter output_size.
...@@ -745,7 +745,7 @@ class AdaptiveAvgPool2D(layers.Layer): ...@@ -745,7 +745,7 @@ class AdaptiveAvgPool2D(layers.Layer):
class AdaptiveAvgPool3D(layers.Layer): class AdaptiveAvgPool3D(layers.Layer):
""" r"""
This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size. of the output tensor are determined by the parameter output_size.
......
...@@ -157,7 +157,7 @@ class MultiHeadAttention(Layer): ...@@ -157,7 +157,7 @@ class MultiHeadAttention(Layer):
embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) embed_dim, embed_dim, weight_attr, bias_attr=bias_attr)
def _prepare_qkv(self, query, key, value, cache=None): def _prepare_qkv(self, query, key, value, cache=None):
""" r"""
Prepares linear projected queries, keys and values for usage of subsequent Prepares linear projected queries, keys and values for usage of subsequent
multiple parallel attention. If `cache` is not None, using cached results multiple parallel attention. If `cache` is not None, using cached results
to reduce redundant calculations. to reduce redundant calculations.
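After the linear projections, splitting into heads is typically just a reshape plus transpose; a sketch with hypothetical sizes, not the layer's actual code:

import numpy as np

batch, seq_len, embed_dim, num_heads = 2, 5, 8, 2
head_dim = embed_dim // num_heads

q = np.random.rand(batch, seq_len, embed_dim)          # projected queries
q = q.reshape(batch, seq_len, num_heads, head_dim)     # split the last dimension into heads
q = q.transpose(0, 2, 1, 3)                            # (batch, num_heads, seq_len, head_dim)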
...@@ -212,7 +212,7 @@ class MultiHeadAttention(Layer): ...@@ -212,7 +212,7 @@ class MultiHeadAttention(Layer):
return (q, k, v) if cache is None else (q, k, v, cache) return (q, k, v) if cache is None else (q, k, v, cache)
def compute_kv(self, key, value): def compute_kv(self, key, value):
""" r"""
Applies linear projection on input keys and values, then splits heads Applies linear projection on input keys and values, then splits heads
(reshape and transpose) to get keys and values from different representation (reshape and transpose) to get keys and values from different representation
subspaces. The results are used as key-values pairs for subsequent multiple subspaces. The results are used as key-values pairs for subsequent multiple
...@@ -312,7 +312,7 @@ class MultiHeadAttention(Layer): ...@@ -312,7 +312,7 @@ class MultiHeadAttention(Layer):
return self.Cache(key, value) return self.Cache(key, value)
def forward(self, query, key, value, attn_mask=None, cache=None): def forward(self, query, key, value, attn_mask=None, cache=None):
""" r"""
Applies multi-head attention to map queries and a set of key-value pairs Applies multi-head attention to map queries and a set of key-value pairs
to outputs. to outputs.
...@@ -499,7 +499,7 @@ class TransformerEncoderLayer(Layer): ...@@ -499,7 +499,7 @@ class TransformerEncoderLayer(Layer):
self.activation = getattr(F, activation) self.activation = getattr(F, activation)
def forward(self, src, src_mask=None): def forward(self, src, src_mask=None):
""" r"""
Applies a Transformer encoder layer on the input. Applies a Transformer encoder layer on the input.
Parameters: Parameters:
...@@ -575,7 +575,7 @@ class TransformerEncoder(Layer): ...@@ -575,7 +575,7 @@ class TransformerEncoder(Layer):
self.norm = norm self.norm = norm
def forward(self, src, src_mask=None): def forward(self, src, src_mask=None):
""" r"""
Applies a stack of N Transformer encoder layers on inputs. If `norm` is Applies a stack of N Transformer encoder layers on inputs. If `norm` is
provided, also applies layer normalization on the output of last encoder provided, also applies layer normalization on the output of last encoder
layer. layer.
...@@ -725,7 +725,7 @@ class TransformerDecoderLayer(Layer): ...@@ -725,7 +725,7 @@ class TransformerDecoderLayer(Layer):
self.activation = getattr(F, activation) self.activation = getattr(F, activation)
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None):
""" r"""
Applies a Transformer decoder layer on the input. Applies a Transformer decoder layer on the input.
Parameters: Parameters:
...@@ -801,7 +801,7 @@ class TransformerDecoderLayer(Layer): ...@@ -801,7 +801,7 @@ class TransformerDecoderLayer(Layer):
static_cache)) static_cache))
def gen_cache(self, memory): def gen_cache(self, memory):
""" r"""
Generates cache for `forward` usage. The generated cache is a tuple Generates cache for `forward` usage. The generated cache is a tuple
composed of an instance of `MultiHeadAttention.Cache` and an instance composed of an instance of `MultiHeadAttention.Cache` and an instance
of `MultiHeadAttention.StaticCache`. of `MultiHeadAttention.StaticCache`.
...@@ -873,7 +873,7 @@ class TransformerDecoder(Layer): ...@@ -873,7 +873,7 @@ class TransformerDecoder(Layer):
self.norm = norm self.norm = norm
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None):
""" r"""
Applies a stack of N Transformer decoder layers on inputs. If `norm` is Applies a stack of N Transformer decoder layers on inputs. If `norm` is
provided, also applies layer normalization on the output of last decoder provided, also applies layer normalization on the output of last decoder
layer. layer.
...@@ -937,7 +937,7 @@ class TransformerDecoder(Layer): ...@@ -937,7 +937,7 @@ class TransformerDecoder(Layer):
return output if cache is None else (output, new_caches) return output if cache is None else (output, new_caches)
def gen_cache(self, memory, do_zip=False): def gen_cache(self, memory, do_zip=False):
""" r"""
Generates cache for `forward` usage. The generated cache is a list, and Generates cache for `forward` usage. The generated cache is a list, and
each element in it is a tuple( :code:`(incremental_cache, static_cache)` ) each element in it is a tuple( :code:`(incremental_cache, static_cache)` )
produced by `TransformerDecoderLayer.gen_cache`. See `TransformerDecoderLayer.gen_cache` produced by `TransformerDecoderLayer.gen_cache`. See `TransformerDecoderLayer.gen_cache`
...@@ -1139,7 +1139,7 @@ class Transformer(Layer): ...@@ -1139,7 +1139,7 @@ class Transformer(Layer):
self.nhead = nhead self.nhead = nhead
def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None): def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None):
""" r"""
Applies a Transformer model on the inputs. Applies a Transformer model on the inputs.
Parameters: Parameters:
......
...@@ -153,7 +153,7 @@ class WeightNorm(object): ...@@ -153,7 +153,7 @@ class WeightNorm(object):
def weight_norm(layer, name='weight', dim=0): def weight_norm(layer, name='weight', dim=0):
""" r"""
This weight_norm layer applies weight normalization to a parameter according to the This weight_norm layer applies weight normalization to a parameter according to the
following formula: following formula:
......
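The decomposition this refers to is conventionally w = g * v / ||v||; a NumPy sketch for dim=0 with made-up shapes:

import numpy as np

v = np.random.rand(4, 3)        # direction parameter
g = np.random.rand(4, 1)        # magnitude parameter, one scalar per output row (dim=0)

w = g * v / np.linalg.norm(v, axis=1, keepdims=True)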
...@@ -21,7 +21,7 @@ __all__ = ["Adadelta"] ...@@ -21,7 +21,7 @@ __all__ = ["Adadelta"]
class Adadelta(Optimizer): class Adadelta(Optimizer):
""" r"""
**Notes: This API does not support sparse parameter optimization.** **Notes: This API does not support sparse parameter optimization.**
Adadelta Optimizer. Please refer to this for details: Adadelta Optimizer. Please refer to this for details:
......
...@@ -21,7 +21,7 @@ __all__ = ["Adagrad"] ...@@ -21,7 +21,7 @@ __all__ = ["Adagrad"]
class Adagrad(Optimizer): class Adagrad(Optimizer):
""" r"""
The Adaptive Gradient optimizer (Adagrad for short) use an optimization described The Adaptive Gradient optimizer (Adagrad for short) use an optimization described
in paper: `Adaptive Subgradient Methods for Online Learning and in paper: `Adaptive Subgradient Methods for Online Learning and
Stochastic Optimization <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_. Stochastic Optimization <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_.
......
...@@ -24,7 +24,7 @@ __all__ = ["Adam"] ...@@ -24,7 +24,7 @@ __all__ = ["Adam"]
class Adam(Optimizer): class Adam(Optimizer):
""" r"""
The Adam optimizer uses an optimization described at the end The Adam optimizer uses an optimization described at the end
of section 2 of `Adam paper <https://arxiv.org/abs/1412.6980>`_ , of section 2 of `Adam paper <https://arxiv.org/abs/1412.6980>`_ ,
it can dynamically adjusts the learning rate of each parameter using it can dynamically adjusts the learning rate of each parameter using
......
...@@ -21,7 +21,7 @@ __all__ = ["Adamax"] ...@@ -21,7 +21,7 @@ __all__ = ["Adamax"]
class Adamax(Optimizer): class Adamax(Optimizer):
""" r"""
The Adamax optimizer is implemented based on the Adamax Optimization The Adamax optimizer is implemented based on the Adamax Optimization
in Section 7 of `Adam paper <https://arxiv.org/abs/1412.6980>`_. in Section 7 of `Adam paper <https://arxiv.org/abs/1412.6980>`_.
The Adamax algorithm is a variant of the Adam algorithm based on the infinite norm, The Adamax algorithm is a variant of the Adam algorithm based on the infinite norm,
......
...@@ -23,7 +23,7 @@ __all__ = ['AdamW'] ...@@ -23,7 +23,7 @@ __all__ = ['AdamW']
class AdamW(Adam): class AdamW(Adam):
""" r"""
The AdamW optimizer is implemented based on the AdamW Optimization The AdamW optimizer is implemented based on the AdamW Optimization
in paper `DECOUPLED WEIGHT DECAY REGULARIZATION <https://arxiv.org/pdf/1711.05101.pdf>`_. in paper `DECOUPLED WEIGHT DECAY REGULARIZATION <https://arxiv.org/pdf/1711.05101.pdf>`_.
it can resolves the problem of L2 regularization failure in the Adam optimizer. it can resolves the problem of L2 regularization failure in the Adam optimizer.
......
...@@ -192,7 +192,7 @@ class LRScheduler(object): ...@@ -192,7 +192,7 @@ class LRScheduler(object):
class NoamDecay(LRScheduler): class NoamDecay(LRScheduler):
""" r"""
Applies Noam Decay to the initial learning rate. Applies Noam Decay to the initial learning rate.
...@@ -376,7 +376,7 @@ class PiecewiseDecay(LRScheduler): ...@@ -376,7 +376,7 @@ class PiecewiseDecay(LRScheduler):
class NaturalExpDecay(LRScheduler): class NaturalExpDecay(LRScheduler):
""" r"""
Applies natural exponential decay to the initial learning rate. Applies natural exponential decay to the initial learning rate.
...@@ -455,7 +455,7 @@ class NaturalExpDecay(LRScheduler): ...@@ -455,7 +455,7 @@ class NaturalExpDecay(LRScheduler):
class InverseTimeDecay(LRScheduler): class InverseTimeDecay(LRScheduler):
""" r"""
Applies inverse time decay to the initial learning rate. Applies inverse time decay to the initial learning rate.
...@@ -536,7 +536,7 @@ class InverseTimeDecay(LRScheduler): ...@@ -536,7 +536,7 @@ class InverseTimeDecay(LRScheduler):
class PolynomialDecay(LRScheduler): class PolynomialDecay(LRScheduler):
""" r"""
Applies polynomial decay to the initial learning rate. Applies polynomial decay to the initial learning rate.
...@@ -656,7 +656,7 @@ class PolynomialDecay(LRScheduler): ...@@ -656,7 +656,7 @@ class PolynomialDecay(LRScheduler):
class LinearWarmup(LRScheduler): class LinearWarmup(LRScheduler):
""" r"""
Linear learning rate warm up strategy. Update the learning rate preliminarily before the normal learning rate scheduler. Linear learning rate warm up strategy. Update the learning rate preliminarily before the normal learning rate scheduler.
For more information, please refer to `Bag of Tricks for Image Classification with Convolutional Neural Networks <https://arxiv.org/abs/1812.01187>`_ For more information, please refer to `Bag of Tricks for Image Classification with Convolutional Neural Networks <https://arxiv.org/abs/1812.01187>`_
...@@ -794,7 +794,7 @@ class LinearWarmup(LRScheduler): ...@@ -794,7 +794,7 @@ class LinearWarmup(LRScheduler):
class ExponentialDecay(LRScheduler): class ExponentialDecay(LRScheduler):
""" r"""
Update learning rate by `gamma` each epoch. Update learning rate by `gamma` each epoch.
...@@ -1383,7 +1383,7 @@ class ReduceOnPlateau(LRScheduler): ...@@ -1383,7 +1383,7 @@ class ReduceOnPlateau(LRScheduler):
class CosineAnnealingDecay(LRScheduler): class CosineAnnealingDecay(LRScheduler):
""" r"""
Set the learning rate using a cosine annealing schedule, where :math:`\eta_{max}` is set to Set the learning rate using a cosine annealing schedule, where :math:`\eta_{max}` is set to
the initial learning_rate. :math:`T_{cur}` is the number of epochs since the last restart in the initial learning_rate. :math:`T_{cur}` is the number of epochs since the last restart in
......
...@@ -21,7 +21,7 @@ __all__ = ["Momentum"] ...@@ -21,7 +21,7 @@ __all__ = ["Momentum"]
class Momentum(Optimizer): class Momentum(Optimizer):
""" r"""
Simple Momentum optimizer with velocity state Simple Momentum optimizer with velocity state
......
...@@ -47,7 +47,7 @@ __all__ = ['Optimizer'] ...@@ -47,7 +47,7 @@ __all__ = ['Optimizer']
class Optimizer(object): class Optimizer(object):
"""Optimizer Base class. r"""Optimizer Base class.
Define the common interface of an optimizer. Define the common interface of an optimizer.
User should not use this class directly, User should not use this class directly,
......
...@@ -21,7 +21,7 @@ __all__ = ["RMSProp"] ...@@ -21,7 +21,7 @@ __all__ = ["RMSProp"]
class RMSProp(Optimizer): class RMSProp(Optimizer):
""" r"""
Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning
rate method. The original slides proposed RMSProp: Slide 29 of rate method. The original slides proposed RMSProp: Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf . http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf .
......
...@@ -21,7 +21,7 @@ __all__ = ["SGD"] ...@@ -21,7 +21,7 @@ __all__ = ["SGD"]
class SGD(Optimizer): class SGD(Optimizer):
""" r"""
Optimizer of the stochastic gradient descent algorithm. Optimizer of the stochastic gradient descent algorithm.
.. math:: .. math::
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" r"""
At training and testing time, PaddlePaddle programs need to read data. To ease At training and testing time, PaddlePaddle programs need to read data. To ease
the users' work to write data reading code, we define that the users' work to write data reading code, we define that
......
...@@ -18,7 +18,7 @@ import paddle.fluid as fluid ...@@ -18,7 +18,7 @@ import paddle.fluid as fluid
class L1Decay(fluid.regularizer.L1Decay): class L1Decay(fluid.regularizer.L1Decay):
""" r"""
Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse. Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse.
It can be set in :ref:`api_paddle_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ). It can be set in :ref:`api_paddle_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ).
...@@ -80,7 +80,7 @@ class L1Decay(fluid.regularizer.L1Decay): ...@@ -80,7 +80,7 @@ class L1Decay(fluid.regularizer.L1Decay):
class L2Decay(fluid.regularizer.L2Decay): class L2Decay(fluid.regularizer.L2Decay):
""" r"""
Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting. Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting.
It can be set in :ref:`api_paddle_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ). It can be set in :ref:`api_paddle_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ).
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
from __future__ import print_function from __future__ import print_function
import errno import errno
import inspect import inspect
import logging import logging
...@@ -31,7 +30,6 @@ from paddle.fluid.io import prepend_feed_ops, append_fetch_ops, save_persistable ...@@ -31,7 +30,6 @@ from paddle.fluid.io import prepend_feed_ops, append_fetch_ops, save_persistable
from paddle.fluid.io import load_persistables, _endpoints_replacement from paddle.fluid.io import load_persistables, _endpoints_replacement
from paddle.fluid.log_helper import get_logger from paddle.fluid.log_helper import get_logger
__all__ = [ __all__ = [
'save_inference_model', 'save_inference_model',
'load_inference_model', 'load_inference_model',
...@@ -44,10 +42,13 @@ _logger = get_logger( ...@@ -44,10 +42,13 @@ _logger = get_logger(
def _check_args(caller, args, supported_args=[], deprecated_args=[]): def _check_args(caller, args, supported_args=[], deprecated_args=[]):
for arg in args: for arg in args:
if arg in deprecated_args: if arg in deprecated_args:
raise ValueError("argument '{}' in function '{}' is deprecated, only {} are supported.".format(arg, caller, supported_args)) raise ValueError(
"argument '{}' in function '{}' is deprecated, only {} are supported.".
format(arg, caller, supported_args))
elif arg not in supported_args: elif arg not in supported_args:
raise ValueError( raise ValueError(
"function '{}' doesn't support argument '{}',\n only {} are supported.".format(caller, arg, supported_args)) "function '{}' doesn't support argument '{}',\n only {} are supported.".
format(caller, arg, supported_args))
@static_only @static_only
...@@ -129,14 +130,18 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor): ...@@ -129,14 +130,18 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor):
# verify feed_vars # verify feed_vars
if not isinstance(feed_vars, list): if not isinstance(feed_vars, list):
feed_vars = [feed_vars] feed_vars = [feed_vars]
if not feed_vars or not all([isinstance(var, Variable) for var in feed_vars]): if not feed_vars or not all(
raise ValueError("'feed_vars' should be a Variable or a list of Variable.") [isinstance(var, Variable) for var in feed_vars]):
raise ValueError(
"'feed_vars' should be a Variable or a list of Variable.")
# verify fetch_vars # verify fetch_vars
if not isinstance(fetch_vars, list): if not isinstance(fetch_vars, list):
fetch_vars = [fetch_vars] fetch_vars = [fetch_vars]
if not fetch_vars or not all([isinstance(var, Variable) for var in fetch_vars]): if not fetch_vars or not all(
raise ValueError("'fetch_vars' should be a Variable or a list of Variable.") [isinstance(var, Variable) for var in fetch_vars]):
raise ValueError(
"'fetch_vars' should be a Variable or a list of Variable.")
main_program = _get_valid_program() main_program = _get_valid_program()
# remind users to set auc_states to 0 if auc op were found. # remind users to set auc_states to 0 if auc op were found.
...@@ -145,7 +150,9 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor): ...@@ -145,7 +150,9 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor):
device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName() device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
op._set_attr(device_attr_name, "") op._set_attr(device_attr_name, "")
if op.type == 'auc': if op.type == 'auc':
warnings.warn("Be sure that you have set auc states to 0 before saving inference model.") warnings.warn(
"Be sure that you have set auc states to 0 before saving inference model."
)
break break
# fix the bug that the activation op's output as target will be pruned. # fix the bug that the activation op's output as target will be pruned.
...@@ -154,10 +161,11 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor): ...@@ -154,10 +161,11 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor):
with program_guard(main_program): with program_guard(main_program):
uniq_fetch_vars = [] uniq_fetch_vars = []
for i, var in enumerate(fetch_vars): for i, var in enumerate(fetch_vars):
var = layers.scale(var, 1., name="save_infer_model/scale_{}".format(i)) var = layers.scale(
var, 1., name="save_infer_model/scale_{}".format(i))
uniq_fetch_vars.append(var) uniq_fetch_vars.append(var)
fetch_vars = uniq_fetch_vars fetch_vars = uniq_fetch_vars
# save model # save model
origin_program = main_program.clone() origin_program = main_program.clone()
main_program = main_program.clone() main_program = main_program.clone()
...@@ -257,7 +265,7 @@ def load_inference_model(path_prefix, executor, **configs): ...@@ -257,7 +265,7 @@ def load_inference_model(path_prefix, executor, **configs):
""" """
# check configs # check configs
supported_args = ('model_filename', 'params_filename') supported_args = ('model_filename', 'params_filename')
deprecated_args = ('pserver_endpoints',) deprecated_args = ('pserver_endpoints', )
caller = inspect.currentframe().f_code.co_name caller = inspect.currentframe().f_code.co_name
_check_args(caller, configs, supported_args, deprecated_args) _check_args(caller, configs, supported_args, deprecated_args)
...@@ -268,8 +276,7 @@ def load_inference_model(path_prefix, executor, **configs): ...@@ -268,8 +276,7 @@ def load_inference_model(path_prefix, executor, **configs):
params_filename = configs.get('params_filename', None) params_filename = configs.get('params_filename', None)
if params_filename is None: if params_filename is None:
raise ValueError( raise ValueError(
"params_filename cannot be None when path_prefix is None." "params_filename cannot be None when path_prefix is None.")
)
load_dirname = path_prefix load_dirname = path_prefix
program_desc_str = model_filename program_desc_str = model_filename
params_filename = params_filename params_filename = params_filename
...@@ -297,18 +304,21 @@ def load_inference_model(path_prefix, executor, **configs): ...@@ -297,18 +304,21 @@ def load_inference_model(path_prefix, executor, **configs):
if model_filename is None: if model_filename is None:
model_path = os.path.join(path_prefix, "__model__") model_path = os.path.join(path_prefix, "__model__")
else: else:
model_path = os.path.join(path_prefix, model_filename + ".pdmodel") model_path = os.path.join(path_prefix,
model_filename + ".pdmodel")
if not os.path.exists(model_path): if not os.path.exists(model_path):
model_path = os.path.join(path_prefix, model_filename) model_path = os.path.join(path_prefix, model_filename)
# set params_path # set params_path
if params_filename is None: if params_filename is None:
params_path = os.path.join(path_prefix, "") params_path = os.path.join(path_prefix, "")
else: else:
params_path = os.path.join(path_prefix, params_filename + ".pdiparams") params_path = os.path.join(path_prefix,
params_filename + ".pdiparams")
if not os.path.exists(params_path): if not os.path.exists(params_path):
params_path = os.path.join(path_prefix, params_filename) params_path = os.path.join(path_prefix, params_filename)
_logger.warning("The old way to load inference model is deprecated." _logger.warning("The old way to load inference model is deprecated."
" model path: {}, params path: {}".format(model_path, params_path)) " model path: {}, params path: {}".format(
model_path, params_path))
with open(model_path, "rb") as f: with open(model_path, "rb") as f:
program_desc_str = f.read() program_desc_str = f.read()
load_dirname = os.path.dirname(params_path) load_dirname = os.path.dirname(params_path)
...@@ -328,4 +338,3 @@ def load_inference_model(path_prefix, executor, **configs): ...@@ -328,4 +338,3 @@ def load_inference_model(path_prefix, executor, **configs):
] ]
return [program, feed_target_names, fetch_targets] return [program, feed_target_names, fetch_targets]
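For the io.py changes above, the behaviour of the argument check is easiest to see in isolation. The sketch below re-implements _check_args locally (it is not imported from Paddle) and feeds it the removed pserver_endpoints keyword, which load_inference_model now rejects up front.

def _check_args(caller, args, supported_args=(), deprecated_args=()):
    # local re-implementation of the helper shown in the diff above
    for arg in args:
        if arg in deprecated_args:
            raise ValueError(
                "argument '{}' in function '{}' is deprecated, only {} are supported.".
                format(arg, caller, supported_args))
        elif arg not in supported_args:
            raise ValueError(
                "function '{}' doesn't support argument '{}',\n only {} are supported.".
                format(caller, arg, supported_args))

try:
    _check_args(
        "load_inference_model",
        {"pserver_endpoints": None},
        supported_args=("model_filename", "params_filename"),
        deprecated_args=("pserver_endpoints", ))
except ValueError as e:
    print(e)  # argument 'pserver_endpoints' ... is deprecated ...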
...@@ -26,7 +26,7 @@ def fc(x, ...@@ -26,7 +26,7 @@ def fc(x,
bias_attr=None, bias_attr=None,
activation=None, activation=None,
name=None): name=None):
""" r"""
Fully-Connected layer can take a tensor or a list of tensor as its inputs. Fully-Connected layer can take a tensor or a list of tensor as its inputs.
It creates a 2-D weight tensor for each input tensor, which represents its It creates a 2-D weight tensor for each input tensor, which represents its
...@@ -180,7 +180,7 @@ def deform_conv2d(x, ...@@ -180,7 +180,7 @@ def deform_conv2d(x,
weight_attr=None, weight_attr=None,
bias_attr=None, bias_attr=None,
name=None): name=None):
""" r"""
Compute 2-D deformable convolution on 4-D input. Compute 2-D deformable convolution on 4-D input.
Given input image x, output feature map y, the deformable convolution operation can be expressed as follow: Given input image x, output feature map y, the deformable convolution operation can be expressed as follow:
......
...@@ -54,7 +54,7 @@ __all__ = [ ...@@ -54,7 +54,7 @@ __all__ = [
@dygraph_only @dygraph_only
def to_tensor(data, dtype=None, place=None, stop_gradient=True): def to_tensor(data, dtype=None, place=None, stop_gradient=True):
""" r"""
Constructs a ``paddle.Tensor`` or ``paddle.ComplexTensor`` from ``data`` , Constructs a ``paddle.Tensor`` or ``paddle.ComplexTensor`` from ``data`` ,
which can be scalar, tuple, list, numpy\.ndarray, paddle\.Tensor, paddle\.ComplexTensor. which can be scalar, tuple, list, numpy\.ndarray, paddle\.Tensor, paddle\.ComplexTensor.
...@@ -609,7 +609,7 @@ def _tril_triu_op(helper): ...@@ -609,7 +609,7 @@ def _tril_triu_op(helper):
def tril(x, diagonal=0, name=None): def tril(x, diagonal=0, name=None):
""" r"""
:alias_main: paddle.tril :alias_main: paddle.tril
:alias: paddle.tril,paddle.tensor.tril,paddle.tensor.creation.tril :alias: paddle.tril,paddle.tensor.tril,paddle.tensor.creation.tril
...@@ -680,7 +680,7 @@ def tril(x, diagonal=0, name=None): ...@@ -680,7 +680,7 @@ def tril(x, diagonal=0, name=None):
def triu(x, diagonal=0, name=None): def triu(x, diagonal=0, name=None):
""" r"""
:alias_main: paddle.triu :alias_main: paddle.triu
:alias: paddle.triu,paddle.tensor.triu,paddle.tensor.creation.triu :alias: paddle.triu,paddle.tensor.triu,paddle.tensor.creation.triu
......
...@@ -453,7 +453,7 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): ...@@ -453,7 +453,7 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None):
def dist(x, y, p=2): def dist(x, y, p=2):
""" r"""
This OP returns the p-norm of (x - y). It is not a norm in a strict sense, only as a measure This OP returns the p-norm of (x - y). It is not a norm in a strict sense, only as a measure
of distance. The shapes of x and y must be broadcastable. The definition is as follows, for of distance. The shapes of x and y must be broadcastable. The definition is as follows, for
...@@ -740,7 +740,7 @@ def cross(x, y, axis=None, name=None): ...@@ -740,7 +740,7 @@ def cross(x, y, axis=None, name=None):
def cholesky(x, upper=False, name=None): def cholesky(x, upper=False, name=None):
""" r"""
Computes the Cholesky decomposition of one symmetric positive-definite Computes the Cholesky decomposition of one symmetric positive-definite
matrix or batches of symmetric positive-definite matrice. matrix or batches of symmetric positive-definite matrice.
......
...@@ -169,7 +169,7 @@ def flip(x, axis, name=None): ...@@ -169,7 +169,7 @@ def flip(x, axis, name=None):
def flatten(x, start_axis=0, stop_axis=-1, name=None): def flatten(x, start_axis=0, stop_axis=-1, name=None):
""" r"""
**Flatten op** **Flatten op**
Flattens a contiguous range of axes in a tensor according to start_axis and stop_axis. Flattens a contiguous range of axes in a tensor according to start_axis and stop_axis.
...@@ -565,7 +565,7 @@ def unique(x, ...@@ -565,7 +565,7 @@ def unique(x,
axis=None, axis=None,
dtype="int64", dtype="int64",
name=None): name=None):
""" r"""
Returns the unique elements of `x` in ascending order. Returns the unique elements of `x` in ascending order.
Args: Args:
...@@ -946,7 +946,7 @@ def scatter(x, index, updates, overwrite=True, name=None): ...@@ -946,7 +946,7 @@ def scatter(x, index, updates, overwrite=True, name=None):
def scatter_nd_add(x, index, updates, name=None): def scatter_nd_add(x, index, updates, name=None):
""" r"""
**Scatter_nd_add Layer** **Scatter_nd_add Layer**
Output is obtained by applying sparse addition to a single value Output is obtained by applying sparse addition to a single value
......
...@@ -379,7 +379,7 @@ def floor_divide(x, y, name=None): ...@@ -379,7 +379,7 @@ def floor_divide(x, y, name=None):
def remainder(x, y, name=None): def remainder(x, y, name=None):
""" r"""
Mod two tensors element-wise. The equation is: Mod two tensors element-wise. The equation is:
.. math:: .. math::
...@@ -981,7 +981,7 @@ def addmm(input, x, y, beta=1.0, alpha=1.0, name=None): ...@@ -981,7 +981,7 @@ def addmm(input, x, y, beta=1.0, alpha=1.0, name=None):
def logsumexp(x, axis=None, keepdim=False, name=None): def logsumexp(x, axis=None, keepdim=False, name=None):
""" r"""
This OP calculates the log of the sum of exponentials of ``x`` along ``axis`` . This OP calculates the log of the sum of exponentials of ``x`` along ``axis`` .
.. math:: .. math::
...@@ -1281,7 +1281,7 @@ def min(x, axis=None, keepdim=False, name=None): ...@@ -1281,7 +1281,7 @@ def min(x, axis=None, keepdim=False, name=None):
def log1p(x, name=None): def log1p(x, name=None):
""" r"""
Calculates the natural log of the given input tensor, element-wise. Calculates the natural log of the given input tensor, element-wise.
.. math:: .. math::
Out = \\ln(x+1) Out = \\ln(x+1)
...@@ -1315,7 +1315,7 @@ def log1p(x, name=None): ...@@ -1315,7 +1315,7 @@ def log1p(x, name=None):
return out return out
def log2(x, name=None): def log2(x, name=None):
""" r"""
Calculates the log to the base 2 of the given input tensor, element-wise. Calculates the log to the base 2 of the given input tensor, element-wise.
.. math:: .. math::
...@@ -1365,7 +1365,7 @@ def log2(x, name=None): ...@@ -1365,7 +1365,7 @@ def log2(x, name=None):
def log10(x, name=None): def log10(x, name=None):
""" r"""
Calculates the log to the base 10 of the given input tensor, element-wise. Calculates the log to the base 10 of the given input tensor, element-wise.
.. math:: .. math::
...@@ -1947,7 +1947,7 @@ def sign(x, name=None): ...@@ -1947,7 +1947,7 @@ def sign(x, name=None):
def tanh(x, name=None): def tanh(x, name=None):
""" r"""
Tanh Activation Operator. Tanh Activation Operator.
.. math:: .. math::
......
...@@ -494,7 +494,7 @@ def sort(x, axis=-1, descending=False, name=None): ...@@ -494,7 +494,7 @@ def sort(x, axis=-1, descending=False, name=None):
def where(condition, x, y, name=None): def where(condition, x, y, name=None):
""" r"""
Return a tensor of elements selected from either $x$ or $y$, depending on $condition$. Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.
.. math:: .. math::
......
...@@ -93,7 +93,7 @@ class Imdb(Dataset): ...@@ -93,7 +93,7 @@ class Imdb(Dataset):
def _build_work_dict(self, cutoff): def _build_work_dict(self, cutoff):
word_freq = collections.defaultdict(int) word_freq = collections.defaultdict(int)
pattern = re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$") pattern = re.compile(r"aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$")
for doc in self._tokenize(pattern): for doc in self._tokenize(pattern):
for word in doc: for word in doc:
word_freq[word] += 1 word_freq[word] += 1
...@@ -123,8 +123,8 @@ class Imdb(Dataset): ...@@ -123,8 +123,8 @@ class Imdb(Dataset):
return data return data
def _load_anno(self): def _load_anno(self):
pos_pattern = re.compile("aclImdb/{}/pos/.*\.txt$".format(self.mode)) pos_pattern = re.compile(r"aclImdb/{}/pos/.*\.txt$".format(self.mode))
neg_pattern = re.compile("aclImdb/{}/neg/.*\.txt$".format(self.mode)) neg_pattern = re.compile(r"aclImdb/{}/neg/.*\.txt$".format(self.mode))
UNK = self.word_idx['<unk>'] UNK = self.word_idx['<unk>']
......
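The dataset regexes above get the same treatment: without the r prefix, "\." only works because Python passes unknown escapes through unchanged, which now draws a warning. A short stand-alone check follows; the sample path is made up for illustration.

import re

pattern = re.compile(r"aclImdb/train/pos/.*\.txt$")  # raw string: "\." reaches re verbatim
sample = "aclImdb/train/pos/200_8.txt"               # illustrative path, not from the tarball
assert pattern.search(sample)
assert not pattern.search("aclImdb/train/pos/readme")  # "$" still anchors on .txt files only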
#!/usr/bin/env python3.7 #!/usr/bin/env python3.7
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: skip-file # pylint: skip-file
import functools import functools
......
...@@ -42,11 +42,11 @@ Diff: set(['test_parallel_executor_crf']) ...@@ -42,11 +42,11 @@ Diff: set(['test_parallel_executor_crf'])
for l in fn.readlines(): for l in fn.readlines():
if l.find("Test ") != -1 and \ if l.find("Test ") != -1 and \
l.find("Passed") != -1: l.find("Passed") != -1:
m = re.search("Test\s+#[0-9]*\:\s([a-z0-9_]+)", escape(l)) m = re.search(r"Test\s+#[0-9]*\:\s([a-z0-9_]+)", escape(l))
passed.add(m.group(1)) passed.add(m.group(1))
if l.find("Start ") != -1: if l.find("Start ") != -1:
start_parts = escape(l).split(" ") start_parts = escape(l).split(" ")
m = re.search("Start\s+[0-9]+\:\s([a-z0-9_]+)", escape(l)) m = re.search(r"Start\s+[0-9]+\:\s([a-z0-9_]+)", escape(l))
started.add(m.group(1)) started.add(m.group(1))
print("Diff: ", started - passed) print("Diff: ", started - passed)
......
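The two patterns in the ctest-log parser above likewise only gain the r prefix; the \s, \: and character classes are unchanged. Below is a hedged, self-contained rerun of the parsing loop on two fabricated log lines.

import re

log = [  # fabricated ctest output lines, for illustration only
    "        Start  12: test_parallel_executor_crf",
    " 12/100 Test  #12: test_parallel_executor_crf ........   Passed    3.21 sec",
]
started, passed = set(), set()
for l in log:
    if l.find("Start ") != -1:
        m = re.search(r"Start\s+[0-9]+\:\s([a-z0-9_]+)", l)
        started.add(m.group(1))
    if l.find("Test ") != -1 and l.find("Passed") != -1:
        m = re.search(r"Test\s+#[0-9]*\:\s([a-z0-9_]+)", l)
        passed.add(m.group(1))
print("Diff: ", started - passed)  # empty set here: every started test passed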
...@@ -101,7 +101,7 @@ class Docstring(object): ...@@ -101,7 +101,7 @@ class Docstring(object):
def _arg_with_type(self): def _arg_with_type(self):
for t in self.d['Args']: for t in self.d['Args']:
m = re.search('([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t) m = re.search(r'([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t)
if m: if m:
self.args[m.group(1)] = m.group(2) self.args[m.group(1)] = m.group(2)
......
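Same prefix on the docstring-checker pattern above; a quick illustrative run (the Args line below is invented):

import re

line = "x (Tensor): the input tensor."  # invented Args entry
m = re.search(r'([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', line)
print(m.group(1), m.group(2))  # -> x (Tensor)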
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
usage: coverage_diff.py info_file diff_file > coverage-diff.info usage: coverage_diff.py info_file diff_file > coverage-diff.info
""" """
......