From 3815d7aa4012d00fdf38292fb1e14dde5d26945b Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Tue, 24 Nov 2020 14:53:51 +0800 Subject: [PATCH] Upgrade string literals to raw string (#28989) * upgrade comment string to raw string * fix string in * fix string with ' ' * revert update on comments * upgrade only necessary * fix sample code checker * fix comments with ''' --- paddle/scripts/conda_build.py | 21 +++- python/paddle/dataset/imdb.py | 10 +- python/paddle/dataset/tests/imdb_test.py | 12 +- .../fleet/base/distributed_strategy.py | 2 +- python/paddle/distributed/fleet/launch.py | 2 +- .../parameter_server_optimizer.py | 4 +- python/paddle/distributed/launch.py | 2 +- python/paddle/distribution.py | 12 +- python/paddle/fluid/clip.py | 6 +- python/paddle/fluid/contrib/layers/nn.py | 8 +- .../paddle/fluid/contrib/layers/rnn_impl.py | 6 +- .../paddle/fluid/contrib/memory_usage_calc.py | 2 +- .../slim/quantization/imperative/qat.py | 2 +- .../slim/quantization/imperative/quant_nn.py | 6 +- .../quantization/quant_int8_mkldnn_pass.py | 2 +- .../slim/quantization/quantization_pass.py | 2 +- .../paddle/fluid/contrib/utils/hdfs_utils.py | 6 +- python/paddle/fluid/core.py | 2 +- .../fluid/dataloader/dataloader_iter.py | 18 +-- python/paddle/fluid/distributed/downpour.py | 2 +- python/paddle/fluid/distributed/node.py | 8 +- python/paddle/fluid/dygraph/base.py | 2 +- .../fluid/dygraph/learning_rate_scheduler.py | 12 +- python/paddle/fluid/dygraph/nn.py | 24 ++-- python/paddle/fluid/dygraph/rnn.py | 4 +- python/paddle/fluid/framework.py | 4 +- .../fleet/parameter_server/pslib/node.py | 2 +- .../fluid/incubate/fleet/utils/fleet_util.py | 12 +- python/paddle/fluid/initializer.py | 4 +- python/paddle/fluid/input.py | 2 +- python/paddle/fluid/layer_helper_base.py | 2 +- python/paddle/fluid/layers/control_flow.py | 6 +- python/paddle/fluid/layers/detection.py | 14 +-- python/paddle/fluid/layers/distributions.py | 8 +- .../fluid/layers/learning_rate_scheduler.py | 2 +- python/paddle/fluid/layers/loss.py | 18 +-- python/paddle/fluid/layers/metric_op.py | 2 +- python/paddle/fluid/layers/nn.py | 106 +++++++++--------- python/paddle/fluid/layers/ops.py | 8 +- python/paddle/fluid/layers/rnn.py | 82 +++++++------- python/paddle/fluid/layers/sequence_lod.py | 16 +-- python/paddle/fluid/layers/tensor.py | 8 +- python/paddle/fluid/metrics.py | 4 +- python/paddle/fluid/nets.py | 6 +- python/paddle/fluid/optimizer.py | 32 +++--- python/paddle/fluid/param_attr.py | 2 +- python/paddle/fluid/reader.py | 2 +- python/paddle/fluid/regularizer.py | 6 +- .../unittests/dist_text_classification.py | 8 +- .../dygraph_to_static/simnet_dygraph_model.py | 2 +- .../simnet_dygraph_model_v2.py | 2 +- .../test_eager_deletion_recurrent_op.py | 2 +- .../tests/unittests/test_full_like_op.py | 3 +- .../fluid/tests/unittests/test_lrn_op.py | 2 +- .../tests/unittests/test_recurrent_op.py | 6 +- .../tests/unittests/test_require_version.py | 2 +- python/paddle/metric/metrics.py | 2 +- python/paddle/nn/functional/activation.py | 34 +++--- python/paddle/nn/functional/common.py | 4 +- python/paddle/nn/functional/conv.py | 12 +- python/paddle/nn/functional/extension.py | 5 +- python/paddle/nn/functional/input.py | 2 +- python/paddle/nn/functional/loss.py | 16 +-- python/paddle/nn/functional/norm.py | 4 +- python/paddle/nn/functional/vision.py | 6 +- python/paddle/nn/initializer/kaiming.py | 4 +- python/paddle/nn/initializer/xavier.py | 4 +- python/paddle/nn/layer/activation.py | 36 +++--- python/paddle/nn/layer/common.py | 6 +- 
python/paddle/nn/layer/conv.py | 12 +- python/paddle/nn/layer/distance.py | 2 +- python/paddle/nn/layer/loss.py | 16 +-- python/paddle/nn/layer/norm.py | 16 +-- python/paddle/nn/layer/pooling.py | 10 +- python/paddle/nn/layer/transformer.py | 20 ++-- python/paddle/nn/utils/weight_norm_hook.py | 2 +- python/paddle/optimizer/adadelta.py | 2 +- python/paddle/optimizer/adagrad.py | 2 +- python/paddle/optimizer/adam.py | 2 +- python/paddle/optimizer/adamax.py | 2 +- python/paddle/optimizer/adamw.py | 2 +- python/paddle/optimizer/lr.py | 14 +-- python/paddle/optimizer/momentum.py | 2 +- python/paddle/optimizer/optimizer.py | 2 +- python/paddle/optimizer/rmsprop.py | 2 +- python/paddle/optimizer/sgd.py | 2 +- python/paddle/reader/__init__.py | 2 +- python/paddle/regularizer.py | 4 +- python/paddle/static/io.py | 45 +++++--- python/paddle/static/nn/common.py | 4 +- python/paddle/tensor/creation.py | 6 +- python/paddle/tensor/linalg.py | 4 +- python/paddle/tensor/manipulation.py | 6 +- python/paddle/tensor/math.py | 12 +- python/paddle/tensor/search.py | 2 +- python/paddle/text/datasets/imdb.py | 6 +- r/example/mobilenet.py | 15 +++ tools/check_ctest_hung.py | 4 +- tools/codestyle/docstring_checker.py | 2 +- tools/coverage/coverage_diff.py | 14 +++ tools/coverage/coverage_diff_list.py | 14 +++ tools/coverage/coverage_lines.py | 14 +++ tools/coverage/cuda_clean.py | 14 +++ tools/coverage/gcda_clean.py | 14 +++ tools/coverage/pull_request.py | 14 +++ tools/coverage/python_coverage.py | 14 +++ tools/get_quick_disable_lt.py | 2 +- tools/sampcd_processor.py | 24 ++-- tools/summary_env.py | 2 +- 109 files changed, 586 insertions(+), 449 deletions(-) diff --git a/paddle/scripts/conda_build.py b/paddle/scripts/conda_build.py index 0a0736f35a5..395a071ed13 100644 --- a/paddle/scripts/conda_build.py +++ b/paddle/scripts/conda_build.py @@ -1,4 +1,19 @@ #!/bin/python + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # import platform from sys import argv @@ -120,7 +135,7 @@ python setup.py install self.py_str = ["py27", "py35", "py36", "py37"] self.pip_end = ".whl --no-deps" self.pip_prefix_linux = "pip install /package/paddlepaddle" - self.pip_prefix_windows = "pip install C:\package\paddlepaddle" + self.pip_prefix_windows = r"pip install C:\package\paddlepaddle" self.pip_gpu = "_gpu-" self.pip_cpu = "-" self.mac_pip = [ @@ -216,7 +231,7 @@ package: - matplotlib""" if not (cuda_str == None): meta_str = meta_str + cuda_str - + blt_str = var.blt_const + blt_var if (python_str == var.python27): blt_str = blt_str + """ @@ -224,7 +239,7 @@ package: else: meta_str = meta_str + """ - opencv>=3.4.2""" - + meta_str = meta_str + var.test + var.about meta_filename = "meta.yaml" build_filename = "bld.bat" diff --git a/python/paddle/dataset/imdb.py b/python/paddle/dataset/imdb.py index e5a3b6074c9..dab3c964cc6 100644 --- a/python/paddle/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -116,8 +116,8 @@ def train(word_idx): :rtype: callable """ return reader_creator( - re.compile("aclImdb/train/pos/.*\.txt$"), - re.compile("aclImdb/train/neg/.*\.txt$"), word_idx) + re.compile(r"aclImdb/train/pos/.*\.txt$"), + re.compile(r"aclImdb/train/neg/.*\.txt$"), word_idx) @deprecated( @@ -137,8 +137,8 @@ def test(word_idx): :rtype: callable """ return reader_creator( - re.compile("aclImdb/test/pos/.*\.txt$"), - re.compile("aclImdb/test/neg/.*\.txt$"), word_idx) + re.compile(r"aclImdb/test/pos/.*\.txt$"), + re.compile(r"aclImdb/test/neg/.*\.txt$"), word_idx) @deprecated( @@ -153,7 +153,7 @@ def word_dict(): :rtype: dict """ return build_dict( - re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) + re.compile(r"aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) @deprecated( diff --git a/python/paddle/dataset/tests/imdb_test.py b/python/paddle/dataset/tests/imdb_test.py index 415947e3477..613c5f8edb2 100644 --- a/python/paddle/dataset/tests/imdb_test.py +++ b/python/paddle/dataset/tests/imdb_test.py @@ -18,13 +18,13 @@ import paddle.dataset.imdb import unittest import re -TRAIN_POS_PATTERN = re.compile("aclImdb/train/pos/.*\.txt$") -TRAIN_NEG_PATTERN = re.compile("aclImdb/train/neg/.*\.txt$") -TRAIN_PATTERN = re.compile("aclImdb/train/.*\.txt$") +TRAIN_POS_PATTERN = re.compile(r"aclImdb/train/pos/.*\.txt$") +TRAIN_NEG_PATTERN = re.compile(r"aclImdb/train/neg/.*\.txt$") +TRAIN_PATTERN = re.compile(r"aclImdb/train/.*\.txt$") -TEST_POS_PATTERN = re.compile("aclImdb/test/pos/.*\.txt$") -TEST_NEG_PATTERN = re.compile("aclImdb/test/neg/.*\.txt$") -TEST_PATTERN = re.compile("aclImdb/test/.*\.txt$") +TEST_POS_PATTERN = re.compile(r"aclImdb/test/pos/.*\.txt$") +TEST_NEG_PATTERN = re.compile(r"aclImdb/test/neg/.*\.txt$") +TEST_PATTERN = re.compile(r"aclImdb/test/.*\.txt$") class TestIMDB(unittest.TestCase): diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 71eca424fe6..46ccb4663e8 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -862,7 +862,7 @@ class DistributedStrategy(object): @property def dgc_configs(self): - """ + r""" Set Deep Gradient Compression training configurations. In general, dgc has serveral configurable settings that can be configured through a dict. 
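The re.compile() changes above (for example in python/paddle/dataset/imdb.py) are needed because sequences such as \. and \s are not valid Python string escapes: the compiled regex still works, since CPython keeps the backslash, but every such literal triggers an invalid-escape DeprecationWarning (a SyntaxWarning from Python 3.8 on). The snippet below is an illustrative sketch, not part of the patch; it routes the literal through the builtin compile() so the warning is raised at run time, where it can be captured. The variable names are made up for the demonstration.

    import re
    import warnings

    old_src = r'PATTERN = "aclImdb/train/pos/.*\.txt$"'   # pre-patch spelling: "\." is an invalid escape
    new_src = r'PATTERN = r"aclImdb/train/pos/.*\.txt$"'  # raw-string spelling introduced by this patch

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        compile(old_src, "<before>", "exec")  # warns about the invalid escape sequence (warning category depends on the Python version)
        compile(new_src, "<after>", "exec")   # emits nothing

    print([str(w.message) for w in caught])

    # Both spellings denote the same pattern, so the behaviour of the dataset readers is unchanged.
    assert re.compile("aclImdb/train/pos/.*\\.txt$").pattern == re.compile(r"aclImdb/train/pos/.*\.txt$").pattern
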
diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 00bec671d4b..c48ce1a0f33 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" +r""" fleetrun is a module that spawns multiple distributed process on each training node for gpu training and cpu training. Usage: diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index 10b0c82c0ee..3135b69d004 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -158,13 +158,13 @@ class ParameterServerOptimizer(MetaOptimizerBase): ['vm_stat'], stdout=subprocess.PIPE).communicate()[0] # Process vm_stat vmLines = vm.split('\n') - sep = re.compile(':[\s]+') + sep = re.compile(r':[\s]+') vmStats = {} for row in range(1, len(vmLines) - 2): rowText = vmLines[row].strip() rowElements = sep.split(rowText) vmStats[(rowElements[0] - )] = int(rowElements[1].strip('\.')) * 4096 + )] = int(rowElements[1].strip(r'\.')) * 4096 return vmStats["Pages free"] elif platform.system() == "Linux": mems = {} diff --git a/python/paddle/distributed/launch.py b/python/paddle/distributed/launch.py index 9b969cf3002..060e742ad6c 100644 --- a/python/paddle/distributed/launch.py +++ b/python/paddle/distributed/launch.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" +r""" paddle.distributed.launch is a module that spawns multiple distributed process on each training node for gpu training. Usage: diff --git a/python/paddle/distribution.py b/python/paddle/distribution.py index e9a15feb517..ad134b4591e 100644 --- a/python/paddle/distribution.py +++ b/python/paddle/distribution.py @@ -166,7 +166,7 @@ class Distribution(object): class Uniform(Distribution): - """Uniform distribution with `low` and `high` parameters. + r"""Uniform distribution with `low` and `high` parameters. Mathematical Details @@ -374,7 +374,7 @@ class Uniform(Distribution): return elementwise_div((lb * ub), (self.high - self.low), name=name) def entropy(self): - """Shannon entropy in nats. + r"""Shannon entropy in nats. The entropy is @@ -391,7 +391,7 @@ class Uniform(Distribution): class Normal(Distribution): - """The Normal distribution with location `loc` and `scale` parameters. + r"""The Normal distribution with location `loc` and `scale` parameters. Mathematical details @@ -534,7 +534,7 @@ class Normal(Distribution): return output def entropy(self): - """Shannon entropy in nats. + r"""Shannon entropy in nats. The entropy is @@ -599,7 +599,7 @@ class Normal(Distribution): name=name) def kl_divergence(self, other): - """The KL-divergence between two normal distributions. + r"""The KL-divergence between two normal distributions. 
The probability density function (pdf) is @@ -644,7 +644,7 @@ class Normal(Distribution): class Categorical(Distribution): - """ + r""" Categorical distribution is a discrete probability distribution that describes the possible results of a random variable that can take on one of K possible categories, with the probability of each category diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index f20716c3a15..8fd01509331 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -40,7 +40,7 @@ class BaseErrorClipAttr(object): class ErrorClipByValue(BaseErrorClipAttr): - """ + r""" Clips tensor values to the range [min, max]. Given a tensor ``t`` (see Examples below), this operation clips its value \ @@ -241,7 +241,7 @@ class ClipGradByValue(ClipGradBase): class ClipGradByNorm(ClipGradBase): - """ + r""" Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` . - If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio. @@ -343,7 +343,7 @@ class ClipGradByNorm(ClipGradBase): class ClipGradByGlobalNorm(ClipGradBase): - """ + r""" Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in :math:`t\_list` , and limit it to ``clip_norm`` . diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index d0543bb90dd..f3f8c815b00 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -137,7 +137,7 @@ def var_conv_2d(input, act=None, dtype='float32', name=None): - """ + r""" The var_conv_2d layer calculates the output base on the :attr:`input` with variable length, row, col, input channel, filter size and strides. Both :attr:`input`, :attr:`row`, and :attr:`col` are 1-level LodTensor. The convolution operation is same as conv2d layer with @@ -477,7 +477,7 @@ def fused_embedding_seq_pool(input, combiner='sum', param_attr=None, dtype='float32'): - """ + r""" **Embedding Sequence pool** This layer is the fusion of lookup table and sequence_pool. @@ -1442,7 +1442,7 @@ def batch_fc(input, param_size, param_attr, bias_size, bias_attr, act=None): def _pull_box_extended_sparse(input, size, extend_size=64, dtype='float32'): - """ + r""" **Pull Box Extended Sparse Layer** This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in BoxPS lookup table. The result of this lookup is the embedding of each ID in the @@ -1640,7 +1640,7 @@ def fused_bn_add_act(x, moving_variance_name=None, act=None, name=None): - """ + r""" This Op performs batch norm on input x, and adds the result to input y. Then it performs activation on the sum. The data format of inputs must be NHWC `[batch, in_height, in_width, in_channels]`. diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index 4e304a393f8..a2dd0835b60 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -175,7 +175,7 @@ def basic_gru(input, activation=None, dtype='float32', name='basic_gru'): - """ + r""" GRU implementation using basic operator, supports multiple layers and bidirectional gru. .. math:: @@ -418,7 +418,7 @@ def basic_lstm(input, forget_bias=1.0, dtype='float32', name='basic_lstm'): - """ + r""" LSTM implementation using basic operators, supports multiple layers and bidirectional LSTM. .. 
math:: @@ -697,7 +697,7 @@ def basic_lstm(input, class BasicLSTMUnit(Layer): - """ + r""" **** BasicLSTMUnit class, Using basic operator to build LSTM The algorithm can be described as the code below. diff --git a/python/paddle/fluid/contrib/memory_usage_calc.py b/python/paddle/fluid/contrib/memory_usage_calc.py index b5d85616cf0..24e39d7ac61 100644 --- a/python/paddle/fluid/contrib/memory_usage_calc.py +++ b/python/paddle/fluid/contrib/memory_usage_calc.py @@ -44,7 +44,7 @@ DEBUG = False def memory_usage(program, batch_size): - """ + r""" Get the estimate memory usage of program with input batch size. Args: diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index cae24177232..7364655107b 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -64,7 +64,7 @@ class ImperativeQuantAware(object): act_preprocess_layer=None, weight_quantize_layer=None, act_quantize_layer=None): - """ + r""" The constructor for ImperativeQuantAware. Args: diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index 79138febd0c..5acc4c30bc0 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -30,7 +30,7 @@ __all__ = [ class FakeQuantMovingAverage(layers.Layer): - """ + r""" FakeQuantMovingAverage layer does the moving_average_abs_max quant and then dequant. Its computational formula is described as below: @@ -128,7 +128,7 @@ class FakeQuantMovingAverage(layers.Layer): class FakeQuantAbsMax(layers.Layer): - """ + r""" FakeQuantAbsMax layer does the abs_max quant and then dequant. Its computational formula is described as below: @@ -545,7 +545,7 @@ class QuantizedLinear(layers.Layer): class MovingAverageAbsMaxScale(layers.Layer): def __init__(self, name=None, moving_rate=0.9, dtype='float32'): - """ + r""" MovingAverageMaxScale layer is used to calculating the output quantization scale of Layer. Its computational formula is described as below: diff --git a/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py index a25abd9ff09..d31dc35d143 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quant_int8_mkldnn_pass.py @@ -37,7 +37,7 @@ class QuantInt8MkldnnPass(object): """ def __init__(self, _scope=None, _place=None): - """ + r""" Args: scope(fluid.Scope): scope is used to initialize the new parameters. place(fluid.CPUPlace): place is used to initialize the new parameters. diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py index 68bf9ecd80b..219025269fe 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py @@ -239,7 +239,7 @@ class QuantizationTransformPass(object): act_preprocess_func=None, optimizer_func=None, executor=None): - """ + r""" Constructor. 
Args: diff --git a/python/paddle/fluid/contrib/utils/hdfs_utils.py b/python/paddle/fluid/contrib/utils/hdfs_utils.py index 2de4f82bd14..9572552f0f2 100644 --- a/python/paddle/fluid/contrib/utils/hdfs_utils.py +++ b/python/paddle/fluid/contrib/utils/hdfs_utils.py @@ -33,7 +33,7 @@ _logger = get_logger( class HDFSClient(object): - """ + r""" A tool of HDFS Args: @@ -376,7 +376,7 @@ class HDFSClient(object): _logger.info("HDFS list path: {} successfully".format(hdfs_path)) ret_lines = [] - regex = re.compile('\s+') + regex = re.compile(r'\s+') out_lines = output.strip().split("\n") for line in out_lines: re_line = regex.split(line) @@ -418,7 +418,7 @@ class HDFSClient(object): _logger.info("HDFS list all files: {} successfully".format( hdfs_path)) lines = [] - regex = re.compile('\s+') + regex = re.compile(r'\s+') out_lines = output.strip().split("\n") for line in out_lines: re_line = regex.split(line) diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py index ad116c25970..224a021cd6a 100644 --- a/python/paddle/fluid/core.py +++ b/python/paddle/fluid/core.py @@ -224,7 +224,7 @@ def less_than_ver(a, b): import operator def to_list(s): - s = re.sub('(\.0+)+$', '', s) + s = re.sub(r'(\.0+)+$', '', s) return [int(x) for x in s.split('.')] return operator.lt(to_list(a), to_list(b)) diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index ee30484ae9a..ea89b09d2bf 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -101,10 +101,11 @@ class _DatasetKind(object): ITER = 1 @staticmethod - def create_fetcher(kind, dataset, auto_collate_batch, collate_fn, drop_last): + def create_fetcher(kind, dataset, auto_collate_batch, collate_fn, + drop_last): if kind == _DatasetKind.MAP: - return _MapDatasetFetcher(dataset, auto_collate_batch, - collate_fn, drop_last) + return _MapDatasetFetcher(dataset, auto_collate_batch, collate_fn, + drop_last) elif kind == _DatasetKind.ITER: return _IterableDatasetFetcher(dataset, auto_collate_batch, collate_fn, drop_last) @@ -240,7 +241,8 @@ class _DataLoaderIterBase(object): if self._dataset_kind == _DatasetKind.MAP: self._sampler_iter = iter(list(range(len(self._dataset)))) else: - self._sampler_iter = iter(_InfiniteIterableSampler(self._dataset, 1)) + self._sampler_iter = iter( + _InfiniteIterableSampler(self._dataset, 1)) self._collate_fn = loader.collate_fn # LoDTensorBlockingQueue instance for create_py_reader and a thread @@ -380,8 +382,8 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase): # NOTE(chenweihang): _worker_loop must be top level method to be pickled def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event, - auto_collate_batch, collate_fn, init_fn, worker_id, num_workers, - use_shared_memory): + auto_collate_batch, collate_fn, init_fn, worker_id, + num_workers, use_shared_memory): try: # NOTE: [ mmap files clear ] When the child process exits unexpectedly, # some shared memory objects may have been applied for but have not yet @@ -400,8 +402,8 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event, try: if init_fn is not None: init_fn(worker_id) - fetcher = _DatasetKind.create_fetcher(dataset_kind, dataset, - auto_collate_batch, collate_fn, True) + fetcher = _DatasetKind.create_fetcher( + dataset_kind, dataset, auto_collate_batch, collate_fn, True) except: init_exception = Exception("init_fn failed in worker {}: " \ "{}".format(worker_id, sys.exc_info())) 
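Most hunks in this patch do nothing except prepend r to a docstring opener. The prefix matters whenever the docstring embeds Sphinx markup or LaTeX math with backslashes: in a plain literal, a sequence like \f is a valid escape (form feed), so text such as \frac is corrupted silently, without even a warning. The strings below are made up for illustration and are not taken from the repository.

    plain = "loss = \frac{(x - y)^{2}}{2}"    # "\f" is parsed as a form feed: no warning, silent corruption
    raw   = r"loss = \frac{(x - y)^{2}}{2}"   # the r prefix keeps the backslash for the Sphinx :math: role

    print("\f" in plain)           # True  -> "\frac" has already lost its backslash
    print(raw.count("\\"))         # 1     -> the backslash survives in the raw literal
    print(len(raw) - len(plain))   # 1     -> exactly one character (the backslash) differs

This is why docstrings that only carry formulas or escaped markup, such as the distribution and quantization classes above, get the prefix even though they contain no regular expressions.
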
diff --git a/python/paddle/fluid/distributed/downpour.py b/python/paddle/fluid/distributed/downpour.py index 61e508ea72e..89e9a6a9076 100644 --- a/python/paddle/fluid/distributed/downpour.py +++ b/python/paddle/fluid/distributed/downpour.py @@ -22,7 +22,7 @@ from google.protobuf import text_format class DownpourSGD(object): - """ + r""" Distributed optimizer of downpour stochastic gradient descent Standard implementation of Google's Downpour SGD in Large Scale Distributed Deep Networks diff --git a/python/paddle/fluid/distributed/node.py b/python/paddle/fluid/distributed/node.py index 41e0d64e0b7..a15f94f4d17 100644 --- a/python/paddle/fluid/distributed/node.py +++ b/python/paddle/fluid/distributed/node.py @@ -52,7 +52,7 @@ class DownpourServer(Server): def add_sparse_table(self, table_id, learning_rate, slot_key_vars, slot_value_var): - """ + r""" Args: table_id(int): id of sparse params table learning_rate(float): the learning rate used to update parameters. \ @@ -84,7 +84,7 @@ class DownpourServer(Server): table.accessor.downpour_accessor_param.delete_threshold = 0.8 def add_dense_table(self, table_id, learning_rate, param_var, grad_var): - """ + r""" Args: table_id(int): id of sparse params table learning_rate(float): the learning rate used to update parameters. \ @@ -135,7 +135,7 @@ class DownpourWorker(Worker): def add_sparse_table(self, table_id, learning_rate, slot_key_vars, slot_value_vars): - """ + r""" Args: table_id(int): id of sparse params table learning_rate(float): the learning rate used to update parameters. \ @@ -153,7 +153,7 @@ class DownpourWorker(Worker): [var.name + "@GRAD" for var in slot_value_vars]) def add_dense_table(self, table_id, learning_rate, param_vars, grad_vars): - """ + r""" Args: table_id(int): id of sparse params table learning_rate(float): the learning rate used to update parameters. \ diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index a26b903493a..397f873f961 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -593,7 +593,7 @@ def grad(outputs, @framework.dygraph_only def to_variable(value, name=None, zero_copy=None, dtype=None): - """ + r""" :api_attr: imperative The API will create a ``Variable`` or ``ComplexVariable`` object from diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index cd6af6fd5b5..a6c1993dbbf 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -183,7 +183,7 @@ class PiecewiseDecay(LearningRateDecay): class NaturalExpDecay(LearningRateDecay): - """ + r""" :api_attr: imperative Applies natural exponential decay to the initial learning rate. @@ -266,7 +266,7 @@ class NaturalExpDecay(LearningRateDecay): class ExponentialDecay(LearningRateDecay): - """ + r""" :api_attr: imperative Applies exponential decay to the learning rate. @@ -348,7 +348,7 @@ class ExponentialDecay(LearningRateDecay): class InverseTimeDecay(LearningRateDecay): - """ + r""" :api_attr: imperative Applies inverse time decay to the initial learning rate. @@ -426,7 +426,7 @@ class InverseTimeDecay(LearningRateDecay): class PolynomialDecay(LearningRateDecay): - """ + r""" :api_attr: imperative Applies polynomial decay to the initial learning rate. @@ -520,7 +520,7 @@ class PolynomialDecay(LearningRateDecay): class CosineDecay(LearningRateDecay): - """ + r""" :api_attr: imperative Applies cosine decay to the learning rate. 
@@ -578,7 +578,7 @@ class CosineDecay(LearningRateDecay): class NoamDecay(LearningRateDecay): - """ + r""" :api_attr: imperative Applies Noam decay to the initial learning rate. diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index 3c75b304028..0f92c32f252 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -42,7 +42,7 @@ __all__ = [ class Conv2D(layers.Layer): - """ + r""" This interface is used to construct a callable object of the ``Conv2D`` class. For more details, refer to code examples. The convolution2D layer calculates the output based on the input, filter @@ -282,7 +282,7 @@ class Conv2D(layers.Layer): class Conv3D(layers.Layer): - """ + r""" **Convlution3D Layer** The convolution3D layer calculates the output based on the input, filter @@ -484,7 +484,7 @@ class Conv3D(layers.Layer): class Conv3DTranspose(layers.Layer): - """ + r""" **Convlution3D transpose layer** The convolution3D transpose layer calculates the output based on the input, @@ -701,7 +701,7 @@ class Conv3DTranspose(layers.Layer): class Pool2D(layers.Layer): - """ + r""" This interface is used to construct a callable object of the ``Pool2D`` class. For more details, refer to code examples. @@ -1009,7 +1009,7 @@ class Linear(layers.Layer): class InstanceNorm(layers.Layer): - """ + r""" This interface is used to construct a callable object of the ``InstanceNorm`` class. For more details, refer to code examples. @@ -1143,7 +1143,7 @@ class InstanceNorm(layers.Layer): class BatchNorm(layers.Layer): - """ + r""" :alias_main: paddle.nn.BatchNorm :alias: paddle.nn.BatchNorm,paddle.nn.layer.BatchNorm,paddle.nn.layer.norm.BatchNorm :old_api: paddle.fluid.dygraph.BatchNorm @@ -1492,7 +1492,7 @@ class Dropout(layers.Layer): class Embedding(layers.Layer): - """ + r""" :alias_main: paddle.nn.Embedding :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding :old_api: paddle.fluid.dygraph.Embedding @@ -1652,7 +1652,7 @@ class Embedding(layers.Layer): class LayerNorm(layers.Layer): - """ + r""" :alias_main: paddle.nn.LayerNorm :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm :old_api: paddle.fluid.dygraph.LayerNorm @@ -2242,7 +2242,7 @@ class NCE(layers.Layer): class PRelu(layers.Layer): - """ + r""" This interface is used to construct a callable object of the ``PRelu`` class. For more details, refer to code examples. It implements three activation methods of the ``PRelu`` activation function. @@ -2350,7 +2350,7 @@ class PRelu(layers.Layer): class BilinearTensorProduct(layers.Layer): - """ + r""" **Add Bilinear Tensor Product Layer** @@ -2467,7 +2467,7 @@ class BilinearTensorProduct(layers.Layer): class Conv2DTranspose(layers.Layer): - """ + r""" This interface is used to construct a callable object of the ``Conv2DTranspose`` class. For more details, refer to code examples. 
The convolution2D transpose layer calculates the output based on the input, @@ -2979,7 +2979,7 @@ class GroupNorm(layers.Layer): class SpectralNorm(layers.Layer): - """ + r""" :alias_main: paddle.nn.SpectralNorm :alias: paddle.nn.SpectralNorm,paddle.nn.layer.SpectralNorm,paddle.nn.layer.norm.SpectralNorm :old_api: paddle.fluid.dygraph.SpectralNorm diff --git a/python/paddle/fluid/dygraph/rnn.py b/python/paddle/fluid/dygraph/rnn.py index 9df4188fb7e..05a76a8d125 100644 --- a/python/paddle/fluid/dygraph/rnn.py +++ b/python/paddle/fluid/dygraph/rnn.py @@ -20,7 +20,7 @@ __all__ = ['LSTMCell', 'GRUCell'] class LSTMCell(Layer): - """ + r""" LSTMCell implementation using basic operators. There are two LSTMCell version, the default one is compatible with CUDNN LSTM implementation. The algorithm can be described as the equations below. @@ -236,7 +236,7 @@ class LSTMCell(Layer): class GRUCell(Layer): - """ + r""" GRU implementation using basic operators. There are two GRUCell version, the default one is compatible with CUDNN GRU implementation. The algorithm can be described as the equations below. diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 49c5f9f5b8e..28891871777 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -2255,7 +2255,7 @@ class Operator(object): return self.desc.type() def input(self, name): - """ + r""" Get the input arguments according to the input parameter name. Args: @@ -2306,7 +2306,7 @@ class Operator(object): return self.desc.output_arg_names() def output(self, name): - """ + r""" Get output arguments by the output parameter name. Args: diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py index 4b600150e04..0853d05ef3b 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/node.py @@ -527,7 +527,7 @@ class DownpourWorker(Worker): def add_dense_table(self, table_id, learning_rate, param_vars, grad_vars, dense_start_table_id, sparse_table_names): - """ + r""" Args: table_id(int): id of sparse params table learning_rate(float): the learning rate used to update parameters. \ diff --git a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py index c126f06de9d..dd968a70e8a 100644 --- a/python/paddle/fluid/incubate/fleet/utils/fleet_util.py +++ b/python/paddle/fluid/incubate/fleet/utils/fleet_util.py @@ -153,7 +153,7 @@ class FleetUtil(object): stat_pos="_generated_var_2", stat_neg="_generated_var_3", print_prefix=""): - """ + r""" Print global auc of all distributed workers. Args: @@ -1073,7 +1073,7 @@ class FleetUtil(object): hadoop_fs_name, hadoop_fs_ugi, hadoop_home="$HADOOP_HOME"): - """ + r""" get last saved base xbox info from xbox_base_done.txt Args: @@ -1118,7 +1118,7 @@ class FleetUtil(object): hadoop_fs_name, hadoop_fs_ugi, hadoop_home="$HADOOP_HOME"): - """ + r""" get last saved xbox info from xbox_patch_done.txt Args: @@ -1164,7 +1164,7 @@ class FleetUtil(object): hadoop_fs_name, hadoop_fs_ugi, hadoop_home="$HADOOP_HOME"): - """ + r""" get last saved model info from donefile.txt Args: @@ -1279,7 +1279,7 @@ class FleetUtil(object): q_name="q", pos_ins_num_name="pos", total_ins_num_name="total"): - """ + r""" get global metrics, including auc, bucket_error, mae, rmse, actual_ctr, predicted_ctr, copc, mean_predict_qvalue, total_ins_num. 
@@ -1469,7 +1469,7 @@ class FleetUtil(object): pos_ins_num_name="pos", total_ins_num_name="total", print_prefix=""): - """ + r""" print global metrics, including auc, bucket_error, mae, rmse, actual_ctr, predicted_ctr, copc, mean_predict_qvalue, total_ins_num. diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 30932d0c8b5..86fab981127 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -459,7 +459,7 @@ class TruncatedNormalInitializer(Initializer): class XavierInitializer(Initializer): - """ + r""" This class implements the Xavier weight initializer from the paper `Understanding the difficulty of training deep feedforward neural networks `_ @@ -595,7 +595,7 @@ class XavierInitializer(Initializer): class MSRAInitializer(Initializer): - """Implements the MSRA initializer a.k.a. Kaiming Initializer + r"""Implements the MSRA initializer a.k.a. Kaiming Initializer This class implements the weight initialization from the paper `Delving Deep into Rectifiers: Surpassing Human-Level Performance on diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py index 0e3ee46fa46..e56d1876e3f 100644 --- a/python/paddle/fluid/input.py +++ b/python/paddle/fluid/input.py @@ -137,7 +137,7 @@ def embedding(input, padding_idx=None, param_attr=None, dtype='float32'): - """ + r""" :api_attr: Static Graph The operator is used to lookup embeddings vector of ids provided by :attr:`input` . diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index 6e38c855562..5ee46a68fb7 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -59,7 +59,7 @@ class LayerHelperBase(object): return cls.__dtype def to_variable(self, value, name=None): - """ + r""" The API will create a ``Variable`` object from numpy\.ndarray or Variable object. Parameters: diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 65ca5a211e3..b5f66a1308e 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -3012,7 +3012,7 @@ class DynamicRNN(object): self.mem_link = [] def step_input(self, x, level=0): - """ + r""" This function is used to set sequence x as DynamicRNN's input. The maximum sequence length in x determines the number of time steps the RNN unit will be executed. DynamicRNN can take multiple inputs. @@ -3144,7 +3144,7 @@ class DynamicRNN(object): return array_read(array=input_array, i=self.step_idx) def static_input(self, x): - """ + r""" This function is used to set x as DynamicRNN's static input. It is optional. - Case 1, set static input with LoD @@ -3348,7 +3348,7 @@ class DynamicRNN(object): value=0.0, need_reorder=False, dtype='float32'): - """ + r""" Create a memory Variable for DynamicRNN to deliver data cross time steps. It can be initialized by an existing Tensor or a constant Tensor of given dtype and shape. 
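The commit message notes "upgrade only necessary" and "fix sample code checker": the r prefix is added only where a literal actually contains a backslash. One way to locate such literals is to recompile every source file and collect the interpreter's invalid-escape warnings. The helper below is a hypothetical sketch (the function name, glob pattern and message matching are assumptions, not code from this repository).

    import glob
    import warnings

    def files_with_invalid_escapes(root="python/paddle"):
        """Return source files whose compilation emits an 'invalid escape sequence' warning."""
        offenders = []
        for path in glob.glob(root + "/**/*.py", recursive=True):
            with open(path, encoding="utf-8") as handle:
                source = handle.read()
            with warnings.catch_warnings(record=True) as caught:
                warnings.simplefilter("always")
                try:
                    compile(source, path, "exec")
                except SyntaxError:
                    continue  # skip files that do not parse under the running interpreter
            if any("invalid escape sequence" in str(w.message) for w in caught):
                offenders.append(path)
        return offenders

    if __name__ == "__main__":
        for name in files_with_invalid_escapes():
            print(name)

A check like this only reports invalid escapes; valid but unintended ones such as \f still require the kind of manual docstring review that this patch performs.
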
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index f7e79f79f8b..ce29b64ce43 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -77,7 +77,7 @@ def retinanet_target_assign(bbox_pred, num_classes=1, positive_overlap=0.5, negative_overlap=0.4): - """ + r""" **Target Assign Layer for the detector RetinaNet.** This OP finds out positive and negative samples from all anchors @@ -471,7 +471,7 @@ def rpn_target_assign(bbox_pred, def sigmoid_focal_loss(x, label, fg_num, gamma=2.0, alpha=0.25): - """ + r""" :alias_main: paddle.nn.functional.sigmoid_focal_loss :alias: paddle.nn.functional.sigmoid_focal_loss,paddle.nn.functional.loss.sigmoid_focal_loss :old_api: paddle.fluid.layers.sigmoid_focal_loss @@ -821,7 +821,7 @@ def box_coder(prior_box, box_normalized=True, name=None, axis=0): - """ + r""" **Box Coder Layer** @@ -1523,7 +1523,7 @@ def ssd_loss(location, mining_type='max_negative', normalize=True, sample_size=None): - """ + r""" :alias_main: paddle.nn.functional.ssd_loss :alias: paddle.nn.functional.ssd_loss,paddle.nn.functional.loss.ssd_loss :old_api: paddle.fluid.layers.ssd_loss @@ -1930,7 +1930,7 @@ def density_prior_box(input, offset=0.5, flatten_to_2d=False, name=None): - """ + r""" This op generates density prior boxes for SSD(Single Shot MultiBox Detector) algorithm. Each position of the input produce N prior boxes, N is @@ -2741,7 +2741,7 @@ def generate_proposal_labels(rpn_rois, def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois, labels_int32, num_classes, resolution): - """ + r""" **Generate Mask Labels for Mask-RCNN** @@ -3671,7 +3671,7 @@ def distribute_fpn_proposals(fpn_rois, refer_scale, rois_num=None, name=None): - """ + r""" **This op only takes LoDTensor as input.** In Feature Pyramid Networks (FPN) models, it is needed to distribute all proposals into different FPN diff --git a/python/paddle/fluid/layers/distributions.py b/python/paddle/fluid/layers/distributions.py index 81bea3898be..4e4c8dfd2a0 100644 --- a/python/paddle/fluid/layers/distributions.py +++ b/python/paddle/fluid/layers/distributions.py @@ -113,7 +113,7 @@ class Distribution(object): class Uniform(Distribution): - """Uniform distribution with `low` and `high` parameters. + r"""Uniform distribution with `low` and `high` parameters. Mathematical Details @@ -258,7 +258,7 @@ class Uniform(Distribution): class Normal(Distribution): - """The Normal distribution with location `loc` and `scale` parameters. + r"""The Normal distribution with location `loc` and `scale` parameters. Mathematical details @@ -423,7 +423,7 @@ class Normal(Distribution): class Categorical(Distribution): - """ + r""" Categorical distribution is a discrete probability distribution that describes the possible results of a random variable that can take on one of K possible categories, with the probability of each category @@ -529,7 +529,7 @@ class Categorical(Distribution): class MultivariateNormalDiag(Distribution): - """ + r""" A multivariate normal (also called Gaussian) distribution parameterized by a mean vector and a covariance matrix. diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 2710ab12cd3..26f08a2356d 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -440,7 +440,7 @@ Applies piecewise decay to the initial learning rate. 
def cosine_decay(learning_rate, step_each_epoch, epochs): - """ + r""" Applies cosine decay to the learning rate. diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 99801514f47..45f3de2d99a 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -57,7 +57,7 @@ def center_loss(input, alpha, param_attr, update_center=True): - """ + r""" :api_attr: Static Graph **Center loss Cost layer** @@ -151,7 +151,7 @@ def center_loss(input, def bpr_loss(input, label, name=None): - """ + r""" **Bayesian Personalized Ranking Loss Operator** @@ -203,7 +203,7 @@ def bpr_loss(input, label, name=None): def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): - """ + r""" :alias_main: paddle.nn.functional.cross_entropy :alias: paddle.nn.functional.cross_entropy,paddle.nn.functional.loss.cross_entropy :old_api: paddle.fluid.layers.cross_entropy @@ -300,7 +300,7 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex): def square_error_cost(input, label): - """ + r""" This op accepts input predictions and target label and returns the squared error cost. @@ -1185,7 +1185,7 @@ def softmax_with_cross_entropy(logits, numeric_stable_mode=True, return_softmax=False, axis=-1): - """ + r""" :alias_main: paddle.nn.functional.softmax_with_cross_entropy :alias: paddle.nn.functional.softmax_with_cross_entropy,paddle.nn.functional.loss.softmax_with_cross_entropy :old_api: paddle.fluid.layers.softmax_with_cross_entropy @@ -1312,7 +1312,7 @@ def softmax_with_cross_entropy(logits, def rank_loss(label, left, right, name=None): - """ + r""" This operator implements the sort loss layer in the RankNet model. RankNet is a pairwise ranking model with a training sample consisting of a pair of documents (A and B), The label (P) @@ -1375,7 +1375,7 @@ def rank_loss(label, left, right, name=None): def margin_rank_loss(label, left, right, margin=0.1, name=None): - """ + r""" Margin Ranking Loss Layer for ranking problem, which compares left score and right score passed in. The ranking loss can be defined as following equation: @@ -1551,7 +1551,7 @@ def teacher_student_sigmoid_loss(input, def huber_loss(input, label, delta): - """ + r""" This operator computes the Huber loss between input and label. Huber loss is commonly used in regression tasks. Compared to square_error_cost, Huber loss is more robust and less sensitivity to outliers. @@ -1681,7 +1681,7 @@ from .control_flow import equal def npair_loss(anchor, positive, labels, l2_reg=0.002): - ''' + r''' Read `Improved Deep Metric Learning with Multi class N pair Loss Objective\ `_ @@ -14964,7 +14964,7 @@ def mish(x, threshold=20, name=None): def gather_tree(ids, parents): - """ + r""" To be used after beam search. After beam search, we get selected ids at each time step and the corresponding parents in the search tree. Both ids and parents have the layout :attr:`[max_time, batch_size, beam_size]`. 
Then diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index de0fbb16f62..72dc4a91608 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -413,7 +413,7 @@ def softshrink(x, alpha=None): return _softshrink_(**kwargs) -softshrink.__doc__ = """ +softshrink.__doc__ = r""" :alias_main: paddle.nn.functional.softshrink :alias: paddle.nn.functional.softshrink,paddle.nn.functional.activation.softshrink :old_api: paddle.fluid.layers.softshrink @@ -530,7 +530,7 @@ def thresholded_relu(x, threshold=None): return _thresholded_relu_(**kwargs) -thresholded_relu.__doc__ = """ +thresholded_relu.__doc__ = r""" :alias_main: paddle.nn.functional.thresholded_relu :alias: paddle.nn.functional.thresholded_relu,paddle.nn.functional.activation.thresholded_relu :old_api: paddle.fluid.layers.thresholded_relu @@ -617,7 +617,7 @@ def gelu(x, approximate=False): return _gelu_(**kwargs) -gelu.__doc__ = """ +gelu.__doc__ = r""" :strong:`GeLU Activation Operator` For more details, see [Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415). @@ -701,7 +701,7 @@ def erf(x, name=None): return _erf_(**kwargs) -erf.__doc__ = """ +erf.__doc__ = r""" :strong:`Erf Operator` For more details, see [Error function](https://en.wikipedia.org/wiki/Error_function). diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 05272a7cefb..2f11603d484 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -67,7 +67,7 @@ class RNNCell(object): """ def call(self, inputs, states, **kwargs): - """ + r""" Every cell must implement this method to do the calculations mapping the inputs and states to the output and new states. @@ -97,7 +97,7 @@ class RNNCell(object): dtype='float32', init_value=0, batch_dim_idx=0): - """ + r""" Generate initialized states according to provided shape, data type and value. @@ -225,7 +225,7 @@ class RNNCell(object): class GRUCell(RNNCell): - """ + r""" :api_attr: Static Graph Gated Recurrent Unit cell. It is a wrapper for @@ -287,7 +287,7 @@ class GRUCell(RNNCell): activation, dtype) def call(self, inputs, states): - """ + r""" Perform calculations of GRU. Parameters: @@ -323,7 +323,7 @@ class GRUCell(RNNCell): class LSTMCell(RNNCell): - """ + r""" :api_attr: Static Graph Long-Short Term Memory cell. It is a wrapper for @@ -390,7 +390,7 @@ class LSTMCell(RNNCell): activation, forget_bias, dtype) def call(self, inputs, states): - """ + r""" Perform calculations of LSTM. Parameters: @@ -782,7 +782,7 @@ class Decoder(object): """ def initialize(self, inits): - """ + r""" Called once before the decoding iterations. Parameters: @@ -797,7 +797,7 @@ class Decoder(object): raise NotImplementedError def step(self, time, inputs, states, **kwargs): - """ + r""" Called per step of decoding. Parameters: @@ -818,7 +818,7 @@ class Decoder(object): raise NotImplementedError def finalize(self, outputs, final_states, sequence_lengths): - """ + r""" Called once after the decoding iterations if implemented. Parameters: @@ -931,7 +931,7 @@ class BeamSearchDecoder(Decoder): @staticmethod def tile_beam_merge_with_batch(x, beam_size): - """ + r""" Tile the batch dimension of a tensor. 
Specifically, this function takes a tensor t shaped `[batch_size, s0, s1, ...]` composed of minibatch entries `t[0], ..., t[batch_size - 1]` and tiles it to have a shape @@ -966,7 +966,7 @@ class BeamSearchDecoder(Decoder): return x def _split_batch_beams(self, x): - """ + r""" Reshape a tensor with shape `[batch_size * beam_size, ...]` to a new tensor with shape `[batch_size, beam_size, ...]`. @@ -983,7 +983,7 @@ class BeamSearchDecoder(Decoder): return nn.reshape(x, shape=[-1, self.beam_size] + list(x.shape[1:])) def _merge_batch_beams(self, x): - """ + r""" Reshape a tensor with shape `[batch_size, beam_size, ...]` to a new tensor with shape `[batch_size * beam_size, ...]`. @@ -1000,7 +1000,7 @@ class BeamSearchDecoder(Decoder): return nn.reshape(x, shape=[-1] + list(x.shape[2:])) def _expand_to_beam_size(self, x): - """ + r""" This function takes a tensor t shaped `[batch_size, s0, s1, ...]` composed of minibatch entries `t[0], ..., t[batch_size - 1]` and tiles it to have a shape `[batch_size, beam_size, s0, s1, ...]` composed of minibatch entries @@ -1023,7 +1023,7 @@ class BeamSearchDecoder(Decoder): return x def _mask_probs(self, probs, finished): - """ + r""" Mask log probabilities. It forces finished beams to allocate all probability mass to eos and unfinished beams to remain unchanged. @@ -1052,7 +1052,7 @@ class BeamSearchDecoder(Decoder): return probs def _gather(self, x, indices, batch_size): - """ + r""" Gather from the tensor `x` using `indices`. Parameters: @@ -1104,7 +1104,7 @@ class BeamSearchDecoder(Decoder): pass def initialize(self, initial_cell_states): - """ + r""" Initialize the BeamSearchDecoder. Parameters: @@ -1162,7 +1162,7 @@ class BeamSearchDecoder(Decoder): init_lengths), init_finished def _beam_search_step(self, time, logits, next_cell_states, beam_state): - """ + r""" Calculate scores and select candidate token ids. Parameters: @@ -1235,7 +1235,7 @@ class BeamSearchDecoder(Decoder): return beam_search_output, beam_search_state def step(self, time, inputs, states, **kwargs): - """ + r""" Perform a beam search decoding step, which uses `cell` to get probabilities, and follows a beam search step to calculate scores and select candidate token ids. @@ -1287,7 +1287,7 @@ class BeamSearchDecoder(Decoder): return (beam_search_output, beam_search_state, next_inputs, finished) def finalize(self, outputs, final_states, sequence_lengths): - """ + r""" Use `gather_tree` to backtrace along the beam search tree and construct the full predicted sequences. @@ -1572,7 +1572,7 @@ def dynamic_decode(decoder, is_test=False, return_length=False, **kwargs): - """ + r""" Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned Tensor indicating finished status contains all True values or the number of decoding step reaches to :attr:`max_step_num`. @@ -1664,7 +1664,7 @@ class DecodeHelper(object): """ def initialize(self): - """ + r""" DecodeHelper initialization to produce inputs for the first decoding step and give the initial status telling whether each sequence in the batch is finished. It is the partial of the initialization of `BasicDecoder`. @@ -1698,7 +1698,7 @@ class DecodeHelper(object): pass def next_inputs(self, time, outputs, states, sample_ids): - """ + r""" Produce the inputs and states for next time step and give status telling whether each minibatch entry is finished. It is called after `sample` in `BasicDecoder.step`. It is the partial of `BasicDecoder.step`. 
@@ -1787,7 +1787,7 @@ class TrainingHelper(DecodeHelper): self.inputs) def initialize(self): - """ + r""" TrainingHelper initialization produces inputs for the first decoding step by slicing at the first time step of full sequence inputs, and it gives initial status telling whether each sequence in the batch is @@ -1809,7 +1809,7 @@ class TrainingHelper(DecodeHelper): return init_inputs, init_finished def sample(self, time, outputs, states): - """ + r""" Perform sampling by using `argmax` according to the `outputs`. Mostly the sampled ids would not be used since the inputs for next decoding step would be got by slicing. @@ -1832,7 +1832,7 @@ class TrainingHelper(DecodeHelper): return sample_ids def next_inputs(self, time, outputs, states, sample_ids): - """ + r""" Generate inputs for the next decoding step by slicing at corresponding step of the full sequence inputs. Simultaneously, produce the states for next time step by directly using the input `states` and emit status @@ -1909,7 +1909,7 @@ class GreedyEmbeddingHelper(DecodeHelper): """ def __init__(self, embedding_fn, start_tokens, end_token): - """ + r""" Constructor of GreedyEmbeddingHelper. Parameters: @@ -1934,7 +1934,7 @@ class GreedyEmbeddingHelper(DecodeHelper): shape=[1], dtype="int64", value=end_token) def initialize(self): - """ + r""" GreedyEmbeddingHelper initialization produces inputs for the first decoding step by using `start_tokens` of the constructor, and gives initial status telling whether each sequence in the batch is finished. @@ -1957,7 +1957,7 @@ class GreedyEmbeddingHelper(DecodeHelper): return init_inputs, init_finished def sample(self, time, outputs, states): - """ + r""" Perform sampling by using `argmax` according to the `outputs`. Parameters: @@ -1978,7 +1978,7 @@ class GreedyEmbeddingHelper(DecodeHelper): return sample_ids def next_inputs(self, time, outputs, states, sample_ids): - """ + r""" Generate inputs for the next decoding step by applying `embedding_fn` to `sample_ids`. Simultaneously, produce the states for next time step by directly using the input `states` and emit status telling whether @@ -2046,7 +2046,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper): end_token, softmax_temperature=None, seed=None): - """ + r""" Constructor of SampleEmbeddingHelper. Parameters: @@ -2080,7 +2080,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper): self.seed = seed def sample(self, time, outputs, states): - """ + r""" Perform sampling from a categorical distribution, and the distribution is computed by `softmax(outputs/softmax_temperature)`. @@ -2165,7 +2165,7 @@ class BasicDecoder(Decoder): self.output_fn = output_fn def initialize(self, initial_cell_states): - """ + r""" BasicDecoder initialization includes helper initialization and cell initialization, and cell initialization uses `initial_cell_states` as the result directly. @@ -2195,7 +2195,7 @@ class BasicDecoder(Decoder): pass def step(self, time, inputs, states, **kwargs): - """ + r""" Perform one decoding step as following steps: 1. 
Perform `cell_outputs, cell_states = cell.call(inputs, states)` @@ -2258,7 +2258,7 @@ def dynamic_lstm(input, candidate_activation='tanh', dtype='float32', name=None): - """ + r""" :api_attr: Static Graph **Note**: @@ -2430,7 +2430,7 @@ def lstm(input, name=None, default_initializer=None, seed=-1): - """ + r""" :api_attr: Static Graph **Note**: @@ -2612,7 +2612,7 @@ def dynamic_lstmp(input, c_0=None, cell_clip=None, proj_clip=None): - """ + r""" :api_attr: Static Graph **Note**: @@ -2823,7 +2823,7 @@ def dynamic_gru(input, candidate_activation='tanh', h_0=None, origin_mode=False): - """ + r""" :api_attr: Static Graph **Note: The input type of this must be LoDTensor. If the input type to be @@ -2985,7 +2985,7 @@ def gru_unit(input, activation='tanh', gate_activation='sigmoid', origin_mode=False): - """ + r""" :api_attr: Static Graph Gated Recurrent Unit (GRU) RNN cell. This operator performs GRU calculations for @@ -3143,7 +3143,7 @@ def beam_search(pre_ids, is_accumulated=True, name=None, return_parent_idx=False): - """ + r""" Beam search is a classical algorithm for selecting candidate words in a machine translation task. @@ -3293,7 +3293,7 @@ def beam_search(pre_ids, def beam_search_decode(ids, scores, beam_size, end_id, name=None): - """ + r""" This operator is used after beam search has completed. It constructs the full predicted sequences for each sample by walking back along the search @@ -3378,7 +3378,7 @@ def lstm_unit(x_t, param_attr=None, bias_attr=None, name=None): - """ + r""" :api_attr: Static Graph Long-Short Term Memory (LSTM) RNN cell. This operator performs LSTM calculations for diff --git a/python/paddle/fluid/layers/sequence_lod.py b/python/paddle/fluid/layers/sequence_lod.py index 80faffd477b..df1113660f7 100644 --- a/python/paddle/fluid/layers/sequence_lod.py +++ b/python/paddle/fluid/layers/sequence_lod.py @@ -51,7 +51,7 @@ def sequence_conv(input, param_attr=None, act=None, name=None): - """ + r""" :api_attr: Static Graph **Notes: The Op only receives LoDTensor as input. If your input is Tensor, please use conv2d Op.(fluid.layers.** :ref:`api_fluid_layers_conv2d` ). @@ -175,7 +175,7 @@ def sequence_conv(input, def sequence_softmax(input, use_cudnn=False, name=None): - """ + r""" :api_attr: Static Graph **Note**: @@ -259,7 +259,7 @@ def sequence_softmax(input, use_cudnn=False, name=None): def sequence_pool(input, pool_type, is_test=False, pad_value=0.0): - """ + r""" :api_attr: Static Graph **Notes: The Op only receives LoDTensor as input. If your input is Tensor, please use pool2d Op.(fluid.layers.** :ref:`api_fluid_layers_pool2d` ). @@ -636,7 +636,7 @@ def sequence_slice(input, offset, length, name=None): def sequence_expand(x, y, ref_level=-1, name=None): - """ + r""" :api_attr: Static Graph Sequence Expand Layer. This layer will expand the input variable ``x`` \ @@ -772,7 +772,7 @@ def sequence_expand(x, y, ref_level=-1, name=None): def sequence_expand_as(x, y, name=None): - """ + r""" :api_attr: Static Graph Sequence Expand As Layer. 
This OP will expand the input variable ``x`` \ @@ -892,7 +892,7 @@ def sequence_expand_as(x, y, name=None): def sequence_pad(x, pad_value, maxlen=None, name=None): - """ + r""" :api_attr: Static Graph This layer padding the sequences in a same batch to a common length (according \ @@ -1233,7 +1233,7 @@ def sequence_scatter(input, index, updates, name=None): def sequence_enumerate(input, win_size, pad_value=0, name=None): - """ + r""" :api_attr: Static Graph Generate a new sequence for the input index sequence with \ @@ -1301,7 +1301,7 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None): def sequence_mask(x, maxlen=None, dtype='int64', name=None): - """ + r""" **SequenceMask Layer** This layer outputs a mask according to the input :code:`x` and diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index fe3970ce1c1..6e794874afb 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -343,7 +343,7 @@ def concat(input, axis=0, name=None): def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False): - """ + r""" This function concatenates or stacks all tensors in the input LoDTensorArray along the axis mentioned and returns that as the output. @@ -452,7 +452,7 @@ def tensor_array_to_tensor(input, axis=1, name=None, use_stack=False): def sums(input, out=None): - """ + r""" This function computes the sum of multiple input Tensors elementwisely. - Case 1, sum of 3 Tensors @@ -1391,7 +1391,7 @@ def range(start, end, step, dtype, name=None): def linspace(start, stop, num, dtype=None, name=None): - """ + r""" This OP return fixed number of evenly spaced values within a given interval. Args: @@ -1527,7 +1527,7 @@ def zeros_like(x, out=None): @deprecated(since="2.0.0", update_to="paddle.diag") def diag(diagonal): - """ + r""" :alias_main: paddle.diag :alias: paddle.diag,paddle.tensor.diag,paddle.tensor.creation.diag :old_api: paddle.fluid.layers.diag diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index 0c3f6e16732..a3b61f2e911 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -475,7 +475,7 @@ class Accuracy(MetricBase): self.weight = .0 def update(self, value, weight): - """ + r""" This function takes the minibatch states (value, weight) as input, to accumulate and update the corresponding status of the Accuracy object. The update method is as follows: @@ -561,7 +561,7 @@ class ChunkEvaluator(MetricBase): self.num_correct_chunks = 0 def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks): - """ + r""" This function takes (num_infer_chunks, num_label_chunks, num_correct_chunks) as input, to accumulate and update the corresponding status of the ChunkEvaluator object. The update method is as follows: diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index 8df8f6b6891..c47cce76f89 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -42,7 +42,7 @@ def simple_img_conv_pool(input, bias_attr=None, act=None, use_cudnn=True): - """ + r""" :api_attr: Static Graph The simple_img_conv_pool api is composed of :ref:`api_fluid_layers_conv2d` and :ref:`api_fluid_layers_pool2d` . 
@@ -333,7 +333,7 @@ def sequence_conv_pool(input, def glu(input, dim=-1): - """ + r""" :api_attr: Static Graph The Gated Linear Units(GLU) composed by :ref:`api_fluid_layers_split` , @@ -384,7 +384,7 @@ def scaled_dot_product_attention(queries, values, num_heads=1, dropout_rate=0.): - """ + r""" :api_attr: Static Graph This interface Multi-Head Attention using scaled dot product. diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 7f9ade8fcbd..2d95bfa8c54 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -954,7 +954,7 @@ class Optimizer(object): class SGDOptimizer(Optimizer): - """ + r""" Optimizer of the stochastic gradient descent algorithm. .. math:: @@ -1048,7 +1048,7 @@ class SGDOptimizer(Optimizer): class MomentumOptimizer(Optimizer): - """ + r""" Simple Momentum optimizer with velocity state @@ -1183,7 +1183,7 @@ class MomentumOptimizer(Optimizer): class DGCMomentumOptimizer(Optimizer): - """ + r""" :api_attr: Static Graph DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887 @@ -1603,7 +1603,7 @@ class DGCMomentumOptimizer(Optimizer): class LarsMomentumOptimizer(Optimizer): - """ + r""" Momentum optimizer with LARS support The update equations are as follows: @@ -1735,7 +1735,7 @@ class LarsMomentumOptimizer(Optimizer): class AdagradOptimizer(Optimizer): - """ + r""" The Adaptive Gradient optimizer (Adagrad for short) can adaptively assign different learning rates to individual parameters. @@ -1851,7 +1851,7 @@ class AdagradOptimizer(Optimizer): class AdamOptimizer(Optimizer): - """ + r""" The Adam optimizer uses an optimization described at the end of section 2 of `Adam paper `_ , it can dynamically adjusts the learning rate of each parameter using @@ -2117,7 +2117,7 @@ class AdamOptimizer(Optimizer): class AdamaxOptimizer(Optimizer): - """ + r""" The Adamax optimizer is implemented based on the Adamax Optimization in Section 7 of `Adam paper `_. The Adamax algorithm is a variant of the Adam algorithm based on the infinite norm, @@ -2289,7 +2289,7 @@ class AdamaxOptimizer(Optimizer): class DpsgdOptimizer(Optimizer): - """ + r""" We implement the Dpsgd optimizer according to CCS16 paper - Deep Learning with Differential Privacy. @@ -2384,7 +2384,7 @@ class DpsgdOptimizer(Optimizer): class DecayedAdagradOptimizer(Optimizer): - """ + r""" The Decayed Adagrad optimizer can be seen as an Adagrad algorithm that introduces the decay rate to solve the problem of a sharp drop in the learning rate during model training when using the AdagradOptimizer. @@ -2494,7 +2494,7 @@ class DecayedAdagradOptimizer(Optimizer): class AdadeltaOptimizer(Optimizer): - """ + r""" **Notes: This API does not support sparse parameter optimization.** Adadelta Optimizer. Please refer to this for details: @@ -2613,7 +2613,7 @@ class AdadeltaOptimizer(Optimizer): class RMSPropOptimizer(Optimizer): - """ + r""" Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method. The original slides proposed RMSProp: Slide 29 of http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf . @@ -2801,7 +2801,7 @@ class RMSPropOptimizer(Optimizer): class FtrlOptimizer(Optimizer): - """ + r""" FTRL (Follow The Regularized Leader) Optimizer. 
The paper that proposed Follow The Regularized Leader (FTRL): @@ -2960,7 +2960,7 @@ class FtrlOptimizer(Optimizer): class LambOptimizer(AdamOptimizer): - """ + r""" LAMB (Layer-wise Adaptive Moments optimizer for Batching training) Optimizer. LAMB Optimizer is designed to scale up the batch size of training without losing @@ -3132,7 +3132,7 @@ Lamb = LambOptimizer class ModelAverage(Optimizer): - """ + r""" :api_attr: Static Graph The ModelAverage optimizer accumulates specific continuous historical parameters @@ -3441,7 +3441,7 @@ class ModelAverage(Optimizer): class ExponentialMovingAverage(object): - """ + r""" :api_attr: Static Graph Compute the moving average of parameters with exponential decay. @@ -4795,7 +4795,7 @@ class RecomputeOptimizer(Optimizer): class LookaheadOptimizer(object): - """ + r""" :api_attr: Static Graph This implements the Lookahead optimizer of the diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 65f7bd64708..7d123e7122e 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -210,7 +210,7 @@ class ParamAttr(object): class WeightNormParamAttr(ParamAttr): - """ + r""" :api_attr: Static Graph Note: diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 09850b3cac9..1cb76b1f390 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -1325,7 +1325,7 @@ class GeneratorLoader(DataLoaderBase): class PyReader(DataLoaderBase): - """ + r""" Create a reader object for data feeding in Python. Data would be prefetched using Python thread and be pushed into a queue asynchronously. Data in the queue would be extracted diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index 9fe24ec2c9d..5e0e5f724a8 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -63,7 +63,7 @@ def _create_regularization_of_grad(param, grad, regularization=None): def append_regularization_ops(parameters_and_grads, regularization=None): - """Create and add backward regularization Operators + r"""Create and add backward regularization Operators Creates and adds backward regularization operators in the BlockDesc. This will add gradients of the regularizer function to the gradients @@ -132,7 +132,7 @@ class WeightDecayRegularizer(object): class L2DecayRegularizer(WeightDecayRegularizer): - """ + r""" Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting. It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). @@ -239,7 +239,7 @@ class L2DecayRegularizer(WeightDecayRegularizer): class L1DecayRegularizer(WeightDecayRegularizer): - """ + r""" Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse. It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). 
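Most of the `"""` -> `r"""` conversions above target docstrings that carry `.. math::` or `:math:` blocks (SGDOptimizer, MomentumOptimizer, the regularizers, and so on). In a plain string literal Python rewrites recognised escapes and warns about unrecognised ones, so the backslash commands inside those formulas are silently corrupted or flagged; the raw prefix keeps every backslash verbatim. A minimal sketch of the difference, using a made-up formula rather than one copied from this patch:

    # Plain literal: "\f" turns into a form feed, and "\s" keeps its backslash but
    # emits an "invalid escape sequence" DeprecationWarning (a SyntaxWarning on
    # newer interpreters).
    plain = "loss = \frac{1}{2}, h_t = \sigma(W x_t)"
    print(repr(plain))           # '\x0crac' appears where '\frac' was meant

    # Raw literal: the backslashes survive exactly as written, which is what the
    # Sphinx math renderer needs to receive.
    raw = r"loss = \frac{1}{2}, h_t = \sigma(W x_t)"
    print(repr(raw))             # '\\frac' and '\\sigma' are intact
    assert "\\frac" in raw and "\\frac" not in plain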
diff --git a/python/paddle/fluid/tests/unittests/dist_text_classification.py b/python/paddle/fluid/tests/unittests/dist_text_classification.py index 095a474fd3a..21180d7f49f 100644 --- a/python/paddle/fluid/tests/unittests/dist_text_classification.py +++ b/python/paddle/fluid/tests/unittests/dist_text_classification.py @@ -204,8 +204,8 @@ def train(word_idx): :rtype: callable """ return reader_creator( - re.compile("train/pos/.*\.txt$"), - re.compile("train/neg/.*\.txt$"), word_idx) + re.compile(r"train/pos/.*\.txt$"), + re.compile(r"train/neg/.*\.txt$"), word_idx) def test(word_idx): @@ -221,8 +221,8 @@ def test(word_idx): :rtype: callable """ return reader_creator( - re.compile("test/pos/.*\.txt$"), - re.compile("test/neg/.*\.txt$"), word_idx) + re.compile(r"test/pos/.*\.txt$"), + re.compile(r"test/neg/.*\.txt$"), word_idx) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index bb7e0ca2a0c..4f35befda8e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -230,7 +230,7 @@ class SoftsignLayer(object): class FC(Layer): - """ + r""" This interface is used to construct a callable object of the ``FC`` class. For more details, refer to code examples. It creates a fully connected layer in the network. It can take diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py index ec57057164f..e0b7e9033dd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -227,7 +227,7 @@ class SoftsignLayer(object): class FC(paddle.nn.Layer): - """ + r""" This interface is used to construct a callable object of the ``FC`` class. For more details, refer to code examples. It creates a fully connected layer in the network. 
It can take diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index 4ae44365f25..ef4cbf0b742 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -235,7 +235,7 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase): class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): - ''' + r''' Test RNNOp equation: h_t = \sigma (W x_t + U h_{t-1}) diff --git a/python/paddle/fluid/tests/unittests/test_full_like_op.py b/python/paddle/fluid/tests/unittests/test_full_like_op.py index 30bc097428c..3f3b1ee6703 100644 --- a/python/paddle/fluid/tests/unittests/test_full_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_full_like_op.py @@ -31,7 +31,8 @@ class TestFullOp(unittest.TestCase): train_program = Program() with program_guard(train_program, startup_program): fill_value = 2.0 - input = paddle.fluid.data(name='input', dtype='float32', shape=[2, 3]) + input = paddle.fluid.data( + name='input', dtype='float32', shape=[2, 3]) output = paddle.full_like(input, fill_value) output_dtype = paddle.full_like(input, fill_value, dtype='float32') diff --git a/python/paddle/fluid/tests/unittests/test_lrn_op.py b/python/paddle/fluid/tests/unittests/test_lrn_op.py index 29e0a8d6f02..2b632b2437a 100644 --- a/python/paddle/fluid/tests/unittests/test_lrn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lrn_op.py @@ -25,7 +25,7 @@ from paddle.fluid import compiler, Program, program_guard class TestLRNOp(OpTest): def get_input(self): - ''' TODO(gongweibao): why it's grad diff is so large? + r''' TODO(gongweibao): why it's grad diff is so large? x = np.ndarray( shape=(self.N, self.C, self.H, self.W), dtype=float, order='C') for m in range(0, self.N): diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index b738d4b8efe..a8adee742c6 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -232,7 +232,7 @@ class RecurrentOpTest1(unittest.TestCase): class RecurrentOpTest2(RecurrentOpTest1): - ''' + r''' Test RNNOp equation: h_t = \sigma (W x_t + U h_{t-1}) @@ -469,7 +469,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): class RecurrentOpSubBlockTest(RecurrentOpTest1): - ''' + r''' Test RNNOp with subblock variable equation: y_ = emb * w1 @@ -608,7 +608,7 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): class RecurrentOpStopGradientTest(RecurrentOpTest1): - """ + r""" Test RNNOp with stop_gradient = True equation: h_t = \sigma (W x_t + U h_{t-1}) diff --git a/python/paddle/fluid/tests/unittests/test_require_version.py b/python/paddle/fluid/tests/unittests/test_require_version.py index 80d595c1ef1..d1cb0aa4d81 100644 --- a/python/paddle/fluid/tests/unittests/test_require_version.py +++ b/python/paddle/fluid/tests/unittests/test_require_version.py @@ -79,7 +79,7 @@ class TestErrors(unittest.TestCase): self.assertRaises(TypeError, test_input_type_1) - # The value of params must be in format '\d+(\.\d+){0,3}', like '1.5.2.0', '1.6' ... + # The value of params must be in format r'\d+(\.\d+){0,3}', like '1.5.2.0', '1.6' ... 
def test_input_value_1(): fluid.require_version('string') diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index 510b99c0300..f1808efe86e 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -35,7 +35,7 @@ def _is_numpy_(var): @six.add_metaclass(abc.ABCMeta) class Metric(object): - """ + r""" Base class for metric, encapsulates metric logic and APIs Usage: diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index e7adc7106a4..915668de19d 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -58,7 +58,7 @@ import paddle def elu(x, alpha=1.0, name=None): - """ + r""" elu activation. .. math:: @@ -101,7 +101,7 @@ def elu(x, alpha=1.0, name=None): def gelu(x, approximate=False, name=None): - """ + r""" gelu activation. if approximate is True @@ -155,7 +155,7 @@ def gelu(x, approximate=False, name=None): def hardshrink(x, threshold=0.5, name=None): - """ + r""" hard shrinkage activation .. math:: @@ -204,7 +204,7 @@ def hardshrink(x, threshold=0.5, name=None): def hardtanh(x, min=-1.0, max=1.0, name=None): - """ + r""" hardtanh activation .. math:: @@ -254,7 +254,7 @@ def hardtanh(x, min=-1.0, max=1.0, name=None): def hardsigmoid(x, name=None): - """ + r""" hardsigmoid activation. A 3-part piecewise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391), @@ -308,7 +308,7 @@ def hardsigmoid(x, name=None): def hardswish(x, name=None): - """ + r""" hardswish activation hardswish is proposed in MobileNetV3, and performs better in computational stability @@ -357,7 +357,7 @@ def hardswish(x, name=None): def leaky_relu(x, negative_slope=0.01, name=None): - """ + r""" leaky_relu activation .. math:: @@ -515,7 +515,7 @@ def relu(x, name=None): def log_sigmoid(x, name=None): - """ + r""" log_sigmoid activation. .. math:: @@ -552,7 +552,7 @@ def log_sigmoid(x, name=None): def maxout(x, groups, axis=1, name=None): - """ + r""" maxout activation. Assumed the input shape is (N, Ci, H, W). @@ -671,7 +671,7 @@ def selu(x, scale=1.0507009873554804934193349852946, alpha=1.6732632423543772848170429916717, name=None): - """ + r""" selu activation .. math:: @@ -726,7 +726,7 @@ def selu(x, def softmax(x, axis=-1, dtype=None, name=None): - """ + r""" This operator implements the softmax layer. The calculation process is as follows: 1. The dimension :attr:`axis` of ``x`` will be permuted to the last. @@ -880,7 +880,7 @@ def softmax(x, axis=-1, dtype=None, name=None): def softplus(x, beta=1, threshold=20, name=None): - """ + r""" softplus activation .. math:: @@ -925,7 +925,7 @@ def softplus(x, beta=1, threshold=20, name=None): def softshrink(x, threshold=0.5, name=None): - """ + r""" softshrink activation .. math:: @@ -976,7 +976,7 @@ def softshrink(x, threshold=0.5, name=None): def softsign(x, name=None): - """ + r""" softsign activation .. math:: @@ -1013,7 +1013,7 @@ def softsign(x, name=None): def swish(x, name=None): - """ + r""" swish activation. .. math:: @@ -1091,7 +1091,7 @@ def tanhshrink(x, name=None): def thresholded_relu(x, threshold=1.0, name=None): - """ + r""" thresholded relu activation. .. math:: @@ -1137,7 +1137,7 @@ def thresholded_relu(x, threshold=1.0, name=None): def log_softmax(x, axis=-1, dtype=None, name=None): - """ + r""" This operator implements the log_softmax layer. 
The calculation process is as follows: diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 910a302599f..a4c92883e06 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -1413,7 +1413,7 @@ def cosine_similarity(x1, x2, axis=1, eps=1e-8): def linear(x, weight, bias=None, name=None): - """ + r""" Fully-connected linear transformation operator. For each input :math:`X` , the equation is: @@ -1500,7 +1500,7 @@ def linear(x, weight, bias=None, name=None): def label_smooth(label, prior_dist=None, epsilon=0.1, name=None): - """ + r""" Label smoothing is a mechanism to regularize the classifier layer and is called label-smoothing regularization (LSR). diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index c4410346ca1..75be8f54cd7 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -166,7 +166,7 @@ def conv1d(x, groups=1, data_format='NCL', name=None): - """ + r""" The convolution1D layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input and Output are in NCL format, where N is batch size, C is the number of @@ -392,7 +392,7 @@ def conv2d(x, groups=1, data_format="NCHW", name=None): - """ + r""" The convolution2D layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input and @@ -568,7 +568,7 @@ def conv1d_transpose(x, output_size=None, data_format="NCL", name=None): - """ + r""" The 1-D convolution transpose layer calculates the output based on the input, filter, and dilation, stride, padding. Input(Input) and output(Output) are in 'NCL' format or 'NLC' where N is batch size, C is the number of channels, @@ -828,7 +828,7 @@ def conv2d_transpose(x, output_size=None, data_format='NCHW', name=None): - """ + r""" The convolution2D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) @@ -1068,7 +1068,7 @@ def conv3d(x, groups=1, data_format="NCDHW", name=None): - """ + r""" The convolution3D layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input(Input) and @@ -1233,7 +1233,7 @@ def conv3d_transpose(x, output_size=None, data_format='NCDHW', name=None): - """ + r""" The convolution3d transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels, diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index 4ec0f8407fa..5e80f307eee 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -14,10 +14,7 @@ # TODO: define the extention functions -__all__ = [ - 'diag_embed', - 'row_conv' -] +__all__ = ['diag_embed', 'row_conv'] import numpy as np from ...fluid.data_feeder import check_dtype diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index 40b9441c2dc..5cabc4b6755 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -111,7 +111,7 @@ def one_hot(x, num_classes, name=None): def embedding(x, weight, padding_idx=None, sparse=False, name=None): - """ + r""" The operator is used to lookup embeddings vector of ids provided by :attr:`x` . 
The shape of output Tensor is generated by appending the last dimension of the input Tensor shape diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 1b19c4c1637..fb923e05671 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -184,7 +184,7 @@ def binary_cross_entropy_with_logits(logit, reduction='mean', pos_weight=None, name=None): - """ + r""" This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` layer and some reduce operations. @@ -461,7 +461,7 @@ def hsigmoid_loss(input, def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None): - """ + r""" This operator calculates smooth_l1_loss. Creates a criterion that uses a squared term if the absolute element-wise error falls below 1 and an L1 term otherwise. In some cases it can prevent exploding gradients and it is more robust and less @@ -544,7 +544,7 @@ def margin_ranking_loss(input, margin=0.0, reduction='mean', name=None): - """ + r""" This op the calcluate the the margin rank loss between the input, other and label, use the math function as follows. @@ -646,7 +646,7 @@ def margin_ranking_loss(input, def l1_loss(input, label, reduction='mean', name=None): - """ + r""" This operator computes the L1 Loss of Tensor ``input`` and ``label`` as follows. If `reduction` set to ``'none'``, the loss is: @@ -840,7 +840,7 @@ def nll_loss(input, def kl_div(input, label, reduction='mean', name=None): - """ + r""" This operator calculates the Kullback-Leibler divergence loss between Input(X) and Input(Target). Notes that Input(X) is the log-probability and Input(Target) is the probability. @@ -947,7 +947,7 @@ def kl_div(input, label, reduction='mean', name=None): def mse_loss(input, label, reduction='mean', name=None): - """ + r""" This op accepts input predications and label and returns the mean square error. If :attr:`reduction` is set to ``'none'``, loss is calculated as: @@ -1121,7 +1121,7 @@ def cross_entropy(input, weight=None, ignore_index=-100, reduction='mean'): - """ + r""" This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``, and ``NLLLoss`` together. @@ -1252,7 +1252,7 @@ def sigmoid_focal_loss(logit, gamma=2.0, reduction='sum', name=None): - """ + r""" `Focal Loss `_ is proposed to address the foreground-background class imbalance for classification tasks. It down-weights easily-classified examples and thus focuses training on hard examples. For example, diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index 0a1547bebbb..250039b9646 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -35,7 +35,7 @@ __all__ = [ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): - """ + r""" This op normalizes ``x`` along dimension ``axis`` using :math:`L_p` norm. This layer computes .. math:: @@ -412,7 +412,7 @@ def local_response_norm(x, k=1., data_format="NCHW", name=None): - """ + r""" Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions. 
For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks `_ diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 5e1cb377bd7..a76bc9e86d2 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -54,11 +54,7 @@ import numpy as np # from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS # from ...fluid.layers import shuffle_channel #DEFINE_ALIAS -__all__ = [ - 'affine_grid', - 'grid_sample', - 'pixel_shuffle' -] +__all__ = ['affine_grid', 'grid_sample', 'pixel_shuffle'] def affine_grid(theta, out_shape, align_corners=True, name=None): diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index f0c6880e89d..7e2b6f787f8 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -19,7 +19,7 @@ __all__ = ['KaimingUniform', 'KaimingNormal'] class KaimingNormal(MSRAInitializer): - """Implements the Kaiming Normal initializer + r"""Implements the Kaiming Normal initializer This class implements the weight initialization from the paper `Delving Deep into Rectifiers: Surpassing Human-Level Performance on @@ -62,7 +62,7 @@ class KaimingNormal(MSRAInitializer): class KaimingUniform(MSRAInitializer): - """Implements the Kaiming Uniform initializer + r"""Implements the Kaiming Uniform initializer This class implements the weight initialization from the paper `Delving Deep into Rectifiers: Surpassing Human-Level Performance on diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py index 5a4e7fec057..821a6984753 100644 --- a/python/paddle/nn/initializer/xavier.py +++ b/python/paddle/nn/initializer/xavier.py @@ -18,7 +18,7 @@ __all__ = ['XavierNormal', 'XavierUniform'] class XavierNormal(XavierInitializer): - """ + r""" This class implements the Xavier weight initializer from the paper `Understanding the difficulty of training deep feedforward neural networks `_ @@ -71,7 +71,7 @@ class XavierNormal(XavierInitializer): class XavierUniform(XavierInitializer): - """ + r""" This class implements the Xavier weight initializer from the paper `Understanding the difficulty of training deep feedforward neural networks `_ diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index 520762107db..b002b534625 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -50,7 +50,7 @@ from .. import functional as F class ELU(layers.Layer): - """ + r""" ELU Activation. .. math:: @@ -88,7 +88,7 @@ class ELU(layers.Layer): class GELU(layers.Layer): - """ + r""" GELU Activation. If approximate is True @@ -137,7 +137,7 @@ class GELU(layers.Layer): class Hardshrink(layers.Layer): - """ + r""" Hardshrink Activation .. math:: @@ -181,7 +181,7 @@ class Hardshrink(layers.Layer): class Hardswish(layers.Layer): - """ + r""" Hardswish activation Hardswish is proposed in MobileNetV3, and performs better in computational stability @@ -227,7 +227,7 @@ class Hardswish(layers.Layer): class Tanh(layers.Layer): - """ + r""" Tanh Activation. .. math:: @@ -264,7 +264,7 @@ class Tanh(layers.Layer): class Hardtanh(layers.Layer): - """ + r""" Hardtanh Activation .. math:: @@ -442,7 +442,7 @@ class ReLU6(layers.Layer): class SELU(layers.Layer): - """ + r""" SELU Activation .. math:: @@ -488,7 +488,7 @@ class SELU(layers.Layer): class LeakyReLU(layers.Layer): - """ + r""" Leaky ReLU Activation. .. 
math:: @@ -574,7 +574,7 @@ class Sigmoid(layers.Layer): class Hardsigmoid(layers.Layer): - """ + r""" This interface is used to construct a callable object of the ``Hardsigmoid`` class. This layer calcluate the `hardsigmoid` of input x. @@ -621,7 +621,7 @@ class Hardsigmoid(layers.Layer): class Softplus(layers.Layer): - """ + r""" Softplus Activation .. math:: @@ -661,7 +661,7 @@ class Softplus(layers.Layer): class Softshrink(layers.Layer): - """ + r""" Softshrink Activation .. math:: @@ -702,7 +702,7 @@ class Softshrink(layers.Layer): class Softsign(layers.Layer): - """ + r""" Softsign Activation .. math:: @@ -737,7 +737,7 @@ class Softsign(layers.Layer): class Swish(layers.Layer): - """ + r""" Swish Activation. .. math:: @@ -807,7 +807,7 @@ class Tanhshrink(layers.Layer): class ThresholdedReLU(layers.Layer): - """ + r""" Thresholded ReLU Activation .. math:: @@ -847,7 +847,7 @@ class ThresholdedReLU(layers.Layer): class LogSigmoid(layers.Layer): - """ + r""" LogSigmoid Activation. .. math:: @@ -882,7 +882,7 @@ class LogSigmoid(layers.Layer): class Softmax(layers.Layer): - """ + r""" Softmax Activation. This operator implements the softmax layer. The calculation process is as follows: @@ -1005,7 +1005,7 @@ class Softmax(layers.Layer): class LogSoftmax(layers.Layer): - """ + r""" This operator implements the log_softmax layer. The calculation process is as follows: .. math:: @@ -1059,7 +1059,7 @@ class LogSoftmax(layers.Layer): class Maxout(layers.Layer): - """ + r""" Maxout Activation. Assumed the input shape is (N, Ci, H, W). diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 9a3edef5e4c..8558e0f1793 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -40,7 +40,7 @@ __all__ = [ class Linear(layers.Layer): - """ + r""" Fully-connected linear transformation layer. For each input :math:`X` , the equation is: @@ -381,7 +381,7 @@ class Upsample(layers.Layer): class Bilinear(layers.Layer): - """ + r""" This layer performs bilinear on two inputs. @@ -988,7 +988,7 @@ class CosineSimilarity(layers.Layer): class Embedding(layers.Layer): - """ + r""" **Embedding Layer** This interface is used to construct a callable object of the ``Embedding`` class. diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 0b0d0e302b8..d554bb0fd96 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -141,7 +141,7 @@ class _ConvNd(layers.Layer): class Conv1D(_ConvNd): - """ + r""" This interface is used to construct a callable object of the ``Conv1D`` class. For more details, refer to code examples. The convolution1D layer calculates the output based on the input, filter @@ -294,7 +294,7 @@ class Conv1D(_ConvNd): class Conv1DTranspose(_ConvNd): - """ + r""" This interface is used to construct a callable object of the ``Conv1DTranspose`` class. For more details, refer to code examples. The 1-D convolution transpose layer calculates the output based on the input, @@ -469,7 +469,7 @@ class Conv1DTranspose(_ConvNd): class Conv2D(_ConvNd): - """ + r""" This interface is used to construct a callable object of the ``Conv2D`` class. For more details, refer to code examples. The convolution2D layer calculates the output based on the input, filter @@ -626,7 +626,7 @@ class Conv2D(_ConvNd): class Conv2DTranspose(_ConvNd): - """ + r""" This interface is used to construct a callable object of the ``Conv2DTranspose`` class. For more details, refer to code examples. 
The convolution2D transpose layer calculates the output based on the input, @@ -786,7 +786,7 @@ class Conv2DTranspose(_ConvNd): class Conv3D(_ConvNd): - """ + r""" **Convlution3d Layer** The convolution3d layer calculates the output based on the input, filter and strides, paddings, dilations, groups parameters. Input(Input) and @@ -943,7 +943,7 @@ class Conv3D(_ConvNd): class Conv3DTranspose(_ConvNd): - """ + r""" **Convlution3D transpose layer** The convolution3D transpose layer calculates the output based on the input, filter, and dilations, strides, paddings. Input(Input) and output(Output) diff --git a/python/paddle/nn/layer/distance.py b/python/paddle/nn/layer/distance.py index 28b29a583d8..5a3c611b3c4 100644 --- a/python/paddle/nn/layer/distance.py +++ b/python/paddle/nn/layer/distance.py @@ -24,7 +24,7 @@ from ...fluid.layer_helper import LayerHelper class PairwiseDistance(layers.Layer): - """ + r""" This operator computes the pairwise distance between two vectors. The distance is calculated by p-oreder norm: diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 96db0dde54f..faf1345c7ba 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -36,7 +36,7 @@ __all__ = [ class BCEWithLogitsLoss(fluid.dygraph.Layer): - """ + r""" This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` layer and some reduce operations. @@ -141,7 +141,7 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer): class CrossEntropyLoss(fluid.dygraph.Layer): - """ + r""" :alias_main: paddle.nn.CrossEntropyLoss :alias: paddle.nn.CrossEntropyLoss,paddle.nn.layer.CrossEntropyLoss,paddle.nn.layer.loss.CrossEntropyLoss @@ -375,7 +375,7 @@ class HSigmoidLoss(fluid.dygraph.Layer): class MSELoss(fluid.dygraph.layers.Layer): - """ + r""" **Mean Square Error Loss** Computes the mean square error (squared L2 norm) of given input and label. @@ -454,7 +454,7 @@ class MSELoss(fluid.dygraph.layers.Layer): class L1Loss(fluid.dygraph.Layer): - """ + r""" This interface is used to construct a callable object of the ``L1Loss`` class. The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows. @@ -622,7 +622,7 @@ class BCELoss(fluid.dygraph.Layer): class NLLLoss(fluid.dygraph.Layer): - """ + r""" This class accepts input and target label and returns negative log likelihood cross error. It is useful to train a classification problem with C classes. @@ -733,7 +733,7 @@ class NLLLoss(fluid.dygraph.Layer): class KLDivLoss(fluid.dygraph.Layer): - """ + r""" This interface calculates the Kullback-Leibler divergence loss between Input(X) and Input(Target). Notes that Input(X) is the log-probability and Input(Target) is the probability. @@ -806,7 +806,7 @@ class KLDivLoss(fluid.dygraph.Layer): class MarginRankingLoss(fluid.dygraph.Layer): - """ + r""" This interface is used to construct a callable object of the ``MarginRankingLoss`` class. The MarginRankingLoss layer calculates the margin rank loss between the input, other and label @@ -958,7 +958,7 @@ class CTCLoss(fluid.dygraph.Layer): class SmoothL1Loss(fluid.dygraph.Layer): - """ + r""" This operator calculates smooth_l1_loss. Creates a criterion that uses a squared term if the absolute element-wise error falls below 1 and an L1 term otherwise. 
In some cases it can prevent exploding gradients and it is more robust and less diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 5e2292d40d2..7f416749c8a 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -109,7 +109,7 @@ class _InstanceNormBase(layers.Layer): class InstanceNorm1D(_InstanceNormBase): - """ + r""" Applies Instance Normalization over a 3D input (a mini-batch of 1D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization . DataLayout: NCL `[batch, in_channels, length]` @@ -181,7 +181,7 @@ class InstanceNorm1D(_InstanceNormBase): class InstanceNorm2D(_InstanceNormBase): - """ + r""" Applies Instance Normalization over a 4D input (a mini-batch of 2D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization . DataLayout: NCHW `[batch, in_channels, in_height, in_width]` @@ -252,7 +252,7 @@ class InstanceNorm2D(_InstanceNormBase): class InstanceNorm3D(_InstanceNormBase): - """ + r""" Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs with additional channel dimension) as described in the paper Instance Normalization: The Missing Ingredient for Fast Stylization . DataLayout: NCHW `[batch, in_channels, D, in_height, in_width]` @@ -437,7 +437,7 @@ class GroupNorm(layers.Layer): class LayerNorm(layers.Layer): - """ + r""" :alias_main: paddle.nn.LayerNorm :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm :old_api: paddle.fluid.dygraph.LayerNorm @@ -649,7 +649,7 @@ class _BatchNormBase(layers.Layer): class BatchNorm1D(_BatchNormBase): - """ + r""" Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D inputswith additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift . When track_running_stats = False, the :math:`\\mu_{\\beta}` @@ -740,7 +740,7 @@ class BatchNorm1D(_BatchNormBase): class BatchNorm2D(_BatchNormBase): - """ + r""" Applies Batch Normalization over a 4D input (a mini-batch of 2D inputswith additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift . When track_running_stats = False, the :math:`\\mu_{\\beta}` @@ -829,7 +829,7 @@ class BatchNorm2D(_BatchNormBase): class BatchNorm3D(_BatchNormBase): - """ + r""" Applies Batch Normalization over a 5D input (a mini-batch of 3D inputswith additional channel dimension) as described in the paper Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift . When track_running_stats = False, the :math:`\\mu_{\\beta}` @@ -919,7 +919,7 @@ class BatchNorm3D(_BatchNormBase): class SyncBatchNorm(_BatchNormBase): - """ + r""" This interface is used to construct a callable object of the ``SyncBatchNorm`` class. 
It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can be used as a normalizer function for other operations, such as conv2d and fully connected diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 7be229bdce0..dc065918f3d 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -120,7 +120,7 @@ class AvgPool1D(layers.Layer): class AvgPool2D(layers.Layer): - """ + r""" This operation applies 2D average pooling over input features based on the input, and kernel_size, stride, padding parameters. Input(X) and Output(Out) are in NCHW format, where N is batch size, C is the number of channels, @@ -401,7 +401,7 @@ class MaxPool1D(layers.Layer): class MaxPool2D(layers.Layer): - """ + r""" This operation applies 2D max pooling over input feature based on the input, and kernel_size, stride, padding parameters. Input(X) and Output(Out) are in NCHW format, where N is batch size, C is the number of channels, @@ -595,7 +595,7 @@ class MaxPool3D(layers.Layer): class AdaptiveAvgPool1D(layers.Layer): - """ + r""" This operation applies a 1D adaptive average pooling over an input signal composed of several input planes, based on the input, output_size, return_mask parameters. @@ -663,7 +663,7 @@ class AdaptiveAvgPool1D(layers.Layer): class AdaptiveAvgPool2D(layers.Layer): - """ + r""" This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. @@ -745,7 +745,7 @@ class AdaptiveAvgPool2D(layers.Layer): class AdaptiveAvgPool3D(layers.Layer): - """ + r""" This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index ea4f6970bc6..0da00735b43 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -157,7 +157,7 @@ class MultiHeadAttention(Layer): embed_dim, embed_dim, weight_attr, bias_attr=bias_attr) def _prepare_qkv(self, query, key, value, cache=None): - """ + r""" Prapares linear projected queries, keys and values for usage of subsequnt multiple parallel attention. If `cache` is not None, using cached results to reduce redundant calculations. @@ -212,7 +212,7 @@ class MultiHeadAttention(Layer): return (q, k, v) if cache is None else (q, k, v, cache) def compute_kv(self, key, value): - """ + r""" Applies linear projection on input keys and values, then splits heads (reshape and transpose) to get keys and values from different representation subspaces. The results are used as key-values pairs for subsequent multiple @@ -312,7 +312,7 @@ class MultiHeadAttention(Layer): return self.Cache(key, value) def forward(self, query, key, value, attn_mask=None, cache=None): - """ + r""" Applies multi-head attention to map queries and a set of key-value pairs to outputs. @@ -499,7 +499,7 @@ class TransformerEncoderLayer(Layer): self.activation = getattr(F, activation) def forward(self, src, src_mask=None): - """ + r""" Applies a Transformer encoder layer on the input. Parameters: @@ -575,7 +575,7 @@ class TransformerEncoder(Layer): self.norm = norm def forward(self, src, src_mask=None): - """ + r""" Applies a stack of N Transformer encoder layers on inputs. If `norm` is provided, also applies layer normalization on the output of last encoder layer. 
@@ -725,7 +725,7 @@ class TransformerDecoderLayer(Layer): self.activation = getattr(F, activation) def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): - """ + r""" Applies a Transformer decoder layer on the input. Parameters: @@ -801,7 +801,7 @@ class TransformerDecoderLayer(Layer): static_cache)) def gen_cache(self, memory): - """ + r""" Generates cache for `forward` usage. The generated cache is a tuple composed of an instance of `MultiHeadAttention.Cache` and an instance of `MultiHeadAttention.StaticCache`. @@ -873,7 +873,7 @@ class TransformerDecoder(Layer): self.norm = norm def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): - """ + r""" Applies a stack of N Transformer decoder layers on inputs. If `norm` is provided, also applies layer normalization on the output of last decoder layer. @@ -937,7 +937,7 @@ class TransformerDecoder(Layer): return output if cache is None else (output, new_caches) def gen_cache(self, memory, do_zip=False): - """ + r""" Generates cache for `forward` usage. The generated cache is a list, and each element in it is a tuple( :code:`(incremental_cache, static_cache)` ) produced by `TransformerDecoderLayer.gen_cache`. See `TransformerDecoderLayer.gen_cache` @@ -1139,7 +1139,7 @@ class Transformer(Layer): self.nhead = nhead def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None): - """ + r""" Applies a Transformer model on the inputs. Parameters: diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index 7a21e7661d4..b14fb3e2120 100644 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -153,7 +153,7 @@ class WeightNorm(object): def weight_norm(layer, name='weight', dim=0): - """ + r""" This weight_norm layer applies weight normalization to a parameter according to the following formula: diff --git a/python/paddle/optimizer/adadelta.py b/python/paddle/optimizer/adadelta.py index bba2c11ea07..91591d23f00 100644 --- a/python/paddle/optimizer/adadelta.py +++ b/python/paddle/optimizer/adadelta.py @@ -21,7 +21,7 @@ __all__ = ["Adadelta"] class Adadelta(Optimizer): - """ + r""" **Notes: This API does not support sparse parameter optimization.** Adadelta Optimizer. Please refer to this for details: diff --git a/python/paddle/optimizer/adagrad.py b/python/paddle/optimizer/adagrad.py index ed55ebd0bf2..72a3f8ce996 100644 --- a/python/paddle/optimizer/adagrad.py +++ b/python/paddle/optimizer/adagrad.py @@ -21,7 +21,7 @@ __all__ = ["Adagrad"] class Adagrad(Optimizer): - """ + r""" The Adaptive Gradient optimizer (Adagrad for short) use an optimization described in paper: `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization `_. 
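One detail worth keeping in mind when adding the r prefix: it also changes what a doubled backslash denotes. Several docstrings above, such as the BatchNorm family, spell the math as `\\mu_{\\beta}`; in a plain literal that pair collapses to a single backslash, while under the r prefix both characters are kept. A tiny illustration, using nothing beyond the string literals shown here:

    # In a plain literal "\\" denotes one backslash; in a raw literal it stays as two.
    assert len("\\mu") == 3      # backslash, 'm', 'u'
    assert len(r"\\mu") == 4     # backslash, backslash, 'm', 'u'
    assert "\\mu" == r"\mu"      # the single-backslash spellings are equivalent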
diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 79caa158312..37510231219 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -24,7 +24,7 @@ __all__ = ["Adam"] class Adam(Optimizer): - """ + r""" The Adam optimizer uses an optimization described at the end of section 2 of `Adam paper `_ , it can dynamically adjusts the learning rate of each parameter using diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index e5d1962d126..5d164fa7623 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -21,7 +21,7 @@ __all__ = ["Adamax"] class Adamax(Optimizer): - """ + r""" The Adamax optimizer is implemented based on the Adamax Optimization in Section 7 of `Adam paper `_. The Adamax algorithm is a variant of the Adam algorithm based on the infinite norm, diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index 0ffff675903..b597109d314 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -23,7 +23,7 @@ __all__ = ['AdamW'] class AdamW(Adam): - """ + r""" The AdamW optimizer is implemented based on the AdamW Optimization in paper `DECOUPLED WEIGHT DECAY REGULARIZATION `_. it can resolves the problem of L2 regularization failure in the Adam optimizer. diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 2d5dc5d998e..5085911ce92 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -192,7 +192,7 @@ class LRScheduler(object): class NoamDecay(LRScheduler): - """ + r""" Applies Noam Decay to the initial learning rate. @@ -376,7 +376,7 @@ class PiecewiseDecay(LRScheduler): class NaturalExpDecay(LRScheduler): - """ + r""" Applies natural exponential decay to the initial learning rate. @@ -455,7 +455,7 @@ class NaturalExpDecay(LRScheduler): class InverseTimeDecay(LRScheduler): - """ + r""" Applies inverse time decay to the initial learning rate. @@ -536,7 +536,7 @@ class InverseTimeDecay(LRScheduler): class PolynomialDecay(LRScheduler): - """ + r""" Applies polynomial decay to the initial learning rate. @@ -656,7 +656,7 @@ class PolynomialDecay(LRScheduler): class LinearWarmup(LRScheduler): - """ + r""" Linear learning rate warm up strategy. Update the learning rate preliminarily before the normal learning rate scheduler. For more information, please refer to `Bag of Tricks for Image Classification with Convolutional Neural Networks `_ @@ -794,7 +794,7 @@ class LinearWarmup(LRScheduler): class ExponentialDecay(LRScheduler): - """ + r""" Update learning rate by `gamma` each epoch. @@ -1383,7 +1383,7 @@ class ReduceOnPlateau(LRScheduler): class CosineAnnealingDecay(LRScheduler): - """ + r""" Set the learning rate using a cosine annealing schedule, where :math:`\eta_{max}` is set to the initial learning_rate. 
:math:`T_{cur}` is the number of epochs since the last restart in diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index 87fa86c1761..2cfd8deaef7 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -21,7 +21,7 @@ __all__ = ["Momentum"] class Momentum(Optimizer): - """ + r""" Simple Momentum optimizer with velocity state diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index d0326b4155a..030d419de48 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -47,7 +47,7 @@ __all__ = ['Optimizer'] class Optimizer(object): - """Optimizer Base class. + r"""Optimizer Base class. Define the common interface of an optimizer. User should not use this class directly, diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py index a664b015956..12825bb7813 100644 --- a/python/paddle/optimizer/rmsprop.py +++ b/python/paddle/optimizer/rmsprop.py @@ -21,7 +21,7 @@ __all__ = ["RMSProp"] class RMSProp(Optimizer): - """ + r""" Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method. The original slides proposed RMSProp: Slide 29 of http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf . diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index 133c3dfb24f..44e5695a2cf 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -21,7 +21,7 @@ __all__ = ["SGD"] class SGD(Optimizer): - """ + r""" Optimizer of the stochastic gradient descent algorithm. .. math:: diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py index 881cfd81314..1a4d4546923 100644 --- a/python/paddle/reader/__init__.py +++ b/python/paddle/reader/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" +r""" At training and testing time, PaddlePaddle programs need to read data. To ease the users' work to write data reading code, we define that diff --git a/python/paddle/regularizer.py b/python/paddle/regularizer.py index a1ab329169a..586ae0f988c 100644 --- a/python/paddle/regularizer.py +++ b/python/paddle/regularizer.py @@ -18,7 +18,7 @@ import paddle.fluid as fluid class L1Decay(fluid.regularizer.L1Decay): - """ + r""" Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse. It can be set in :ref:`api_paddle_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ). @@ -80,7 +80,7 @@ class L1Decay(fluid.regularizer.L1Decay): class L2Decay(fluid.regularizer.L2Decay): - """ + r""" Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting. It can be set in :ref:`api_paddle_ParamAttr` or ``optimizer`` (such as :ref:`api_paddle_optimizer_Momentum` ). 
diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index a25a8fb191b..84a5ed9950a 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -14,7 +14,6 @@ from __future__ import print_function - import errno import inspect import logging @@ -31,7 +30,6 @@ from paddle.fluid.io import prepend_feed_ops, append_fetch_ops, save_persistable from paddle.fluid.io import load_persistables, _endpoints_replacement from paddle.fluid.log_helper import get_logger - __all__ = [ 'save_inference_model', 'load_inference_model', @@ -44,10 +42,13 @@ _logger = get_logger( def _check_args(caller, args, supported_args=[], deprecated_args=[]): for arg in args: if arg in deprecated_args: - raise ValueError("argument '{}' in function '{}' is deprecated, only {} are supported.".format(arg, caller, supported_args)) + raise ValueError( + "argument '{}' in function '{}' is deprecated, only {} are supported.". + format(arg, caller, supported_args)) elif arg not in supported_args: raise ValueError( - "function '{}' doesn't support argument '{}',\n only {} are supported.".format(caller, arg, supported_args)) + "function '{}' doesn't support argument '{}',\n only {} are supported.". + format(caller, arg, supported_args)) @static_only @@ -129,14 +130,18 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor): # verify feed_vars if not isinstance(feed_vars, list): feed_vars = [feed_vars] - if not feed_vars or not all([isinstance(var, Variable) for var in feed_vars]): - raise ValueError("'feed_vars' should be a Variable or a list of Variable.") + if not feed_vars or not all( + [isinstance(var, Variable) for var in feed_vars]): + raise ValueError( + "'feed_vars' should be a Variable or a list of Variable.") # verify fetch_vars if not isinstance(fetch_vars, list): fetch_vars = [fetch_vars] - if not fetch_vars or not all([isinstance(var, Variable) for var in fetch_vars]): - raise ValueError("'fetch_vars' should be a Variable or a list of Variable.") + if not fetch_vars or not all( + [isinstance(var, Variable) for var in fetch_vars]): + raise ValueError( + "'fetch_vars' should be a Variable or a list of Variable.") main_program = _get_valid_program() # remind users to set auc_states to 0 if auc op were found. @@ -145,7 +150,9 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor): device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName() op._set_attr(device_attr_name, "") if op.type == 'auc': - warnings.warn("Be sure that you have set auc states to 0 before saving inference model.") + warnings.warn( + "Be sure that you have set auc states to 0 before saving inference model." + ) break # fix the bug that the activation op's output as target will be pruned. 
@@ -154,10 +161,11 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor): with program_guard(main_program): uniq_fetch_vars = [] for i, var in enumerate(fetch_vars): - var = layers.scale(var, 1., name="save_infer_model/scale_{}".format(i)) + var = layers.scale( + var, 1., name="save_infer_model/scale_{}".format(i)) uniq_fetch_vars.append(var) fetch_vars = uniq_fetch_vars - + # save model origin_program = main_program.clone() main_program = main_program.clone() @@ -257,7 +265,7 @@ def load_inference_model(path_prefix, executor, **configs): """ # check configs supported_args = ('model_filename', 'params_filename') - deprecated_args = ('pserver_endpoints',) + deprecated_args = ('pserver_endpoints', ) caller = inspect.currentframe().f_code.co_name _check_args(caller, configs, supported_args, deprecated_args) @@ -268,8 +276,7 @@ def load_inference_model(path_prefix, executor, **configs): params_filename = configs.get('params_filename', None) if params_filename is None: raise ValueError( - "params_filename cannot be None when path_prefix is None." - ) + "params_filename cannot be None when path_prefix is None.") load_dirname = path_prefix program_desc_str = model_filename params_filename = params_filename @@ -297,18 +304,21 @@ def load_inference_model(path_prefix, executor, **configs): if model_filename is None: model_path = os.path.join(path_prefix, "__model__") else: - model_path = os.path.join(path_prefix, model_filename + ".pdmodel") + model_path = os.path.join(path_prefix, + model_filename + ".pdmodel") if not os.path.exists(model_path): model_path = os.path.join(path_prefix, model_filename) # set params_path if params_filename is None: params_path = os.path.join(path_prefix, "") else: - params_path = os.path.join(path_prefix, params_filename + ".pdiparams") + params_path = os.path.join(path_prefix, + params_filename + ".pdiparams") if not os.path.exists(params_path): params_path = os.path.join(path_prefix, params_filename) _logger.warning("The old way to load inference model is deprecated." - " model path: {}, params path: {}".format(model_path, params_path)) + " model path: {}, params path: {}".format( + model_path, params_path)) with open(model_path, "rb") as f: program_desc_str = f.read() load_dirname = os.path.dirname(params_path) @@ -328,4 +338,3 @@ def load_inference_model(path_prefix, executor, **configs): ] return [program, feed_target_names, fetch_targets] - diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 44f0a73fa42..0806d2c2914 100644 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -26,7 +26,7 @@ def fc(x, bias_attr=None, activation=None, name=None): - """ + r""" Fully-Connected layer can take a tensor or a list of tensor as its inputs. It creates a 2-D weight tensor for each input tensor, which represents its @@ -180,7 +180,7 @@ def deform_conv2d(x, weight_attr=None, bias_attr=None, name=None): - """ + r""" Compute 2-D deformable convolution on 4-D input. 
Given input image x, output feature map y, the deformable convolution operation can be expressed as follow: diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index b46e1c79461..32e86c96b4e 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -54,7 +54,7 @@ __all__ = [ @dygraph_only def to_tensor(data, dtype=None, place=None, stop_gradient=True): - """ + r""" Constructs a ``paddle.Tensor`` or ``paddle.ComplexTensor`` from ``data`` , which can be scalar, tuple, list, numpy\.ndarray, paddle\.Tensor, paddle\.ComplexTensor. @@ -609,7 +609,7 @@ def _tril_triu_op(helper): def tril(x, diagonal=0, name=None): - """ + r""" :alias_main: paddle.tril :alias: paddle.tril,paddle.tensor.tril,paddle.tensor.creation.tril @@ -680,7 +680,7 @@ def tril(x, diagonal=0, name=None): def triu(x, diagonal=0, name=None): - """ + r""" :alias_main: paddle.triu :alias: paddle.triu,paddle.tensor.triu,paddle.tensor.creation.triu diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 25fb9343179..b1c0f0b446a 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -453,7 +453,7 @@ def norm(x, p='fro', axis=None, keepdim=False, name=None): def dist(x, y, p=2): - """ + r""" This OP returns the p-norm of (x - y). It is not a norm in a strict sense, only as a measure of distance. The shapes of x and y must be broadcastable. The definition is as follows, for @@ -740,7 +740,7 @@ def cross(x, y, axis=None, name=None): def cholesky(x, upper=False, name=None): - """ + r""" Computes the Cholesky decomposition of one symmetric positive-definite matrix or batches of symmetric positive-definite matrice. diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 0bda55a1fae..7ea8a9286c3 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -169,7 +169,7 @@ def flip(x, axis, name=None): def flatten(x, start_axis=0, stop_axis=-1, name=None): - """ + r""" **Flatten op** Flattens a contiguous range of axes in a tensor according to start_axis and stop_axis. @@ -565,7 +565,7 @@ def unique(x, axis=None, dtype="int64", name=None): - """ + r""" Returns the unique elements of `x` in ascending order. Args: @@ -946,7 +946,7 @@ def scatter(x, index, updates, overwrite=True, name=None): def scatter_nd_add(x, index, updates, name=None): - """ + r""" **Scatter_nd_add Layer** Output is obtained by applying sparse addition to a single value diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index ccc49c769c2..e7b72fe95bc 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -379,7 +379,7 @@ def floor_divide(x, y, name=None): def remainder(x, y, name=None): - """ + r""" Mod two tensors element-wise. The equation is: .. math:: @@ -981,7 +981,7 @@ def addmm(input, x, y, beta=1.0, alpha=1.0, name=None): def logsumexp(x, axis=None, keepdim=False, name=None): - """ + r""" This OP calculates the log of the sum of exponentials of ``x`` along ``axis`` . .. math:: @@ -1281,7 +1281,7 @@ def min(x, axis=None, keepdim=False, name=None): def log1p(x, name=None): - """ + r""" Calculates the natural log of the given input tensor, element-wise. .. math:: Out = \\ln(x+1) @@ -1315,7 +1315,7 @@ def log1p(x, name=None): return out def log2(x, name=None): - """ + r""" Calculates the log to the base 2 of the given input tensor, element-wise. .. 
math:: @@ -1365,7 +1365,7 @@ def log2(x, name=None): def log10(x, name=None): - """ + r""" Calculates the log to the base 10 of the given input tensor, element-wise. .. math:: @@ -1947,7 +1947,7 @@ def sign(x, name=None): def tanh(x, name=None): - """ + r""" Tanh Activation Operator. .. math:: diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index f5e0dc4c05b..c4a3bf4b1b6 100644 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -494,7 +494,7 @@ def sort(x, axis=-1, descending=False, name=None): def where(condition, x, y, name=None): - """ + r""" Return a tensor of elements selected from either $x$ or $y$, depending on $condition$. .. math:: diff --git a/python/paddle/text/datasets/imdb.py b/python/paddle/text/datasets/imdb.py index f1bf247efca..f02b5981906 100644 --- a/python/paddle/text/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -93,7 +93,7 @@ class Imdb(Dataset): def _build_work_dict(self, cutoff): word_freq = collections.defaultdict(int) - pattern = re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$") + pattern = re.compile(r"aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$") for doc in self._tokenize(pattern): for word in doc: word_freq[word] += 1 @@ -123,8 +123,8 @@ class Imdb(Dataset): return data def _load_anno(self): - pos_pattern = re.compile("aclImdb/{}/pos/.*\.txt$".format(self.mode)) - neg_pattern = re.compile("aclImdb/{}/neg/.*\.txt$".format(self.mode)) + pos_pattern = re.compile(r"aclImdb/{}/pos/.*\.txt$".format(self.mode)) + neg_pattern = re.compile(r"aclImdb/{}/neg/.*\.txt$".format(self.mode)) UNK = self.word_idx[''] diff --git a/r/example/mobilenet.py b/r/example/mobilenet.py index adb1c330a70..99e755ab69f 100755 --- a/r/example/mobilenet.py +++ b/r/example/mobilenet.py @@ -1,4 +1,19 @@ #!/usr/bin/env python3.7 + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # pylint: skip-file import functools diff --git a/tools/check_ctest_hung.py b/tools/check_ctest_hung.py index c44690a93ac..556c8ef6043 100644 --- a/tools/check_ctest_hung.py +++ b/tools/check_ctest_hung.py @@ -42,11 +42,11 @@ Diff: set(['test_parallel_executor_crf']) for l in fn.readlines(): if l.find("Test ") != -1 and \ l.find("Passed") != -1: - m = re.search("Test\s+#[0-9]*\:\s([a-z0-9_]+)", escape(l)) + m = re.search(r"Test\s+#[0-9]*\:\s([a-z0-9_]+)", escape(l)) passed.add(m.group(1)) if l.find("Start ") != -1: start_parts = escape(l).split(" ") - m = re.search("Start\s+[0-9]+\:\s([a-z0-9_]+)", escape(l)) + m = re.search(r"Start\s+[0-9]+\:\s([a-z0-9_]+)", escape(l)) started.add(m.group(1)) print("Diff: ", started - passed) diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py index 8d4b24a0cf6..823d9470230 100644 --- a/tools/codestyle/docstring_checker.py +++ b/tools/codestyle/docstring_checker.py @@ -101,7 +101,7 @@ class Docstring(object): def _arg_with_type(self): for t in self.d['Args']: - m = re.search('([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t) + m = re.search(r'([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t) if m: self.args[m.group(1)] = m.group(2) diff --git a/tools/coverage/coverage_diff.py b/tools/coverage/coverage_diff.py index 38f671fe408..6a400d293b2 100644 --- a/tools/coverage/coverage_diff.py +++ b/tools/coverage/coverage_diff.py @@ -1,5 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ usage: coverage_diff.py info_file diff_file > > coverage-diff.info """ diff --git a/tools/coverage/coverage_diff_list.py b/tools/coverage/coverage_diff_list.py index 8975185edad..62834301209 100644 --- a/tools/coverage/coverage_diff_list.py +++ b/tools/coverage/coverage_diff_list.py @@ -1,5 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ usage: coverage_diff_list.py list_file max_rate > coverage-diff-list-90.out """ diff --git a/tools/coverage/coverage_lines.py b/tools/coverage/coverage_lines.py index cdec5b8b1bb..553cd691e45 100644 --- a/tools/coverage/coverage_lines.py +++ b/tools/coverage/coverage_lines.py @@ -1,5 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ usage: coverage_lines.py info_file expected """ diff --git a/tools/coverage/cuda_clean.py b/tools/coverage/cuda_clean.py index c71ff375fd5..8c03edd0785 100644 --- a/tools/coverage/cuda_clean.py +++ b/tools/coverage/cuda_clean.py @@ -1,5 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ usage: cuda_clean.py pull_id. """ import os diff --git a/tools/coverage/gcda_clean.py b/tools/coverage/gcda_clean.py index f5726db005e..39fa3509cb8 100644 --- a/tools/coverage/gcda_clean.py +++ b/tools/coverage/gcda_clean.py @@ -1,5 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ usage: gcda_clean.py pull_id. """ import os diff --git a/tools/coverage/pull_request.py b/tools/coverage/pull_request.py index 105460032f7..f3e88286ca9 100644 --- a/tools/coverage/pull_request.py +++ b/tools/coverage/pull_request.py @@ -1,5 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
""" usage: pull_request.py files pull_id pull_request.py diff pull_id diff --git a/tools/coverage/python_coverage.py b/tools/coverage/python_coverage.py index 8ad9d85c1bf..f2e52b5e23b 100644 --- a/tools/coverage/python_coverage.py +++ b/tools/coverage/python_coverage.py @@ -1,5 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ usage: python_coverage.py > python-coverage.info """ diff --git a/tools/get_quick_disable_lt.py b/tools/get_quick_disable_lt.py index 1e3d7178922..18ebdb00317 100644 --- a/tools/get_quick_disable_lt.py +++ b/tools/get_quick_disable_lt.py @@ -20,7 +20,7 @@ import requests def download_file(): """Get disabled unit tests""" ssl._create_default_https_context = ssl._create_unverified_context - sysstr=sys.platform + sysstr = sys.platform if sysstr == 'win32': url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_win') else: diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index d23c18a44e9..ce0490d487f 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -236,20 +236,24 @@ def single_defcom_extract(start_from, srcls, is_class_begin=False): if srcls[x].startswith('def ') or srcls[x].startswith('class '): break else: - if (comstart == -1 and srcls[x].replace(" ", '').replace( - "\t", '').replace("\n", '').startswith("\"\"\"")): - comstart = x - comstyle = 2 - continue + if comstart == -1: + s = srcls[x].replace(" ", '').replace("\t", + '').replace("\n", '') + if s.startswith("\"\"\"") or s.startswith("r\"\"\""): + comstart = x + comstyle = 2 + continue if (comstyle == 2 and comstart != -1 and srcls[x].replace(" ", '').replace("\t", '').replace( "\n", '').startswith("\"\"\"")): break - if (comstart == -1 and srcls[x].replace(" ", '').replace( - "\t", '').replace("\n", '').startswith("\'\'\'")): - comstart = x - comstyle = 1 - continue + if comstart == -1: + s = srcls[x].replace(" ", '').replace("\t", + '').replace("\n", '') + if s.startswith("\'\'\'") or s.startswith("r\'\'\'"): + comstart = x + comstyle = 1 + continue if (comstyle == 1 and comstart != -1 and srcls[x].replace(" ", '').replace("\t", '').replace( "\n", '').startswith("\'\'\'")): diff --git a/tools/summary_env.py b/tools/summary_env.py index 39d6acaf536..38bae87651d 100644 --- a/tools/summary_env.py +++ b/tools/summary_env.py @@ -92,7 +92,7 @@ def get_cudnn_info(): cudnn_dll_path = run_shell_command('where cudnn*') if cudnn_dll_path: cudnn_header_path = cudnn_dll_path.split('bin')[ - 0] + 'include\cudnn.h' + 0] + r'include\cudnn.h' cmd = 'type "{0}" | findstr "{1}" | findstr /v "CUDNN_VERSION"' else: envs['cudnn_version'] = None -- GitLab