Unverified commit 7a58431c, authored by zhang wenhui, committed by GitHub

fix norm api doc, test=develop (#27652)

* fix norm api doc, test=develop

* fix error message, test=develop

* fix api norm, test=develop

* add adagrad, test=develop

* fix bug, test=develop

* fix bug, test=develop

* add spectral_norm, test=develop

* fix adagrad, test=develop

* merge, test=develop
Parent 3eb106da
@@ -381,7 +381,8 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
         break;
       }
       default:
-        PADDLE_THROW("Unknown storage order: %s", data_layout_str);
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Unknown storage order: %s", data_layout_str));
     }
     // if MomentumTensor is set, use MomentumTensor value, momentum
@@ -446,7 +447,8 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
         break;
       }
       default:
-        PADDLE_THROW("Unknown storage order: %d", data_layout);
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Unknown storage order: %d", data_layout));
     }
   }
 };
@@ -799,7 +801,8 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
         break;
       }
       default:
-        PADDLE_THROW("Unknown storage order: %s", data_layout_str);
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Unknown storage order: %s", data_layout_str));
     }
   }
 };
@@ -108,7 +108,8 @@ void FlListenAndServOp::RunSyncLoop(framework::Executor *executor,
   auto optimize_blocks =
       Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
   PADDLE_ENFORCE_GE(num_blocks, 2,
-                    "server program should have at least 2 blocks");
+                    platform::errors::InvalidArgument(
+                        "server program should have at least 2 blocks"));
   // Prepare all the server block
   std::vector<int> optimize_blocks_list;
@@ -192,7 +193,8 @@ void FlListenAndServOp::RunImpl(const framework::Scope &scope,
   auto fan_in = Attr<int>("Fanin");
   auto inputs = Inputs("X");
-  PADDLE_ENFORCE_EQ(!rpc_service_, true, "rpc_service_ must null");
+  PADDLE_ENFORCE_EQ(!rpc_service_, true, platform::errors::InvalidArgument(
+                                             "rpc_service_ must null"));
   std::string endpoint = Attr<std::string>("endpoint");
   VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
@@ -215,7 +217,8 @@ void FlListenAndServOp::RunImpl(const framework::Scope &scope,
       Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
   PADDLE_ENFORCE_GE(
       optimize_blocks.size(), 1,
-      "optimize blocks should be 1 at least on the pserver side.");
+      platform::errors::InvalidArgument(
+          "optimize blocks should be 1 at least on the pserver side."));
   auto *program = optimize_blocks[0]->Program();
   framework::Executor executor(dev_place);
@@ -3674,10 +3674,11 @@ def spectral_norm(weight, dim=0, power_iters=1, eps=1e-12, name=None):
     Examples:
         .. code-block:: python
-            import paddle.fluid as fluid
+            import paddle
-            weight = fluid.data(name='weight', shape=[2, 8, 32, 32], dtype='float32')
-            x = fluid.layers.spectral_norm(weight=weight, dim=1, power_iters=2)
+            paddle.enable_static()
+            weight = paddle.data(name='weight', shape=[2, 8, 32, 32], dtype='float32')
+            x = paddle.static.nn.spectral_norm(weight=weight, dim=1, power_iters=2)
     """
     helper = LayerHelper('spectral_norm', **locals())
     check_variable_and_dtype(weight, 'weight', ['float32', 'float64'],
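Side note for review (not part of this commit): the static-graph example above has a dygraph counterpart in paddle.nn.SpectralNorm, which this commit keeps exported. A minimal sketch, assuming the layer keeps the fluid signature (weight_shape first, then dim and power_iters):

import paddle

paddle.disable_static()
weight = paddle.rand([2, 8, 32, 32])
# assumed signature, mirroring fluid.dygraph.SpectralNorm
spectral_norm = paddle.nn.SpectralNorm(weight_shape=weight.shape, dim=1, power_iters=2)
out = spectral_norm(weight)   # spectrally normalized weight, same shape as the input
print(out.shape)              # [2, 8, 32, 32]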
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest
import math
class TestAdagradOpV2(unittest.TestCase):
def test_v20_coverage(self):
paddle.disable_static()
inp = paddle.rand(shape=[10, 10])
linear = paddle.nn.Linear(10, 10)
out = linear(inp)
loss = paddle.mean(out)
adagrad = paddle.optimizer.Adagrad(
learning_rate=0.1, parameters=linear.parameters())
out.backward()
adagrad.step()
adagrad.clear_grad()
if __name__ == "__main__":
unittest.main()
@@ -1369,7 +1369,7 @@ class TestLayer(LayerTest):
             dy_rlt_value = dy_ret.numpy()
         with self.dynamic_graph():
-            instanceNorm = paddle.nn.InstanceNorm(num_channels=shape[1])
+            instanceNorm = nn.InstanceNorm(num_channels=shape[1])
             dy_ret = instanceNorm(base.to_variable(input))
             dy_rlt_value2 = dy_ret.numpy()
@@ -1380,7 +1380,7 @@ class TestLayer(LayerTest):
         with self.static_graph():
             # the input of InstanceNorm must be Variable.
             def test_Variable():
-                instanceNorm = paddle.nn.InstanceNorm(num_channels=shape[1])
+                instanceNorm = nn.InstanceNorm(num_channels=shape[1])
                 ret1 = instanceNorm(input)
             self.assertRaises(TypeError, test_Variable)
@@ -1388,7 +1388,7 @@ class TestLayer(LayerTest):
             # the input dtype of InstanceNorm must be float32 or float64
             def test_type():
                 input = np.random.random(shape).astype('int32')
-                instanceNorm = paddle.nn.InstanceNorm(num_channels=shape[1])
+                instanceNorm = nn.InstanceNorm(num_channels=shape[1])
                 ret2 = instanceNorm(input)
             self.assertRaises(TypeError, test_type)
@@ -139,7 +139,6 @@ from .layer.norm import SyncBatchNorm #DEFINE_ALIAS
 from .layer.norm import GroupNorm #DEFINE_ALIAS
 from .layer.norm import LayerNorm #DEFINE_ALIAS
 from .layer.norm import SpectralNorm #DEFINE_ALIAS
-from .layer.norm import InstanceNorm #DEFINE_ALIAS
 from .layer.norm import InstanceNorm1d #DEFINE_ALIAS
 from .layer.norm import InstanceNorm2d #DEFINE_ALIAS
 from .layer.norm import InstanceNorm3d #DEFINE_ALIAS
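Reviewer note (not part of the commit): with the monolithic InstanceNorm alias dropped from the paddle.nn namespace above, the dimension-specific classes that remain exported are the intended replacement. A minimal sketch, assuming the 2.0-style num_features argument name:

import paddle

paddle.disable_static()
x = paddle.rand([2, 8, 32, 32])                   # NCHW input
inorm = paddle.nn.InstanceNorm2d(num_features=8)  # replaces the removed paddle.nn.InstanceNorm
y = inorm(x)
print(y.shape)                                    # [2, 8, 32, 32]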
@@ -102,7 +102,7 @@ from .norm import SyncBatchNorm #DEFINE_ALIAS
 from .norm import GroupNorm #DEFINE_ALIAS
 from .norm import LayerNorm #DEFINE_ALIAS
 from .norm import SpectralNorm #DEFINE_ALIAS
-from .norm import InstanceNorm #DEFINE_ALIAS
+#from .norm import InstanceNorm #DEFINE_ALIAS
 from .norm import LocalResponseNorm #DEFINE_ALIAS
 # from .rnn import RNNCell #DEFINE_ALIAS
 # from .rnn import GRUCell #DEFINE_ALIAS
@@ -28,7 +28,7 @@
 # TODO: define normalization api
 import six
-from ...fluid.dygraph.nn import InstanceNorm
+#from ...fluid.dygraph.nn import InstanceNorm
 from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS
 #from ...fluid.dygraph import GroupNorm #DEFINE_ALIAS
@@ -54,19 +54,9 @@ from ...fluid.dygraph.base import no_grad
 from .. import functional as F
 __all__ = [
-    'BatchNorm',
-    'GroupNorm',
-    'LayerNorm',
-    'SpectralNorm',
-    'InstanceNorm',
-    'BatchNorm1d',
-    'BatchNorm2d',
-    'BatchNorm3d',
-    'InstanceNorm1d',
-    'InstanceNorm2d',
-    'InstanceNorm3d',
-    'SyncBatchNorm',
-    'LocalResponseNorm',
+    'BatchNorm', 'GroupNorm', 'LayerNorm', 'SpectralNorm', 'BatchNorm1d',
+    'BatchNorm2d', 'BatchNorm3d', 'InstanceNorm1d', 'InstanceNorm2d',
+    'InstanceNorm3d', 'SyncBatchNorm', 'LocalResponseNorm'
 ]
@@ -20,11 +20,12 @@ __all__ = [
 ]
-from ..fluid.optimizer import Momentum, Adagrad, Dpsgd, DecayedAdagrad, Ftrl,\
+from ..fluid.optimizer import Momentum, Dpsgd, DecayedAdagrad, Ftrl,\
             AdagradOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, \
             FtrlOptimizer, AdadeltaOptimizer
 from .optimizer import Optimizer
+from .adagrad import Adagrad
 from .adam import Adam
 from .adamw import AdamW
 from .adamax import Adamax
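Reviewer note (not part of the commit): the import shuffle above means paddle.optimizer.Adagrad now resolves to the new class defined in the file added below, while the fluid AdagradOptimizer is still imported for legacy code. A quick check:

import paddle

# expected to print 'paddle.optimizer.adagrad' once this commit is applied
print(paddle.optimizer.Adagrad.__module__)
# the old fluid class remains importable through its original path
print(paddle.fluid.optimizer.AdagradOptimizer.__name__)   # 'AdagradOptimizer'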
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable
__all__ = ["Adagrad"]
class Adagrad(Optimizer):
"""
    The Adaptive Gradient optimizer (Adagrad for short) uses an optimization technique
    described in the paper `Adaptive Subgradient Methods for Online Learning and
    Stochastic Optimization <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_.

    The update rule for parameter ``param_out`` with gradient ``grad`` is:

    .. math::

        moment\_out &= moment + grad * grad

        param\_out &= param - \\frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon}

    The original paper does not have the ``epsilon`` attribute. It is added here
    in our implementation, as also proposed in `Per-parameter adaptive learning rate
    methods <http://cs231n.github.io/neural-networks-3/#ada>`_, for numerical
    stability to avoid division by zero.

    Args:
        learning_rate (float|Tensor): The learning rate used to update ``Parameter``.
            It can be a float value or a ``Tensor`` with a float data type.
        epsilon (float, optional): A small float value for numerical stability.
            The default value is 1e-06.
        parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
            The default value is None in static graph mode, in which case all parameters are updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It can be a float value as the coefficient of L2 regularization or \
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
            If a parameter has already set a regularizer using :ref:`api_fluid_ParamAttr`, \
            the regularization setting here in the optimizer is ignored for that parameter. \
            Otherwise, the regularization setting here in the optimizer takes effect. \
            Default None, meaning there is no regularization.
        grad_clip (GradientClipBase, optional): Gradient clipping strategy, an instance of
            some derived class of ``GradientClipBase``. There are three clipping strategies:
            ClipGradByGlobalNorm, ClipGradByNorm and ClipGradByValue. Default None,
            meaning there is no gradient clipping.
        name (str, optional): Normally there is no need for the user to set this property.
            For more information, please refer to :ref:`api_guide_Name`.
            The default value is None.
        initial_accumulator_value (float, optional): Initial value for the moment accumulator.
            The default value is 0.0.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            paddle.disable_static()
            inp = paddle.rand(shape=[10, 10])
            linear = paddle.nn.Linear(10, 10)
            out = linear(inp)
            loss = paddle.mean(out)
            adagrad = paddle.optimizer.Adagrad(learning_rate=0.1,
                    parameters=linear.parameters())
            out.backward()
            adagrad.step()
            adagrad.clear_grad()
"""
_moment_acc_str = "moment"
def __init__(self,
learning_rate,
epsilon=1.0e-6,
parameters=None,
weight_decay=None,
grad_clip=None,
name=None,
initial_accumulator_value=0.0):
assert learning_rate is not None
assert epsilon is not None
super(Adagrad, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
weight_decay=weight_decay,
grad_clip=grad_clip,
name=name)
self.type = "adagrad"
self._epsilon = epsilon
self.initial_accumulator_value = initial_accumulator_value
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
for p in parameters:
self._add_accumulator(
self._moment_acc_str,
p,
fill_value=self.initial_accumulator_value)
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
moment_acc = self._get_accumulator(self._moment_acc_str,
param_and_grad[0])
# Create the adagrad optimizer op
adagrad_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"Moment": moment_acc,
"LearningRate": self._create_param_lr(param_and_grad)
},
outputs={"ParamOut": param_and_grad[0],
"MomentOut": moment_acc},
attrs={"epsilon": self._epsilon},
stop_gradient=True)
return adagrad_op
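For reference, a minimal NumPy sketch (not part of the commit) of the update rule documented in the Adagrad docstring above; the values and names are illustrative only:

import numpy as np

learning_rate, epsilon = 0.1, 1e-6
param = np.array([1.0, -2.0, 0.5])
grad = np.array([0.3, -0.1, 0.2])
moment = np.zeros_like(param)   # accumulator, starts at initial_accumulator_value

# moment_out = moment + grad * grad
moment_out = moment + grad * grad
# param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
param_out = param - learning_rate * grad / (np.sqrt(moment_out) + epsilon)

print(moment_out)   # [0.09 0.01 0.04]
print(param_out)    # roughly [0.9, -1.9, 0.4]: on the first step each element moves by about learning_rate against the gradient sign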