From 40fddf2145424a3084d12df7590a4532d6df895b Mon Sep 17 00:00:00 2001
From: Kaipeng Deng
Date: Thu, 28 Nov 2019 17:50:14 +0800
Subject: [PATCH] update adam_op, scale_op, batch_norm_op doc. test=develop
 (#1619)

* update adam_op, scale_op, batch_norm_op doc. test=develop

* fix batch_norm example indent. test=develop
---
 doc/fluid/api_cn/layers_cn/batch_norm_cn.rst  | 27 ++++++-
 doc/fluid/api_cn/layers_cn/scale_cn.rst       | 18 ++++-
 .../api_cn/optimizer_cn/AdamOptimizer_cn.rst  | 70 ++++++++++++++++---
 3 files changed, 102 insertions(+), 13 deletions(-)

diff --git a/doc/fluid/api_cn/layers_cn/batch_norm_cn.rst b/doc/fluid/api_cn/layers_cn/batch_norm_cn.rst
index e5e34c2bb..0d9ab01c4 100644
--- a/doc/fluid/api_cn/layers_cn/batch_norm_cn.rst
+++ b/doc/fluid/api_cn/layers_cn/batch_norm_cn.rst
@@ -41,7 +41,7 @@ moving_mean and moving_var are the global mean and variance accumulated during training
     - **input** (Variable) - The input feature of the batch_norm operator, a Variable of rank 2, 3, 4 or 5. Data types: float16, float32, float64.
     - **act** (string) - Activation function type, such as leaky_relu, relu, prelu. Default: None.
     - **is_test** (bool) - Whether the layer runs in test mode; outside of training, the global mean and variance accumulated during training are used. Default: False.
-    - **momentum** (float) - Used to compute moving_mean and moving_var. The update formulas are :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` and :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`. Default: 0.9.
+    - **momentum** (float|Variable) - Used to compute moving_mean and moving_var; either a float, or a Variable of shape [1] and dtype float32. The update formulas are :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` and :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`. Default: 0.9.
     - **epsilon** (float) - A value added to the denominator for numerical stability. Default: 1e-5.
     - **param_attr** (ParamAttr|None) - The object specifying the weight parameter attributes. Default: None, which means the default weight parameter attributes are used; see :ref:`cn_api_fluid_ParamAttr` for details. batch_norm initializes its weight to 1.0 by default.
     - **bias_attr** (ParamAttr|None) - The object specifying the bias parameter attributes. Default: None, which means the default bias parameter attributes are used; see :ref:`cn_api_fluid_ParamAttr` for details. batch_norm initializes its bias to 0.0 by default.
@@ -75,4 +75,29 @@ moving_mean and moving_var are the global mean and variance accumulated during training
     output = exe.run(feed={"x": np_x}, fetch_list = [hidden2])
     print(output)
 
+.. code-block:: python
+
+    # batch_norm with momentum as Variable
+    import paddle.fluid as fluid
+    import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
+
+    def get_decay_momentum(momentum_init, decay_steps, decay_rate):
+        global_step = lr_scheduler._decay_step_counter()
+        momentum = fluid.layers.create_global_var(
+            shape=[1],
+            value=float(momentum_init),
+            dtype='float32',
+            # set persistable for save checkpoints and resume
+            persistable=True,
+            name="momentum")
+        div_res = global_step / decay_steps
+        decayed_momentum = momentum_init * (decay_rate**div_res)
+        fluid.layers.assign(decayed_momentum, momentum)
+
+        return momentum
+
+    x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
+    hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
+    momentum = get_decay_momentum(0.9, 1e5, 0.9)
+    hidden2 = fluid.layers.batch_norm(input=hidden1, momentum=momentum)
diff --git a/doc/fluid/api_cn/layers_cn/scale_cn.rst b/doc/fluid/api_cn/layers_cn/scale_cn.rst
index a7cf64158..26b1a42fb 100644
--- a/doc/fluid/api_cn/layers_cn/scale_cn.rst
+++ b/doc/fluid/api_cn/layers_cn/scale_cn.rst
@@ -21,7 +21,7 @@ scale
 Parameters:
     - **x** (Variable) - The multi-dimensional Tensor to be scaled. Data types: float32, float64, int8, int16, int32, int64, uint8.
-    - **scale** (float) - The scale factor.
+    - **scale** (float|Variable) - The scale factor; either a float, or a Variable of shape [1] and dtype float32.
     - **bias** (float) - The bias to be added.
     - **bias_after_scale** (bool) - Whether the bias is added after or before scaling. If True, scale first and then add the bias; if False, add the bias first and then scale. In some cases this is useful for numerical stability.
     - **act** (str, optional) - The activation function applied to the output, such as tanh, softmax, sigmoid or relu.
@@ -49,11 +49,23 @@ scale
     res = exe.run(fluid.default_main_program(), feed={'x':img}, fetch_list=[output])
     print(res) # [array([[ 3.,  5.,  7.], [ 9., 11., 13.]], dtype=float32)]
 
+.. code-block:: python
+
+    # scale with parameter scale as Variable
+    import paddle.fluid as fluid
+    import numpy as np
+
+    inputs = fluid.layers.data(name="x", shape=[2, 3], dtype='float32')
+    scale = fluid.layers.data(name="scale", shape=[1], dtype='float32',
+                              append_batch_size=False)
+    output = fluid.layers.scale(inputs, scale=scale, bias=1.0)
+
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+
+    img = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
+    scale_np = np.array([2.]).astype(np.float32)
-
-
+    res = exe.run(fluid.default_main_program(), feed={'x':img, 'scale':scale_np}, fetch_list=[output])
+    print(res) # [array([[ 3.,  5.,  7.], [ 9., 11., 13.]], dtype=float32)]
diff --git a/doc/fluid/api_cn/optimizer_cn/AdamOptimizer_cn.rst b/doc/fluid/api_cn/optimizer_cn/AdamOptimizer_cn.rst
index f6708c74d..26954add5 100644
--- a/doc/fluid/api_cn/optimizer_cn/AdamOptimizer_cn.rst
+++ b/doc/fluid/api_cn/optimizer_cn/AdamOptimizer_cn.rst
@@ -24,8 +24,8 @@ The Adam optimizer is from Section 2 of the `Adam paper`_
 Parameters:
     - **learning_rate** (float|Variable, optional) - The learning rate used for parameter updates. Either a float, or a Variable holding a float value. Default: 0.001.
-    - **beta1** (float, optional) - The exponential decay rate for the first-moment estimates. Default: 0.9.
-    - **beta2** (float, optional) - The exponential decay rate for the second-moment estimates. Default: 0.999.
+    - **beta1** (float|Variable, optional) - The exponential decay rate for the first-moment estimates; either a float, or a Variable of shape [1] and dtype float32. Default: 0.9.
+    - **beta2** (float|Variable, optional) - The exponential decay rate for the second-moment estimates; either a float, or a Variable of shape [1] and dtype float32. Default: 0.999.
     - **epsilon** (float, optional) - A small float value for numerical stability. Default: 1e-08.
     - **regularization** (WeightDecayRegularizer, optional) - A regularization function used to reduce generalization error, for example :ref:`cn_api_fluid_regularizer_L2DecayRegularizer`. Default: None.
     - **name** (str, optional) - Used by developers when printing debugging information; see :ref:`api_guide_Name` for details. Default: None.
@@ -59,6 +59,65 @@ The Adam optimizer is from Section 2 of the `Adam paper`_
     for data in train_reader():
         exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
 
+.. code-block:: python
+
+    # Adam with beta1/beta2 as Variable
+    import paddle
+    import paddle.fluid as fluid
+    import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
+
+    place = fluid.CPUPlace()
+    main = fluid.Program()
+    with fluid.program_guard(main):
+        x = fluid.data(name='x', shape=[None, 13], dtype='float32')
+        y = fluid.data(name='y', shape=[None, 1], dtype='float32')
+        y_predict = fluid.layers.fc(input=x, size=1, act=None)
+        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        avg_cost = fluid.layers.mean(cost)
+
+        # define beta decay variable
+        def get_decayed_betas(beta1_init, beta2_init, decay_steps, decay_rate):
+            global_step = lr_scheduler._decay_step_counter()
+
+            beta1 = fluid.layers.create_global_var(
+                shape=[1],
+                value=float(beta1_init),
+                dtype='float32',
+                # set persistable for save checkpoints and resume
+                persistable=True,
+                name="beta1")
+            beta2 = fluid.layers.create_global_var(
+                shape=[1],
+                value=float(beta2_init),
+                dtype='float32',
+                # set persistable for save checkpoints and resume
+                persistable=True,
+                name="beta2")
+
+            div_res = global_step / decay_steps
+            decayed_beta1 = beta1_init * (decay_rate**div_res)
+            decayed_beta2 = beta2_init * (decay_rate**div_res)
+            fluid.layers.assign(decayed_beta1, beta1)
+            fluid.layers.assign(decayed_beta2, beta2)
+
+            return beta1, beta2
+
+        beta1, beta2 = get_decayed_betas(0.9, 0.99, 1e5, 0.9)
+        adam_optimizer = fluid.optimizer.AdamOptimizer(
+            learning_rate=0.01,
+            beta1=beta1,
+            beta2=beta2)
+        adam_optimizer.minimize(avg_cost)
+
+        fetch_list = [avg_cost]
+        train_reader = paddle.batch(
+            paddle.dataset.uci_housing.train(), batch_size=1)
+        feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
+        exe = fluid.Executor(place)
+        exe.run(fluid.default_startup_program())
+        for data in train_reader():
+            exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
+
 
 .. py:method:: minimize(loss, startup_program=None, parameter_list=None, no_grad_set=None, grad_clip=None)
 
@@ -100,10 +159,3 @@ The Adam optimizer is from Section 2 of the `Adam paper`_
              feed={'X': x, 'Y': y},
              fetch_list=[loss.name])
 
-
-
-
-
-
-
-
--
GitLab
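
The moving-statistics update that the ``momentum`` argument controls can be checked by hand. The following minimal sketch applies the documented formula once with NumPy only; the batch values and the initial moving statistics (mean 0, variance 1) are illustrative assumptions, not part of the patch or the PaddlePaddle API.

.. code-block:: python

    # Sketch of the batch_norm moving-statistics update (assumed toy values):
    #   moving_mean = moving_mean * momentum + new_mean * (1. - momentum)
    #   moving_var  = moving_var  * momentum + new_var  * (1. - momentum)
    import numpy as np

    momentum = 0.9
    moving_mean, moving_var = 0.0, 1.0        # assumed running statistics
    batch = np.array([1.0, 2.0, 3.0, 4.0])    # one assumed mini-batch of activations

    new_mean, new_var = batch.mean(), batch.var()   # 2.5, 1.25
    moving_mean = moving_mean * momentum + new_mean * (1.0 - momentum)
    moving_var = moving_var * momentum + new_var * (1.0 - momentum)
    print(moving_mean, moving_var)            # approximately 0.25 and 1.025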