diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 322fd65cc7d8c319e36fdfa6bffd1b32b5d357d7..dab6e53ca4ddadd4ff4249e412d322f43f67ab43 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -10199,7 +10199,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
             import numpy as np
 
             inputs = fluid.layers.data(name="x", shape=[2, 3], dtype='float32')
-            scale = fluid.layers.data(name="scale", shape=[1], dtype='float32'
+            scale = fluid.layers.data(name="scale", shape=[1], dtype='float32',
                                       append_batch_size=False)
             output = fluid.layers.scale(inputs, scale = scale, bias = 1.0)
 
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 97c5168f205b2f7733bab9d277a45b527f1de74d..904490b5df093d25457ba419f68014b3b478efaa 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1549,7 +1549,7 @@ class AdamOptimizer(Optimizer):
             avg_cost = fluid.layers.mean(cost)
 
             # define beta decay variable
-            def get_decayed_betas(beta1_init, beta2_init, decay_steps, decay_rate)
+            def get_decayed_betas(beta1_init, beta2_init, decay_steps, decay_rate):
                 global_step = lr_scheduler._decay_step_counter()
 
                 beta1 = fluid.layers.create_global_var(
@@ -1578,7 +1578,7 @@ class AdamOptimizer(Optimizer):
             beta1, beta2 = get_decayed_betas(0.9, 0.99, 1e5, 0.9)
             adam_optimizer = fluid.optimizer.AdamOptimizer(
                 learning_rate=0.01,
-                beta1=beta1
+                beta1=beta1,
                 beta2=beta2)
             adam_optimizer.minimize(avg_cost)
 
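For reference, the corrected scale() docstring example can be exercised end to end roughly as follows. This is a minimal sketch assuming the legacy paddle.fluid 1.x API; the Executor setup, feed shapes, and feed values below are illustrative assumptions and are not part of the patch itself.

    import numpy as np
    import paddle.fluid as fluid

    # Build the graph exactly as in the fixed docstring example: a [2, 3]
    # input (with an implicit batch dimension) and a scalar scale tensor.
    inputs = fluid.layers.data(name="x", shape=[2, 3], dtype='float32')
    scale = fluid.layers.data(name="scale", shape=[1], dtype='float32',
                              append_batch_size=False)
    output = fluid.layers.scale(inputs, scale=scale, bias=1.0)

    # Illustrative run (assumed, not from the docstring): feed a batch of
    # 4 samples of shape [2, 3] plus a scalar scale, then fetch the result,
    # which computes x * 2.0 + 1.0 elementwise.
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    x_np = np.random.random((4, 2, 3)).astype('float32')
    scale_np = np.array([2.0], dtype='float32')
    out, = exe.run(fluid.default_main_program(),
                   feed={'x': x_np, 'scale': scale_np},
                   fetch_list=[output])
    print(out.shape)  # (4, 2, 3)

Without the added comma, the snippet raises a SyntaxError at the continuation line, which is the same class of bug the optimizer.py hunks fix (a missing colon on the def line and a missing comma between keyword arguments).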