diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index dc641cdd1afbecfc9122c9f2e8ce6fac77b53f21..029db7d2dd4b7def8cea374e3f2ed31226f2bc18 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -176,7 +176,6 @@ class L1DecayRegularizer(WeightDecayRegularizer): dtype="float32", shape=param.shape, lod_level=param.lod_level) if grad.type == core.VarDesc.VarType.SELECTED_ROWS: - # add concat_rows decay = block.create_var( dtype="float32", shape=param.shape, diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index caa9596a100de4f9364467690db1e80ee227c3c1..fa38bd3762423497b82c3b421b3a1db4cd87525b 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -181,7 +181,10 @@ def train_main(use_cuda, is_sparse, is_local=True): cost = pd.cross_entropy(input=rnn_out, label=label) avg_cost = pd.mean(cost) - optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) + optimizer = fluid.optimizer.Adagrad( + learning_rate=1e-4, + regularization=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.1)) optimize_ops, params_grads = optimizer.minimize(avg_cost) train_data = paddle.batch(