未验证 提交 bfb6ac81 编写于 作者: C chengduo 提交者: GitHub

Fix optimizer bug (#19410)

* fix optimizer bug
test=develop
上级 6fb310ae
......@@ -712,8 +712,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None,
parameters = parameter_list
else:
params = program.global_block().all_parameters()
program.global_block().iter_parameters()
parameters = [param.name for param in params]
parameters = [param.name for param in params if param.trainable]
params_and_grads = []
for param in parameters:
......
......@@ -360,8 +360,9 @@ class Optimizer(object):
global_block = framework.default_main_program().global_block()
start = len(global_block.ops)
self.helper = LayerHelper(self.__class__.__name__)
self._create_accumulators(global_block,
[p[0] for p in parameters_and_grads])
self._create_accumulators(
global_block,
[p[0] for p in parameters_and_grads if p[0].trainable])
self._create_global_learning_rate()
optimize_ops = []
......@@ -587,6 +588,20 @@ class Optimizer(object):
tuple: (optimize_ops, params_grads) which are, list of operators appended;
and list of (param, grad) Variables pair for optimization.
"""
assert isinstance(loss, Variable), "The loss should be an Variable."
if no_grad_set is None:
no_grad_set = set()
elif isinstance(no_grad_set, set) or isinstance(
no_grad_set, list) or isinstance(no_grad_set, tuple):
no_grad_set = set(no_grad_set)
else:
assert "no_grad_set should be a set, but the passed type is {}".format(
type(no_grad_set))
parameters = loss.block.program.global_block().all_parameters()
param_no_trainable = set(
[param.name for param in parameters if param.trainable is False])
# If a parameter is not trainable, it should not have a gradient.
no_grad_set.update(param_no_trainable)
params_grads = self.backward(
loss,
startup_program=startup_program,
......@@ -1404,7 +1419,7 @@ class AdamOptimizer(Optimizer):
assert isinstance(block, framework.Block)
main_block = block.program.global_block()
for param, grad in param_and_grads:
if grad is None:
if grad is None or param.trainable is False:
continue
with param.block.program._optimized_guard(
[param, grad]), name_scope("optimizer"):
......@@ -1567,7 +1582,7 @@ class AdamaxOptimizer(Optimizer):
assert isinstance(block, framework.Block)
main_block = block.program.global_block()
for param, grad in parameters_and_grads:
if grad is None:
if grad is None or param.trainable is False:
continue
with param.block.program._optimized_guard(
[param, grad]), name_scope('adamx'):
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from collections import Counter
import unittest
import paddle.fluid as fluid
from simple_nets import init_data
def test_trainable():
    """Build a small network whose FC layer uses a non-trainable param_attr.

    Declares the 'image' and 'label' data layers, feeds the image through a
    10-unit fully connected layer created with
    ``ParamAttr(trainable=False)``, and returns the mean cross-entropy
    between that layer's output and the label.

    Returns:
        The mean cross-entropy loss variable.
    """
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    target = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # The parameters governed by param_attr are excluded from training.
    frozen_attr = fluid.ParamAttr(trainable=False)
    fc_out = fluid.layers.fc(input=image, size=10, param_attr=frozen_attr)

    per_sample_loss = fluid.layers.cross_entropy(input=fc_out, label=target)
    return fluid.layers.mean(per_sample_loss)
class TestTrainable(unittest.TestCase):
    """Check the ops an optimizer appends for a model with a frozen parameter."""

    def check_trainable(self, model, feed_dict, op_count, optimizer=None):
        """Build ``model``, minimize its loss, and verify the op counts.

        Args:
            model: Zero-argument callable that builds the network inside the
                current program guard and returns the loss variable.
            feed_dict: Feed data passed to the executor when the main
                program is run.
            op_count: Mapping from op type to the expected number of ops of
                that type in the main program's global block. A value of 0
                means the op type must not appear at all.
            optimizer: Optimizer used to minimize the loss. When omitted, a
                fresh ``fluid.optimizer.Adam()`` is created for this call.
        """
        if optimizer is None:
            # Build the default here rather than in the signature: a default
            # expression is evaluated once, at class-definition time, and the
            # resulting optimizer instance would be shared by every call.
            optimizer = fluid.optimizer.Adam()

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = model()
            optimizer.minimize(loss)

        # Count the ops by type and compare against the expectation.
        ops = Counter(op.type for op in main.global_block().ops)
        for op_type, expected in op_count.items():
            if expected == 0:
                self.assertNotIn(op_type, ops)
            else:
                self.assertEqual(ops[op_type], expected)

        # Run the programs built above explicitly, so the result does not
        # depend on which programs are currently the defaults.
        exe.run(startup)
        exe.run(main, feed=feed_dict)

    def test_trainable(self):
        """Run the frozen-weight model with Adam and with Adamax."""
        batch_size = 2
        img, label = init_data(batch_size, img_shape=[784], label_range=9)
        feed_dict = {'image': img, 'label': label}

        # Because the weight of the FC layer is not trainable and the input
        # x is stop_gradient, 'mul_grad' should not be appended.
        self.check_trainable(
            test_trainable,
            feed_dict,
            op_count={'adam': 1,
                      'scale': 2,
                      'mul_grad': 0})
        self.check_trainable(
            test_trainable,
            feed_dict,
            op_count={'adamax': 1,
                      'scale': 1,
                      'mul_grad': 0},
            optimizer=fluid.optimizer.Adamax(learning_rate=0.2))
# Run the unittest test runner only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部