diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 1d109259f399a94323713f89e9c2333d7b720579..46107341a4edc3ec6bb5569c3fa50bbd9243bd26 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -141,11 +141,13 @@ class VarBase {
   void RunBackward();
 
   void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
-                  int pre_op_out_idx, bool stop_gradient) {
+                  int pre_op_out_idx, bool pre_op_stop_gradient) {
     pre_op_ = pre_op;
     pre_op_out_name_ = pre_op_out_name;
     pre_op_out_idx_ = pre_op_out_idx;
-    stop_gradient_ = stop_gradient;
+    if (pre_op_stop_gradient) {
+      stop_gradient_ = pre_op_stop_gradient;
+    }
   }
 
   void ClearGradient() {
diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py
index 57c45f764baa686fe749871ba948ad6bf581d2bc..c338c65a76b86cd28ee7cc8e38a422c4e382a9de 100644
--- a/python/paddle/fluid/imperative/layers.py
+++ b/python/paddle/fluid/imperative/layers.py
@@ -51,9 +51,8 @@ class Layer(core.Layer):
         return params
 
     def clear_gradients(self):
-        print([p.name for p in self.parameters()])
         for p in self.parameters():
-            if p.name not in set(['batch_norm_0.w_2', 'batch_norm_0.w_1']):
+            if not p._stop_gradient:
                 p._clear_gradient()
 
     def _build_once(self, inputs):
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
index dfaaae0de3640dc3011a0f617896bfe790119861..c27fd0b8024a8fa3310a62de34299fb621e2902f 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -168,22 +168,22 @@ class ResNet(fluid.imperative.Layer):
         self.pool2d_max = Pool2D(
             pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
 
-        # self.bottleneck_block_list = []
-        # num_channels = 64
-        # for block in range(len(depth)):
-        #     shortcut = False
-        #     for i in range(depth[block]):
-        #         bottleneck_block = BottleneckBlock(
-        #             num_channels=num_channels,
-        #             num_filters=num_filters[block],
-        #             stride=2 if i == 0 and block != 0 else 1,
-        #             shortcut=shortcut)
-        #         num_channels = bottleneck_block._num_channels_out
-        #         self.bottleneck_block_list.append(bottleneck_block)
-        #     shortcut = True
-
-        # self.pool2d_avg = Pool2D(
-        #     pool_size=7, pool_type='avg', global_pooling=True)
+        self.bottleneck_block_list = []
+        num_channels = 64
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                bottleneck_block = BottleneckBlock(
+                    num_channels=num_channels,
+                    num_filters=num_filters[block],
+                    stride=2 if i == 0 and block != 0 else 1,
+                    shortcut=shortcut)
+                num_channels = bottleneck_block._num_channels_out
+                self.bottleneck_block_list.append(bottleneck_block)
+            shortcut = True
+
+        self.pool2d_avg = Pool2D(
+            pool_size=7, pool_type='avg', global_pooling=True)
 
         import math
         stdv = 1.0 / math.sqrt(2048 * 1.0)
@@ -196,9 +196,9 @@ class ResNet(fluid.imperative.Layer):
     def forward(self, inputs):
         y = self.conv(inputs)
         y = self.pool2d_max(y)
-        # for bottleneck_block in self.bottleneck_block_list:
-        #     y = bottleneck_block(y)
-        # y = self.pool2d_avg(y)
+        for bottleneck_block in self.bottleneck_block_list:
+            y = bottleneck_block(y)
+        y = self.pool2d_avg(y)
         y = self.out(y)
         return y
 
@@ -209,7 +209,7 @@ class TestImperativeResnet(unittest.TestCase):
        batch_size = train_parameters["batch_size"]
         batch_num = 1
 
-        with fluid.imperative.guard(place=fluid.CPUPlace()):
+        with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
@@ -275,9 +275,8 @@ class TestImperativeResnet(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
-            exe = fluid.Executor(fluid.CPUPlace())
-            # exe = fluid.Executor(fluid.CPUPlace(
-            # ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
             resnet = ResNet()
             optimizer = optimizer_setting(train_parameters)
@@ -347,7 +346,6 @@ class TestImperativeResnet(unittest.TestCase):
                         static_grad_value[static_grad_name_list[
                             i - grad_start_pos]] = out[i]
 
-        print(static_out, dy_out)
         self.assertTrue(np.allclose(static_out, dy_out))
 
         self.assertEqual(len(dy_param_init_value),
                          len(static_param_init_value))
@@ -358,9 +356,7 @@ class TestImperativeResnet(unittest.TestCase):
 
         self.assertEqual(len(dy_grad_value), len(static_grad_value))
         for key, value in six.iteritems(static_grad_value):
-            if not np.allclose(value, dy_grad_value[key]):
-                print(key)
-            #self.assertTrue(np.allclose(value, dy_grad_value[key]))
+            self.assertTrue(np.allclose(value, dy_grad_value[key]))
             self.assertTrue(np.isfinite(value.all()))
             self.assertFalse(np.isnan(value.any()))
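
Note on the layer.h change above: TrackPreOp used to overwrite stop_gradient_ unconditionally with the caller's flag, so a variable explicitly marked stop_gradient=True could be flipped back to trainable the next time an op tracked it. The new code only ever turns the flag on, making stop_gradient sticky once set; clear_gradients in layers.py now keys off the same per-parameter flag instead of a hard-coded name blacklist. Below is a minimal standalone C++ sketch of the before/after semantics; VarBaseSketch and the two method names are illustrative stand-ins, not Paddle's actual classes.

// Standalone sketch: why the unconditional assignment lost the flag.
// VarBaseSketch is a hypothetical stand-in for imperative::VarBase.
#include <cassert>

struct VarBaseSketch {
  bool stop_gradient_ = false;

  // Old behavior: the tracked op's flag overwrites whatever was set before.
  void TrackPreOpOld(bool pre_op_stop_gradient) {
    stop_gradient_ = pre_op_stop_gradient;
  }

  // New behavior: the flag is one-way -- tracking can set it, never clear it.
  void TrackPreOpNew(bool pre_op_stop_gradient) {
    if (pre_op_stop_gradient) {
      stop_gradient_ = pre_op_stop_gradient;
    }
  }
};

int main() {
  VarBaseSketch old_var;
  old_var.stop_gradient_ = true;    // user freezes the variable
  old_var.TrackPreOpOld(false);     // a later op tracks it with false...
  assert(!old_var.stop_gradient_);  // ...and the user's setting is lost

  VarBaseSketch new_var;
  new_var.stop_gradient_ = true;    // user freezes the variable
  new_var.TrackPreOpNew(false);     // tracking no longer clears the flag
  assert(new_var.stop_gradient_);   // the setting survives
  return 0;
}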