Patch overview (free-form text ahead of the first "diff --git" header):
  * python/paddle/fluid/framework.py: after an op is appended to a Block, and
    only when the "stop_gradient" keyword argument is truthy, set
    stop_gradient = True on every output Variable, including the members of
    list/tuple outputs.
  * python/paddle/fluid/imperative/nn.py: BatchNorm swaps the commented-out
    TODO code for live checks that set stop_gradient on the scale and bias
    parameters when use_global_stats is truthy and the matching
    learning_rate equals 0.
  * python/paddle/fluid/optimizer.py: the parameter loop now also skips
    parameters whose "trainable" attribute is false.
  * python/paddle/fluid/tests/unittests/test_imperative_optimizer.py: renames
    test_mnist_cpu_float32 to test_mnist_float32 and passes the arrays
    themselves to np.allclose instead of the results of their .all() calls.
NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks follow the hunk line counts; leading indentation, the spacing
inside the removed comment lines, and the blank context lines were inferred
from Python syntax. Confirm against the target files before applying; a
whitespace-tolerant apply mode may be needed.

diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 3ddd73080bd0c24c0b5b12e84b36aed515119c9c..17798e359cda328489d0476eb6a0f40575594436 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1307,6 +1307,17 @@ class Block(object):
             outputs=kwargs.get("outputs", None),
             attrs=kwargs.get("attrs", None))
         self.ops.append(op)
+
+        # set stop_gradient in static mode
+        if kwargs.get("stop_gradient", False):
+            outputs = kwargs.get("outputs", None)
+            if outputs is not None:
+                for k, v in six.iteritems(outputs):
+                    if isinstance(v, Variable):
+                        v.stop_gradient = True
+                    elif isinstance(v, list) or isinstance(v, tuple):
+                        for var in v:
+                            var.stop_gradient = True
         self._trace_op(op, kwargs.get("stop_gradient", False))
         return op
 
diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py
index 140c0ff037d453641cc119301269121025e17cbd..fe5014f5e6636f541f92ad5f72280f6558dc6f74 100644
--- a/python/paddle/fluid/imperative/nn.py
+++ b/python/paddle/fluid/imperative/nn.py
@@ -332,21 +332,16 @@ class BatchNorm(layers.Layer):
             shape=param_shape,
             dtype=self._dtype,
             default_initializer=Constant(1.0))
-
-        # TODO(minqiyang): change stop_gradient sign to trainable to align with static graph
-        # # setting stop_gradient=True to reduce computation
-        # if use_global_stats and self._helper.param_attr.learning_rate == 0.:
-        # self._scale.stop_gradient = True
+        if use_global_stats and self._helper.param_attr.learning_rate == 0.:
+            self._scale.stop_gradient = True
 
         self._bias = self._helper.create_parameter(
             attr=self._helper.bias_attr,
             shape=param_shape,
             dtype=self._dtype,
             is_bias=True)
-        # TODO(minqiyang): change stop_gradient sign to trainable to align with static graph
-        # # setting stop_gradient=True to reduce computation
-        # if use_global_stats and self._helper.bias_attr.learning_rate == 0.:
-        # self._bias.stop_gradient = True
+        if use_global_stats and self._helper.bias_attr.learning_rate == 0.:
+            self._bias.stop_gradient = True
 
         self._mean = self._helper.create_parameter(
             attr=ParamAttr(
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 14f4276e2f4fc4a24d701ef05c94b88c4f0336da..e0e781a322b3eb68e3f54a66252a8d8b11a9a56f 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -387,7 +387,7 @@ class Optimizer(object):
 
             params_grads = []
             for param in parameters:
-                if param.stop_gradient:
+                if param.stop_gradient or not param.trainable:
                     continue
                 # create gradient variable
                 grad_var = Variable(
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index d0a5a883174cb33a035b344f9489b2ba02ba99f1..91637cac5ba1b1533e284a4aa4bfe4618b33a335 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -98,7 +98,7 @@ class MNIST(fluid.imperative.Layer):
 
 
 class TestImperativeMnist(unittest.TestCase):
-    def test_mnist_cpu_float32(self):
+    def test_mnist_float32(self):
         seed = 90
 
         with fluid.imperative.guard():
@@ -196,11 +196,10 @@ class TestImperativeMnist(unittest.TestCase):
                     static_param_value[static_param_name_list[i - 1]] = out[i]
 
         for key, value in six.iteritems(static_param_init_value):
-            self.assertTrue(
-                np.allclose(value.all(), dy_param_init_value[key].all()))
-        self.assertTrue(np.allclose(static_out.all(), dy_out.all()))
+            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
+        self.assertTrue(np.allclose(static_out, dy_out))
         for key, value in six.iteritems(static_param_value):
-            self.assertTrue(np.allclose(value.all(), dy_param_value[key].all()))
+            self.assertTrue(np.allclose(value, dy_param_value[key]))
 
 
 if __name__ == '__main__':