diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 02d9ef866c4a8b6d46c87c812ac510cdd90810fe..cf330cda5e4267777754988325b3fc64382aaabf 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -61,6 +61,9 @@ class Autograd {
     for (size_t i = 0; i < input_grads.size(); ++i) {
       if (!input_grads[i]) continue;
+      if (ready_op->input_vars_->at(i)->stop_gradient_) {
+        continue;
+      }
       OpBase* pre_op = ready_op->pre_ops_->at(i);
       if (!pre_op) continue;
 
@@ -152,7 +155,7 @@ void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) {
 }
 
 std::vector OpBase::ApplyGrad(framework::Scope* scope) {
-  VLOG(3) << "op grad " << grad_op_desc_->Type();
+  VLOG(3) << "op grad type: " << grad_op_desc_->Type();
 
   for (const std::string& grad_invar : grad_op_desc_->InputArgumentNames()) {
     if (grad_to_var_->find(grad_invar) == grad_to_var_->end()) {
diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h
index f6dac762fd7074f3bffa26e8b4aa69733009433c..776f228875e4a302505e1fec0ab2a5b85a5b0270 100644
--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -93,6 +93,8 @@ class Tracer {
           LOG(ERROR) << "tracer doesn't support yet";
         }
       }
+
+      outputs[i]->stop_gradient_ = stop_gradient;
       outputs[i]->var_ = var;
       outputs[i]->pre_op_ = op;
       outputs[i]->pre_op_out_idx_ = i;
@@ -106,6 +108,7 @@ class Tracer {
       CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
       op->grad_op_desc_ = grad_op_desc;
       op->grad_to_var_ = grad_to_var;
+      VLOG(3) << "tracer create grad op " << grad_op_desc->Type();
     }
     op->block_ = block;
   }
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 613025d3c68a7fa4bd147281e47150a6dae45d6a..541c757389f7d7ae99386df6083d441358af0730 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -9348,7 +9348,7 @@ class PyFuncRegistry(object):
             raise TypeError('func must be a Python function')
 
         self._func = func
-        # find named args using reflection 
+        # find named args using reflection
        args = inspect.getargspec(self._func)
         if len(args[0]) == 0 and args[1] is None and args[2] is None:
             # Function with no inputs
@@ -9359,15 +9359,15 @@
         '''
         Why record self here?
 
-        1. For debug usage. Users can call 
-           :code:`py_func.registered_func(idx)` method 
+        1. For debug usage. Users can call
+           :code:`py_func.registered_func(idx)` method
            to find the registered function corresponding
-           to :code:`idx`. 
+           to :code:`idx`.
 
-        2. For increasing reference count of self. 
-           It seems that to release Python object 
+        2. For increasing reference count of self.
+           It seems that to release Python object
            whose reference count is 1 would cause
-           segmentation fault error in C++ side. 
+           segmentation fault error in C++ side.
            May be lack of Python GC in C++ side?
         '''
         PyFuncRegistry._register_funcs.append(self)
@@ -9418,7 +9418,7 @@
 def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None):
     """
     PyFunc Operator.
-    
+
     User can use :code:`py_func` to register operators in Python side. The
     inputs of :code:`func` is :code:`LoDTensor` and outputs can be
     numpy array or :code:`LoDTensor`. Paddle would call the registered
@@ -9436,7 +9436,7 @@ def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None):
     no gradient, users should return None.
 
     This function can also be used to debug the running network. User can
-    add a :code:`py_func` operator without output, and print input 
+    add a :code:`py_func` operator without output, and print input
     :code:`x` inside :code:`func`.
 
     Args:
@@ -9444,50 +9444,50 @@ def py_func(func, x, out, backward_func=None, skip_vars_in_backward_input=None):
         x (Variable|list(Variable)|tuple(Variable)): inputs of :code:`func`.
         out (Variable|list(Variable)|tuple(Variable)): outputs of :code:`func`.
             Paddle cannot infer shapes and data types of :code:`out`. Users
-            should create :code:`out` beforehand. 
+            should create :code:`out` beforehand.
         backward_func (callable|None): backward Python function.
-            None means no backward. Default None. 
+            None means no backward. Default None.
         skip_vars_in_backward_input (Variable|list(Variable)|tuple(Variable)):
-            Variables that are not needed in :code:`backward_func` inputs. 
+            Variables that are not needed in :code:`backward_func` inputs.
             These variables must be any of :code:`x` and :code:`out`.
             If set, these vars would not be inputs of :code:`backward_func`,
-            Only useful when :code:`backward_func` is not None. Default None. 
+            Only useful when :code:`backward_func` is not None. Default None.
 
     Returns:
         out (Variable|list(Variable)|tuple(Variable)): input :code:`out`
 
     Examples:
-        
+
         >>> import paddle.fluid as fluid
         >>> import six
        >>>
         >>> def create_tmp_var(name, dtype, shape):
         >>>     return fluid.default_main_program().current_block().create_var(
-        >>>         name=name, dtype=dtype, shape=shape) 
+        >>>         name=name, dtype=dtype, shape=shape)
         >>>
         >>> # tanh activation has been provided by Paddle C++ op
-        >>> # Here, we only use tanh to be an example to show the usage 
+        >>> # Here, we only use tanh to be an example to show the usage
         >>> # of py_func
         >>> def tanh(x):
         >>>     return np.tanh(x)
-        >>> 
+        >>>
         >>> # forward input x is skipped
         >>> def tanh_grad(y, dy):
         >>>     return np.array(dy) * (1 - np.square(np.array(y)))
         >>>
         >>> def debug_func(x):
-        >>>     print(x) 
+        >>>     print(x)
         >>>
         >>> def simple_net(img, label):
         >>>     hidden = img
         >>>     for idx in six.moves.range(4):
         >>>         hidden = fluid.layers.fc(hidden, size=200)
         >>>         new_hidden = create_tmp_var(name='hidden_{}'.format(idx),
-        >>>             dtype=hidden.dtype, shape=hidden.shape) 
+        >>>             dtype=hidden.dtype, shape=hidden.shape)
         >>>
         >>>         # user-defined layers with forward and backward
-        >>>         hidden = fluid.layers.py_func(func=tanh, x=hidden, 
-        >>>             out=new_hidden, backward_func=tanh_grad, 
+        >>>         hidden = fluid.layers.py_func(func=tanh, x=hidden,
+        >>>             out=new_hidden, backward_func=tanh_grad,
         >>>             skip_vars_in_backward_input=hidden)
         >>>
         >>>         # user-defined debug layers to print variables
@@ -9666,14 +9666,15 @@ class FC(layers.PyLayer):
                  param_attr=None,
                  num_flatten_dims=1,
                  dtype=core.VarDesc.VarType.FP32):
-        super(FC, self).__init__()
+        super(FC, self).__init__(param_attr=param_attr)
         self._size = size
         self._num_flatten_dims = num_flatten_dims
         self._dtype = dtype
-        self._helper = LayerHelper('FC', param_attr=param_attr)
+        self._tmp = self._helper.create_variable_for_type_inference(self._dtype)
+        self._out = self._helper.create_variable_for_type_inference(self._dtype)
 
     def _build_once(self, inputs):
-        input_shape = inputs[0].shape
+        input_shape = inputs.shape
         param_shape = [
             reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1)
         ] + [self._size]
@@ -9684,21 +9685,20 @@ class FC(layers.PyLayer):
             is_bias=False)
 
     def forward(self, inputs):
-        tmp = self._helper.create_variable_for_type_inference(self._dtype)
         self._helper.append_op(
             type="mul",
-            inputs={"X": inputs[0],
+            inputs={"X": inputs,
                     "Y": self._w},
-            outputs={"Out": tmp},
outputs={"Out": self._tmp}, attrs={ "x_num_col_dims": self._num_flatten_dims, "y_num_col_dims": 1 }) - out = self._helper.create_variable_for_type_inference(self._dtype) self._helper.append_op( type="sum", - inputs={"X": [tmp]}, - outputs={"Out": out}, + inputs={"X": [self._tmp]}, + outputs={"Out": self._out}, attrs={"use_mkldnn": False}) - return out + + return self._out diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative.py index 0fe69d1bd4b1b10c09879871c8cf1fc197d1106b..ccf0743ea6d41d8090349bee8bef97959f2af2a6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative.py +++ b/python/paddle/fluid/tests/unittests/test_imperative.py @@ -36,7 +36,7 @@ class MyLayer(fluid.imperative.PyLayer): super(MyLayer, self).__init__() def forward(self, inputs): - x = fluid.layers.relu(inputs[0]) + x = fluid.layers.relu(inputs) self._x_for_debug = x return [fluid.layers.elementwise_mul(x, x)] @@ -52,7 +52,7 @@ class MLP(fluid.imperative.PyLayer): initializer=fluid.initializer.Constant(value=0.1))) def forward(self, inputs): - x = self._fc1(inputs[0]) + x = self._fc1(inputs) x = self._fc2(x) x = fluid.layers.reduce_sum(x) return x @@ -64,13 +64,14 @@ class TestImperative(unittest.TestCase): cl = core.Layer() cl.forward([]) l = fluid.imperative.PyLayer() - l.forward([]) + self.assertRaises(NotImplementedError, l.forward, []) def test_layer_in_out(self): np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32) with fluid.imperative.guard(): + var_inp = fluid.imperative.base.to_variable(np_inp) l = MyLayer() - x = l(np_inp)[0] + x = l(var_inp)[0] self.assertIsNotNone(x) dy_out = x._numpy() x._backward() @@ -95,8 +96,9 @@ class TestImperative(unittest.TestCase): def test_mlp(self): np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) with fluid.imperative.guard(): + var_inp = fluid.imperative.base.to_variable(np_inp) mlp = MLP() - out = mlp(np_inp) + out = mlp(var_inp) dy_out = out._numpy() out._backward() dy_grad = mlp._fc1._w._gradient() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py index a2e008615c228a3bd2c5eff5aaebc590baccc84f..802db5d1e034219ab7b3e818ec905ea045d66e05 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_mnist.py @@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase): mnist = MNIST() sgd = SGDOptimizer(learning_rate=1e-3) - for i in range(1): + for i in range(2): x_data = np.random.rand(128, 1, 28, 28).astype('float32') img = to_variable(x_data) y_data = np.random.rand(128, 1).astype('int64')