Commit 187cffd0 (unverified)
Authored Feb 28, 2019 by Qiyang Min; committed by GitHub on Feb 28, 2019.
Merge pull request #15928 from velconia/imperative_backward_hooks
Imperative backward hooks
Parents: 1616c32a, e5f3435d
Showing 18 changed files with 261 additions and 87 deletions (+261 -87):
paddle/fluid/framework/block_desc.cc                               +14  -14
paddle/fluid/framework/block_desc.h                                 +2   -2
paddle/fluid/framework/python_headers.h                             +8   -0
paddle/fluid/imperative/layer.cc                                   +24   -2
paddle/fluid/imperative/layer.h                                    +44   -3
paddle/fluid/imperative/tracer.cc                                  +11   -3
paddle/fluid/pybind/imperative.h                                    +1   -1
paddle/fluid/pybind/protobuf.cc                                     +0   -2
paddle/fluid/pybind/pybind.cc                                      +46  -15
python/paddle/fluid/framework.py                                   +14  -29
python/paddle/fluid/imperative/__init__.py                          +4   -0
python/paddle/fluid/imperative/base.py                              +2   -1
python/paddle/fluid/imperative/tracer.py                           +67   -0
python/paddle/fluid/initializer.py                                 +17   -8
python/paddle/fluid/tests/unittests/test_imperative_basic.py        +0   -0
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py    +2   -2
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py      +4   -2
python/paddle/fluid/tests/unittests/test_imperative_resnet.py       +1   -3
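Taken together, the change replaces the old Block._clear_block() cleanup with per-op backward hooks: the new Python Tracer stamps every traced op with a _trace_id, keeps gradient-bearing ops alive in its _ops map, and registers release_op as a backward hook; the C++ Autograd loop then calls InvokeBackwardHooks() after an op's gradient has been applied, which frees the op eagerly. Below is a minimal pure-Python sketch of that lifecycle; the names FakeOpBase and invoke_backward_hooks are hypothetical stand-ins for the pybind-exposed OpBase machinery, not Paddle API:

    from collections import defaultdict

    _ops = defaultdict()  # trace id -> live op, mirroring Tracer._ops

    def release_op(op):
        # backward hook: drop the bookkeeping reference once backward has run
        del _ops[op._trace_id]

    class FakeOpBase(object):
        # hypothetical stand-in for the C++ OpBase exposed through pybind
        def __init__(self, trace_id):
            self._trace_id = trace_id
            self._backward_hooks = []

        def register_backward_hooks(self, callable):
            self._backward_hooks.append(callable)

        def invoke_backward_hooks(self):
            # mirrors OpBase::InvokeBackwardHooks: each hook receives the op
            for hook in self._backward_hooks:
                hook(self)

    op = FakeOpBase(trace_id=0)
    _ops[op._trace_id] = op
    op.register_backward_hooks(release_op)
    op.invoke_backward_hooks()  # as Autograd does after ApplyGrad
    assert 0 not in _ops        # the op has been released eagerly

The per-file diffs follow.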
paddle/fluid/framework/block_desc.cc
@@ -13,7 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/block_desc.h"
+#include <queue>
+#include <unordered_set>
+#include <utility>
+
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
@@ -155,6 +159,16 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
   ops_.erase(ops_.begin() + s, ops_.begin() + e);
 }
 
+void BlockDesc::RemoveOpInternal(const OpDesc *op_desc) {
+  // TODO(minqiyang): make this faster
+  for (auto it = ops_.begin(); it != ops_.end(); ++it) {
+    if (it->get() == op_desc) {
+      ops_.erase(it);
+      break;
+    }
+  }
+}
+
 std::vector<OpDesc *> BlockDesc::AllOps() const {
   std::vector<OpDesc *> res;
   for (const auto &op : ops_) {
@@ -163,20 +177,6 @@ std::vector<OpDesc *> BlockDesc::AllOps() const {
   return res;
 }
 
-void BlockDesc::Clear() {
-  // clear all ops
-  ops_.clear();
-
-  // clear all vars which are not persistable
-  for (auto it = vars_.begin(); it != vars_.end();) {
-    if (it->second->Persistable()) {
-      ++it;
-    } else {
-      vars_.erase(it++);
-    }
-  }
-}
-
 void BlockDesc::Flush() {
   for (auto &op_desc : ops_) {
     op_desc->Flush();
paddle/fluid/framework/block_desc.h
@@ -93,12 +93,12 @@ class BlockDesc {
    */
   void RemoveOp(size_t s, size_t e);
 
+  void RemoveOpInternal(const OpDesc *op_desc);
+
   void RemoveVar(const std::string &name) { vars_.erase(name); }
 
   std::vector<OpDesc *> AllOps() const;
 
-  void Clear();
-
   size_t OpSize() const { return ops_.size(); }
 
   OpDesc *Op(int idx) const { return ops_.at(idx).get(); }
paddle/fluid/framework/python_headers.h
@@ -24,3 +24,11 @@ limitations under the License. */
 
 #pragma pop_macro("_XOPEN_SOURCE")
 #pragma pop_macro("_POSIX_C_SOURCE")
+
+#if !defined(PYBIND11_HIDDEN)
+#ifdef _WIN32
+#define PYBIND11_HIDDEN __declspec(dllexport)
+#else
+#define PYBIND11_HIDDEN __attribute__((visibility("hidden")))
+#endif
+#endif
paddle/fluid/imperative/layer.cc
@@ -18,6 +18,7 @@
 #include <limits>
 #include <map>
 #include <random>
+#include <unordered_set>
 #include <utility>
 
 #include "paddle/fluid/framework/lod_tensor.h"
@@ -139,6 +140,8 @@ class Autograd {
         }
       }
     }
+
+      ready_op->InvokeBackwardHooks();
     }
   }
@@ -156,8 +159,10 @@ class Autograd {
     for (auto it : candidate->pre_ops_) {
       for (OpBase* pre_op : it.second) {
         if (!pre_op) continue;
-        VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- "
-                << it.first << " <---- " << pre_op->op_desc_->Type();
+        VLOG(5) << "op dep " << candidate->op_desc_->Type() << " trace id "
+                << candidate->trace_id_ << " <---- " << it.first << " <---- "
+                << pre_op->op_desc_->Type() << " trace id "
+                << pre_op->trace_id_;
         if (visited.find(pre_op) == visited.end()) {
           visited.insert(pre_op);
           queue.push_back(pre_op);
@@ -211,6 +216,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     return {};
   }
 
+  VLOG(3) << "apply op grad: " << op_desc_->Type();
   std::vector<framework::VariableValueMap> grad_outputs;
   if (backward_id_ > 0) {
     VLOG(3) << "py_layer_grad";
@@ -272,6 +278,22 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   return input_vars_;
 }
 
+void OpBase::InvokeBackwardHooks() {
+  VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size();
+
+  // call backward hooks
+  for (py::object& callable : backward_hooks_) {
+    callable(this);
+  }
+}
+
+void OpBase::RegisterBackwardHooks(const py::object& callable) {
+  VLOG(3) << "Register backward hooks " << trace_id_;
+
+  // TODO(minqiyang): check the callable format
+  backward_hooks_.push_back(callable);
+}
+
 void VarBase::RunBackward() {
   if (!pre_op_) return;
paddle/fluid/imperative/layer.h
@@ -123,22 +123,32 @@ class VarBase {
  private:
   VarBase(framework::Variable* var, VarBase* grad, bool stop_gradient)
-      : var_desc_(nullptr),
+      : name_(),
+        var_desc_(nullptr),
         var_(var),
         grads_(grad),
+        block_(nullptr),
+        persistable_(false),
         stop_gradient_(stop_gradient),
         pre_op_(nullptr),
+        pre_op_out_name_(),
         pre_op_out_idx_(-1) {}
 
  public:
   virtual ~VarBase() {
     // TODO(minqiyang): remove var desc from block desc
     if (var_) {
       delete var_;
+      var_ = nullptr;
     }
 
     if (grads_) {
       delete grads_;
+      grads_ = nullptr;
     }
+
+    pre_op_ = nullptr;
+    pre_op_out_idx_ = -1;
   }
 
   inline OpBase* PreOp() const { return pre_op_; }
@@ -151,6 +161,14 @@ class VarBase {
   void RunBackward();
 
+  inline void ResetPreOp(OpBase* op) {
+    if (op == pre_op_) {
+      // clear pre_op info when op equals to var's pre_op
+      pre_op_ = nullptr;
+      pre_op_out_idx_ = -1;
+    }
+  }
+
   void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
                   int pre_op_out_idx, bool pre_op_stop_gradient) {
     pre_op_ = pre_op;
@@ -184,11 +202,15 @@ class VarBase {
     return string::Sprintf("%s@IGrad", var_desc_->Name());
   }
 
+  std::string name_;
   framework::VarDesc* var_desc_;
 
   framework::Variable* var_;
   VarBase* grads_;
+  framework::BlockDesc* block_;
+  bool persistable_;
 
  private:
   bool stop_gradient_;
   OpBase* pre_op_;
@@ -199,15 +221,27 @@ class VarBase {
 /* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its
  * gradient. This object should be managed totally by Python intepreter.
  */
-class OpBase {
+class PYBIND11_HIDDEN OpBase {
  public:
   OpBase()
       : op_desc_(nullptr),
         forward_id_(-1),
         backward_id_(-1),
-        place_(platform::CPUPlace()) {}
+        trace_id_(-1),
+        place_(platform::CPUPlace()),
+        backward_hooks_() {}
 
   virtual ~OpBase() {
+    // TODO(minqiyang): remove op_desc from block_desc in tracer
+
+    // reset all output vars' pre op
+    for (auto iter : output_vars_) {
+      for (VarBase* var : iter.second) {
+        var->ResetPreOp(this);
+      }
+    }
+
+    // release resource
     for (framework::OpDesc* desc : grad_op_descs_) {
       delete desc;
     }
@@ -215,6 +249,10 @@ class OpBase {
   std::map<std::string, std::vector<VarBase*>> ApplyGrad();
 
+  void RegisterBackwardHooks(const py::object& callable);
+
+  void InvokeBackwardHooks();
+
   // One of `op_desc_` or `forward_id_` is set, not both.
   // For pure python PyLayer, use `forward_id_`, otherwise, use op_desc_.
   framework::OpDesc* op_desc_;
@@ -225,6 +263,7 @@ class OpBase {
   // Note: each fwd op corresponds to a vector of bwd ops.
   std::vector<framework::OpDesc*> grad_op_descs_;
   int backward_id_;
+  int trace_id_;
 
   platform::Place place_;
@@ -239,6 +278,8 @@ class OpBase {
   std::vector<framework::VariableValueMap> grad_output_vars_;
 
   framework::BlockDesc* block_;
+
+  std::vector<py::object> backward_hooks_;
 };
 
 class Layer {
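The ResetPreOp / destructor pair above is what lets an op be deleted while its output variables are still alive: ~OpBase walks output_vars_ and clears each variable's now-dangling pre_op_ pointer. A small pure-Python mock of that contract (illustration only, not Paddle API; Var and Op are hypothetical names):

    class Var(object):
        def __init__(self):
            self.pre_op = None
            self.pre_op_out_idx = -1

        def reset_pre_op(self, op):
            # clear pre-op info only when op is this var's recorded pre op
            if op is self.pre_op:
                self.pre_op = None
                self.pre_op_out_idx = -1

    class Op(object):
        def __init__(self, output_vars):
            self.output_vars = output_vars

        def destroy(self):
            # mirrors ~OpBase: un-link from every output variable
            for var in self.output_vars:
                var.reset_pre_op(self)

    v = Var()
    op = Op([v])
    v.pre_op = op
    op.destroy()
    assert v.pre_op is None  # no dangling reference left behind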
paddle/fluid/imperative/tracer.cc
@@ -14,7 +14,10 @@
 #include "paddle/fluid/imperative/tracer.h"
 
+#include <memory>
 #include <set>
+#include <unordered_map>
+#include <unordered_set>
 
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
@@ -110,7 +113,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
   std::map<std::string, VarBase*> vars;
 
   framework::OpDesc* op_desc = op->op_desc_;
-  VLOG(3) << "tracer tracing " << op_desc->Type();
+  VLOG(3) << "tracer tracing " << op_desc->Type() << " trace id "
+          << op->trace_id_;
   op_desc->InferShape(*block);
   op_desc->InferVarType(block);
@@ -133,11 +137,13 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
       if (inp->PreOp() && !inp->IsStopGradient()) {
         op->pre_ops_[it.first].push_back(inp->PreOp());
         op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
+        VLOG(3) << "add pre op " << inp->PreOp()->op_desc_->Type();
       } else {
         op->pre_ops_[it.first].push_back(nullptr);
       }
       VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
-              << inp->var_->IsInitialized();
+              << inp->var_->IsInitialized() << " stop_gradient "
+              << inp->IsStopGradient();
     }
   }
@@ -189,6 +195,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
   op->grad_input_vars_.resize(op->grad_op_descs_.size());
   op->grad_output_vars_.resize(op->grad_op_descs_.size());
+
   for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) {
     framework::OpDesc* grad_op_desc = op->grad_op_descs_[i];
     for (auto it : grad_op_desc->Inputs()) {
@@ -201,7 +208,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
         PADDLE_ENFORCE(fwd_var_it != vars.end());
         // Forward inputs or outputs.
         grad_in_vars.push_back(fwd_var_it->second->var_);
-        vars_saved_for_backward.insert(it.first);
       } else {
         VarBase* var = vars[var_it->second];
         if (!var->grads_->var_->IsInitialized()) {
@@ -211,6 +217,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
           // Douts.
           grad_in_vars.push_back(var->grads_->var_);
         }
+
+        vars_saved_for_backward.insert(it.first);
       }
     }
paddle/fluid/pybind/imperative.h
@@ -33,7 +33,7 @@ class Layer : public imperative::Layer {
   }
 };
 
-class PyOpBase : public imperative::OpBase {
+class PYBIND11_HIDDEN PyOpBase : public imperative::OpBase {
  public:
   using imperative::OpBase::OpBase;  // Inherit constructors
 };
paddle/fluid/pybind/protobuf.cc
@@ -189,8 +189,6 @@ void BindBlockDesc(pybind11::module *m) {
             return self.HasVar(name);
           },
           pybind11::return_value_policy::reference)
-      .def("_clear_block", [](pd::BlockDesc &self) { return self.Clear(); },
-           pybind11::return_value_policy::reference)
       .def("_rename_var",
            [](pd::BlockDesc &self, const pybind11::bytes &byte_name,
               const pybind11::bytes &byte_name_new) {
paddle/fluid/pybind/pybind.cc
@@ -177,6 +177,23 @@ PYBIND11_MODULE(core, m) {
            py::return_value_policy::take_ownership)
       .def("value", [](const imperative::VarBase &self) { return self.var_; },
            py::return_value_policy::reference)
+      .def_property("name",
+                    [](const imperative::VarBase &self) { return self.name_; },
+                    [](imperative::VarBase &self, const std::string &name) {
+                      self.name_ = name;
+                    })
+      .def_property("block",
+                    [](const imperative::VarBase &self) { return self.block_; },
+                    [](imperative::VarBase &self,
+                       framework::BlockDesc *block) { self.block_ = block; },
+                    py::return_value_policy::reference)
+      .def_property(
+          "persistable",
+          [](const imperative::VarBase &self) { return self.persistable_; },
+          [](imperative::VarBase &self, const bool persistable) {
+            self.persistable_ = persistable;
+          })
       .def_property("desc",
                     [](const imperative::VarBase &self) { return self.var_desc_; },
@@ -193,6 +210,10 @@ PYBIND11_MODULE(core, m) {
   py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
       .def(py::init<>())
+      .def("register_backward_hooks",
+           [](imperative::OpBase &self, const py::object &callable) {
+             self.RegisterBackwardHooks(callable);
+           })
       .def_property("desc",
                     [](const imperative::OpBase &self) { return self.op_desc_; },
                     [](imperative::OpBase &self, framework::OpDesc *op_desc) {
@@ -201,6 +222,16 @@ PYBIND11_MODULE(core, m) {
             }
           },
           py::return_value_policy::reference)
+      .def_property("_trace_id",
+                    [](const imperative::OpBase &self) {
+                      pybind11::gil_scoped_release release;
+                      return self.trace_id_;
+                    },
+                    [](imperative::OpBase &self, int trace_id) {
+                      pybind11::gil_scoped_release release;
+                      self.trace_id_ = trace_id;
+                    },
+                    py::return_value_policy::reference)
       .def_property("forward_id",
                     [](const imperative::OpBase &self) { return self.forward_id_; },

The remaining four hunks touch only docstring indentation; the text of each changed line is identical before and after:

@@ -413,11 +444,11 @@  LoDTensor.set_recursive_sequence_lengths docstring:
        Set LoD of the LoDTensor according to recursive sequence length.
        For example, if recursive_sequence_lengths=[[2, 3]], meaning that
        there are two sequences with length 2 and 3 respectively, the
        corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].
        Args:
            recursive_sequence_lengths (List[List[int]]): sequence lengths.

@@ -448,7 +479,7 @@  the recursive-sequence-lengths getter docstring:
        Return the sequence length of the LoDTensor corresponding to LoD.
        Returns:
            out (List[List[int]): the sequence lengths.

@@ -599,29 +630,29 @@  Scope.var (py::arg("name")) and Scope.find_var docstrings:
        Find or create variable named :code:`name` in the current scope.
        If the variable named :code:`name` does not exist in the
        current scope, the variable would be created. Otherwise,
        return the existing variable.
        Args:
            name (str): the variable name.
        Returns:
            out (core.Variable): the found or created variable.

        Find variable named :code:`name` in the current scope or
        its parent scope. Return None if not found.
        Args:
            name (str): the variable name.
        Returns:
            out (core.Variable|None): the found variable or None.

@@ -645,7 +676,7 @@  Scope.new_scope docstring:
        Create a new scope.
        Returns:
            out (core._Scope): the created scope.
python/paddle/fluid/framework.py
@@ -393,6 +393,9 @@ class Variable(object):
             if not self._ivar:
                 self._ivar = core.VarBase(stop_gradient)
             self._ivar.desc = self.desc
+            self._ivar.block = block.desc
+            self._ivar.name = name
+            self._ivar.persistable = persistable
             if persistable:
                 self.block.vars[name] = self
             else:
@@ -721,7 +724,9 @@ class Operator(object):
                 out_arg_names = []
                 for arg in out_args:
                     out_arg_names.append(cpt.to_text(arg.name))
-                    arg.op = self
+                    # TODO(minqiyang): could we remove variable's op in static mode?
+                    if not _in_imperative_mode():
+                        arg.op = self
                 self.desc.set_output(out_proto.name, out_arg_names)
 
         if op_attrs is not None:
@@ -1200,15 +1205,6 @@ class Block(object):
         else:
             raise ValueError("Var {0} is not found recursively".format(name))
 
-    def _clear_block(self):
-        # TODO(minqiyang): move this to backward_hooks
-        self.desc._clear_block()
-
-        for name in self.vars.keys():
-            assert self.vars[name].persistable
-
-        del self.ops[:]
-
     def all_parameters(self):
         return list(self.iter_parameters())
@@ -1345,26 +1341,13 @@ class Block(object):
             #
             # TODO(minqiyang): add op stop_gradient support in static mode too.
             # currently, we only support stop_gradient in imperative mode.
-            self._trace_op(op, kwargs.get("stop_gradient", False))
-        self.ops.append(op)
+            _imperative_tracer().trace_op(op,
+                                          kwargs.get("stop_gradient", False))
+        else:
+            self.ops.append(op)
 
         return op
 
-    def _trace_op(self, op, stop_gradient=False):
-        backward_refs = _imperative_tracer().trace(
-            op.iop, op.inputs, op.outputs, self.desc,
-            _imperative_current_expected_place_, stop_gradient)
-
-        # TODO(minqiyang): support backward_hooks to eager remove backward_refs
-        op.backward_refs = defaultdict(list)
-        for k, v in six.iteritems(op.inputs):
-            if k in backward_refs:
-                op.backward_refs[k] = op.inputs[k]
-
-        for k, v in six.iteritems(op.outputs):
-            if k in backward_refs:
-                op.backward_refs[k] = op.outputs[k]
-
     def _insert_op(self, index, *args, **kwargs):
         """
         Insert a Operator according to the giving arguments.
@@ -1417,9 +1400,11 @@ class Block(object):
             inputs=kwargs.get("inputs", None),
             outputs=kwargs.get("outputs", None),
             attrs=kwargs.get("attrs", None))
-        self.ops.insert(0, op)
         if _in_imperative_mode():
-            self._trace_op(op, kwargs.get("stop_gradient", False))
+            _imperative_tracer().trace_op(op,
+                                          kwargs.get("stop_gradient", False))
+        else:
+            self.ops.insert(0, op)
         return op
 
     def _sync_with_cpp(self):
python/paddle/fluid/imperative/__init__.py
@@ -23,7 +23,11 @@ from .layers import *
 from . import nn
 from .nn import *
 
+from . import tracer
+from .tracer import *
+
 __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
 __all__ += nn.__all__
+__all__ += tracer.__all__
python/paddle/fluid/imperative/base.py
@@ -16,6 +16,7 @@ import numpy as np
 
 from paddle.fluid import core
 from paddle.fluid import framework
+from .tracer import Tracer
 
 __all__ = ['enabled', 'guard', 'to_variable']
@@ -28,7 +29,7 @@ def enabled():
 def guard(place=None):
     train = framework.Program()
     startup = framework.Program()
-    tracer = core.Tracer(train.current_block().desc)
+    tracer = Tracer(train.current_block().desc)
 
     if place is None:
         if core.is_compiled_with_cuda():
python/paddle/fluid/imperative/tracer.py
new file (mode 100644)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import six

from collections import defaultdict
from paddle.fluid import core
from paddle.fluid import framework

__all__ = ['Tracer']


def release_op(op):
    del framework._imperative_tracer()._ops[op._trace_id]


class Tracer(core.Tracer):
    """
    Python wrapper of imperative tracer
    """

    def __init__(self, block):
        super(Tracer, self).__init__(block)

        self._ops = defaultdict()
        self._trace_id = 0

    def trace_op(self, op, stop_gradient=False):
        # record op's trace id
        op.iop._trace_id = self._trace_id

        # trace op and save it
        backward_refs = self.trace(op.iop, op.inputs, op.outputs,
                                   op.block.desc,
                                   framework._current_expected_place(),
                                   stop_gradient)

        if not stop_gradient:
            self._trace_id += 1
            self._ops[op.iop._trace_id] = op

            # register backward hooks and variables if needed
            if len(backward_refs) > 0:
                op.iop.register_backward_hooks(release_op)

                # TODO(minqiyang): remove all inputs and outputs after seperate
                # var and grad
                op.backward_refs = defaultdict(list)
                for k, v in six.iteritems(op.inputs):
                    if k in backward_refs:
                        op.backward_refs[k] = op.inputs[k]

                for k, v in six.iteritems(op.outputs):
                    if k in backward_refs:
                        op.backward_refs[k] = op.outputs[k]
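Note that trace_op only advances _trace_id and retains the op when stop_gradient is false, and only registers release_op when the trace reports backward references, so ops that never produce gradients are never held by the tracer. A runnable sketch of just that bookkeeping, with the C++ trace call stubbed out (MiniTracer and StubOp are hypothetical names, not Paddle API):

    from collections import defaultdict

    class MiniTracer(object):
        def __init__(self):
            self._ops = defaultdict()
            self._trace_id = 0

        def trace_op(self, op, stop_gradient=False):
            op._trace_id = self._trace_id  # stamp before tracing
            # ... the real Tracer calls self.trace(...) here ...
            if not stop_gradient:
                self._trace_id += 1
                self._ops[op._trace_id] = op  # retained under its own id

    class StubOp(object):
        pass

    t = MiniTracer()
    a, b = StubOp(), StubOp()
    t.trace_op(a)                      # retained: gets trace id 0
    t.trace_op(b, stop_gradient=True)  # stamped but never retained
    assert a._trace_id == 0 and t._ops[0] is a
    assert b._trace_id == 1 and 1 not in t._ops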
python/paddle/fluid/initializer.py
@@ -19,6 +19,7 @@ import numpy as np
 from .wrapped_decorator import signature_safe_contextmanager
 from .core import VarDesc
 from . import unique_name
+from .imperative import base as imperative_base
 
 __all__ = [
     'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
@@ -165,7 +166,8 @@ class ConstantInitializer(Initializer):
                 'force_cpu': self._force_cpu or force_init_on_cpu()
             },
             stop_gradient=True)
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op

The seven remaining hunks apply the identical guard around `var.op = op` in UniformInitializer (@@ -244,7 +246,8 @@), NormalInitializer (@@ -322,7 +325,8 @@), TruncatedNormalInitializer (@@ -400,7 +404,8 @@), XavierInitializer (@@ -505,7 +510,8 @@), MSRAInitializer (@@ -605,7 +611,8 @@), BilinearInitializer (@@ -703,7 +710,8 @@), and NumpyArrayInitializer (@@ -761,7 +769,8 @@).
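All eight initializers now skip recording var.op in imperative mode, presumably so that a traced op is not kept reachable from its output variable and the tracer's release_op hook remains free to drop it. A one-line sketch of the shared pattern (finish_init is a hypothetical helper, not Paddle API):

    def finish_init(var, op, imperative_enabled):
        # static graph keeps the creator op on the variable;
        # imperative mode leaves op ownership to the tracer
        if not imperative_enabled:
            var.op = op
        return op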
python/paddle/fluid/tests/unittests/test_imperative.py → python/paddle/fluid/tests/unittests/test_imperative_basic.py (file moved)
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import print_function
+
 import contextlib
 import unittest
 import numpy as np
@@ -142,8 +144,6 @@ class TestImperativeMnist(unittest.TestCase):
                 sgd.minimize(avg_loss)
                 mnist.clear_gradients()
 
-                fluid.default_main_program().global_block()._clear_block()
-
                 dy_param_value = {}
                 for param in mnist.parameters():
                     dy_param_value[param.name] = param._numpy()
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
@@ -243,7 +243,9 @@ class TestImperativePtbRnn(unittest.TestCase):
             dy_loss = None
             last_hidden = None
             last_cell = None
-            for i in range(2):
+
+            batch_num = 50
+            for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                 x_data = x_data.reshape((-1, num_steps, 1))
@@ -302,7 +304,7 @@ class TestImperativePtbRnn(unittest.TestCase):
         static_loss_value = None
         static_last_cell_value = None
         static_last_hidden_value = None
-        for i in range(2):
+        for i in range(batch_num):
             x_data = np.arange(12).reshape(4, 3).astype('int64')
             y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
             x_data = x_data.reshape((-1, num_steps, 1))
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -231,7 +231,7 @@ class TestImperativeResnet(unittest.TestCase):
         seed = 90
 
         batch_size = train_parameters["batch_size"]
-        batch_num = 2
+        batch_num = 20
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
@@ -286,8 +286,6 @@ class TestImperativeResnet(unittest.TestCase):
                 optimizer.minimize(avg_loss)
                 resnet.clear_gradients()
 
-                fluid.default_main_program().global_block()._clear_block()
-
                 dy_param_value = {}
                 for param in resnet.parameters():
                     dy_param_value[param.name] = param._numpy()