Commit c4ede95c authored by zhongpu, committed by hong

open dygraph op test, test=develop (#19787)

* open dygraph op test, test=develop

* modify to_variable, test=develop

* modify input and output for dygraph, test=develop

* modify input and output for dygraph(fix bug), test=develop

* fix input processing of dygraph op test, test=develop

* fix bug, test=develop

* fix op test, test=develop

* fix forward bug for dygraph, test=develop

* fix mkldnn op test for forward, test=develop

* update nn.py for dygraph, test=develop

* fix crop_tensor_op, test=develop

* fix elementwise_mul_op, test=develop

* fix fill_op, test=develop

* fix some mkldnn op, test=develop

* open backward op test for dygraph, test=develop

* delete log, test=develop

* close backward op test for dygraph, test=develop

* fix bug for edit_distance_op and test_lstm_cudnn_op, test=develop

* fix optest backward bug for dygraph, test=develop

* fix optest backward bug for dygraph, test=develop

* close backward op test for dygraph, test=develop

* close backward op test for dygraph, test=develop

* open dygraph op test, test=develop

* fix op test for dygraph, fix GradOpDescMaker, test=develop

* fix bug for linear_chain_crf_op.h, test=develop

* remove log, test=develop

* remove log, test=develop

* remove log for op_test.py, test=develop

* remove log for op_test.py, test=develop

* fix bug for var_conv_2d_op, change PADDLE_ENFORCE, test=develop

* fix PADDLE_ENFORCE_EQ for hierarchical_sigmoid_op.cc, test=develop

* fix bug for test_increment_ngraph_op.py, test=develop

* fix lod for op test in dygraph, test=develop

* refactor op_test.py to reduce redundant code, test=develop

* fix lod optest, modify InputVar/OutputVar to HasInput/HasOutput, test=develop

* remove debug log, test=develop

* remove redundant code in base.py, test=develop

* fix some error in optest, test=develop

* fix ClearNoNeedBufferInputs function's bug for LoDTensor, test=develop

* refactor op_test.py, test=develop

* remove redundant writing, test=develop

* fix error(get tensor of the grad variable), test=develop

* fix test_concat_mkldnn test_conv2d_mkldnn, test=develop

* fix optest.py for get tensor of LoDTensor, test=develop

* fix optest.py for get tensor of LoDTensor, test=develop

* fix optest.py for get tensor of LoDTensor, test=develop

* fix some redundant code, test=develop

* resolve conflict and rewrite paddle error message, test=develop
Parent 3ab60f5b
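The central change in this patch is that `OpTest.check_output` and `OpTest.check_grad` in `op_test.py` now default to `check_dygraph=True`, so every operator unit test also runs its forward pass (and the gradient check) in dygraph mode; tests whose kernels dygraph cannot execute yet (MKL-DNN ops, LoD-dependent ops) pass `check_dygraph=False` explicitly. Below is a minimal sketch of how a test picks up the new default, assuming the usual layout under `paddle/fluid/tests/unittests`; the test class and the choice of the `scale` operator are illustrative only and not part of this patch.

```python
import unittest

import numpy as np

from op_test import OpTest  # paddle/fluid/tests/unittests/op_test.py


class TestScaleOpDygraph(OpTest):
    """Hypothetical example test; the scale op is used only for illustration."""

    def setUp(self):
        self.op_type = "scale"
        x = np.random.uniform(0.1, 1.0, [3, 4]).astype("float32")
        self.inputs = {'X': x}
        self.attrs = {'scale': 2.0}
        self.outputs = {'Out': x * 2.0}

    def test_check_output(self):
        # check_dygraph now defaults to True, so the op also runs in
        # dygraph (imperative) mode and its outputs are compared.
        self.check_output()

    def test_check_grad(self):
        # The gradient check likewise runs in dygraph mode; ops that
        # dygraph cannot handle yet opt out with check_dygraph=False,
        # as the MKL-DNN tests in this patch do.
        self.check_grad(['X'], 'Out')


if __name__ == '__main__':
    unittest.main()
```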
......@@ -253,12 +253,14 @@ class RuntimeInferVarTypeContext : public framework::InferVarTypeContext {
}
bool HasInput(const std::string& name) const override {
return inputs_.count(name) > 0;
auto it = inputs_.find(name);
return (it != inputs_.end() && it->second.size() > 0);
}
bool HasOutput(const std::string& name) const override {
PADDLE_ENFORCE_NOT_NULL(outputs_);
return outputs_->count(name) > 0;
auto it = outputs_->find(name);
return (it != outputs_->end() && it->second.size() > 0);
}
const std::vector<std::string>& Input(
......
......@@ -53,6 +53,7 @@ static void ClearNoNeedBufferInputs(OpBase* op) {
new_var->MutableVar()->GetMutable<framework::LoDTensor>();
auto& old_tensor = var.Get<framework::LoDTensor>();
new_tensor->Resize(old_tensor.dims());
new_tensor->set_lod(old_tensor.lod());
each_var.reset(new_var);
}
}
......
......@@ -61,16 +61,30 @@ class HierarchicalSigmoidOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("PreOut"),
"Output(PreOut) should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
platform::errors::NotFound(
"Input(X) of HierarchicalSigmoidOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Label"), true,
platform::errors::NotFound(
"Input(Label) of HierarchicalSigmoidOp is not found."));
PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true,
platform::errors::NotFound(
"Input(W) of HierarchicalSigmoidOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("Out"), true,
platform::errors::NotFound(
"Output(Out) of HierarchicalSigmoidOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("PreOut"), true,
platform::errors::NotFound(
"Output(PreOut) of HierarchicalSigmoidOp is not found."));
auto with_prefetch = ctx->Attrs().Get<bool>("remote_prefetch");
if (with_prefetch) {
PADDLE_ENFORCE(ctx->HasOutput("W_Out"),
"Output(W_Out) should not be null.");
PADDLE_ENFORCE_EQ(
ctx->HasOutput("W_Out"), true,
platform::errors::NotFound(
"Output(W_Out) of HierarchicalSigmoidOp is not found."));
}
const int64_t batch_size = ctx->GetInputDim("X")[0];
std::vector<int64_t> output_shape({batch_size, 1});
......@@ -202,16 +216,30 @@ class HierarchicalSigmoidGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@Grad) should not be null");
PADDLE_ENFORCE(ctx->HasInput("PreOut"),
"Input(Preout) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("W")),
"Output(W@Grad should not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Output(X@Grad should not be null.");
PADDLE_ENFORCE_EQ(
ctx->HasInput("W"), true,
platform::errors::NotFound(
"Input(W) of HierarchicalSigmoidGradOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Label"), true,
platform::errors::NotFound(
"Input(Label) of HierarchicalSigmoidGradOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput(framework::GradVarName("Out")), true,
platform::errors::NotFound(
"Input(Out@Grad) of HierarchicalSigmoidGradOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("PreOut"), true,
platform::errors::NotFound(
"Input(Preout) of HierarchicalSigmoidGradOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput(framework::GradVarName("W")), true,
platform::errors::NotFound(
"Output(W@Grad of HierarchicalSigmoidGradOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput(framework::GradVarName("X")), true,
platform::errors::NotFound(
"Output(X@Grad of HierarchicalSigmoidGradOp is not found."));
if (ctx->HasOutput(framework::GradVarName("Bias"))) {
ctx->SetOutputDim(framework::GradVarName("Bias"),
......@@ -235,10 +263,10 @@ class HierarchicalSigmoidGradOpGradVarTypeInference
public:
void operator()(framework::InferVarTypeContext* ctx) const override {
auto w_grad_var_name = ctx->Output(framework::GradVarName("W")).front();
auto bias_grad_var_name_vec = ctx->Output(framework::GradVarName("Bias"));
auto has_bias_grad_var = ctx->HasOutput(framework::GradVarName("Bias"));
std::string bias_grad_var_name;
bool hasBias = false;
if (bias_grad_var_name_vec.size()) {
if (has_bias_grad_var) {
hasBias = true;
bias_grad_var_name = ctx->Output(framework::GradVarName("Bias")).front();
}
......
......@@ -29,12 +29,15 @@ class MinusOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of MinusOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"),
"Input(Y) of MinusOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of MinusOp should not be null.");
PADDLE_ENFORCE_EQ(
ctx->HasInput("X"), true,
platform::errors::NotFound("Input(X) of MinusOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Y"), true,
platform::errors::NotFound("Input(Y) of MinusOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("Out"), true,
platform::errors::NotFound("Output(Out) of MinusOp is not found."));
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
......@@ -71,27 +74,57 @@ or not. But the output only shares the LoD information with input `X`.
}
};
class MinusGradMaker : public framework::GradOpDescMakerBase {
class MinusGradDescMaker : public framework::GradOpDescMakerBase {
public:
using framework::GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<framework::OpDesc>> operator()() const override {
std::vector<std::unique_ptr<framework::OpDesc>> ops;
auto x_g = InputGrad("X");
auto x_g = this->InputGrad("X");
if (!x_g.empty()) {
auto *x_g_op = new framework::OpDesc();
x_g_op->SetType("scale");
x_g_op->SetInput("X", OutputGrad("Out"));
x_g_op->SetInput("X", this->OutputGrad("Out"));
x_g_op->SetOutput("Out", x_g);
x_g_op->SetAttr("scale", 1.0f);
ops.emplace_back(x_g_op);
}
auto y_g = InputGrad("Y");
auto y_g = this->InputGrad("Y");
if (!y_g.empty()) {
auto *y_g_op = new framework::OpDesc();
y_g_op->SetType("scale");
y_g_op->SetInput("X", OutputGrad("Out"));
y_g_op->SetInput("X", this->OutputGrad("Out"));
y_g_op->SetOutput("Out", y_g);
y_g_op->SetAttr("scale", -1.0f);
ops.emplace_back(y_g_op);
}
return ops;
}
};
class MinusGradMaker : public imperative::GradOpBaseMakerBase {
public:
using imperative::GradOpBaseMakerBase::GradOpBaseMakerBase;
std::vector<std::unique_ptr<imperative::OpBase>> operator()() const override {
std::vector<std::unique_ptr<imperative::OpBase>> ops;
auto x_g = this->InputGrad("X");
if (!x_g.empty()) {
auto *x_g_op = new imperative::OpBase();
x_g_op->SetType("scale");
x_g_op->SetInput("X", this->OutputGrad("Out"));
x_g_op->SetOutput("Out", x_g);
x_g_op->SetAttr("scale", 1.0f);
ops.emplace_back(x_g_op);
}
auto y_g = this->InputGrad("Y");
if (!y_g.empty()) {
auto *y_g_op = new imperative::OpBase();
y_g_op->SetType("scale");
y_g_op->SetInput("X", this->OutputGrad("Out"));
y_g_op->SetOutput("Out", y_g);
y_g_op->SetAttr("scale", -1.0f);
ops.emplace_back(y_g_op);
......@@ -105,6 +138,7 @@ class MinusGradMaker : public framework::GradOpDescMakerBase {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(minus, ops::MinusOp, ops::MinusOpMaker, ops::MinusGradMaker);
REGISTER_OPERATOR(minus, ops::MinusOp, ops::MinusOpMaker,
ops::MinusGradDescMaker, ops::MinusGradMaker);
REGISTER_OP_CPU_KERNEL(
minus, ops::MinusKernel<paddle::platform::CPUDeviceContext, float>);
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/nce_op.h"
#include <memory>
#include <string>
#include <vector>
......@@ -212,6 +213,33 @@ By default this operator uses a uniform distribution for sampling.
}
};
template <typename T>
class NCEGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
std::unique_ptr<T> Apply() const override {
auto *op = new T();
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("Input", this->Input("Input"));
op->SetInput("Label", this->Input("Label"));
op->SetInput("Bias", this->Input("Bias"));
op->SetInput("Weight", this->Input("Weight"));
op->SetInput("Cost", this->Output("Cost"));
op->SetInput("SampleLogits", this->Output("SampleLogits"));
op->SetInput("SampleLabels", this->Output("SampleLabels"));
op->SetInput("SampleWeight", this->Input("SampleWeight"));
op->SetInput("CustomDistProbs", this->Input("CustomDistProbs"));
op->SetInput("CustomDistAlias", this->Input("CustomDistAlias"));
op->SetInput("CustomDistAliasProbs", this->Input("CustomDistAliasProbs"));
op->SetInput(framework::GradVarName("Cost"), this->OutputGrad("Cost"));
op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias"));
op->SetOutput(framework::GradVarName("Weight"), this->InputGrad("Weight"));
op->SetAttrMap(this->Attrs());
return std::unique_ptr<T>(op);
}
};
class NCEOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -277,11 +305,9 @@ class NCEOpGradVarTypeInference : public framework::VarTypeInference {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
nce, ops::NCEOp,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>,
ops::NCEOpMaker);
REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpMaker,
ops::NCEGradOpMaker<paddle::framework::OpDesc>,
ops::NCEGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad, ops::NCEOpGradVarTypeInference);
REGISTER_OP_CPU_KERNEL(nce, ops::NCEKernel<paddle::platform::CPUPlace, float>,
ops::NCEKernel<paddle::platform::CPUPlace, double>);
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/unpool_op.h"
#include <memory>
#include <string>
#include <vector>
namespace paddle {
......@@ -82,14 +83,15 @@ class UnpoolOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of UnpoolOp"
"should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Indices"),
"Input(Indices) of UnpoolOp"
"should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of UnpoolOp should not be null.");
PADDLE_ENFORCE_EQ(
ctx->HasInput("X"), true,
platform::errors::NotFound("Input(X) of UnpoolOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Indices"), true,
platform::errors::NotFound("Input(Indices) of UnpoolOp is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("Out"), true,
platform::errors::NotFound("Output(Out) of UnpoolOp is not found."));
auto in_x_dims = ctx->GetInputDim("X");
auto in_y_dims = ctx->GetInputDim("Indices");
std::string unpooling_type =
......@@ -97,8 +99,11 @@ class UnpoolOp : public framework::OperatorWithKernel {
std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
PADDLE_ENFORCE(in_x_dims.size() == 4,
"Unpooling intput must be of 4-dimensional.");
PADDLE_ENFORCE_EQ(in_x_dims.size() == 4, true,
platform::errors::InvalidArgument(
"Unpooling intput(X) must be of 4-dimensional, but "
"received X's dimension is %d.",
in_x_dims.size()));
PADDLE_ENFORCE_EQ(in_x_dims, in_y_dims);
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
......@@ -114,6 +119,23 @@ class UnpoolOp : public framework::OperatorWithKernel {
}
};
template <typename T>
class UnpoolOpGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
std::unique_ptr<T> Apply() const override {
auto* op = new T();
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("Indices", this->Input("Indices"));
op->SetInput("Out", this->Output("Out"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
return std::unique_ptr<T>(op);
}
};
class UnpoolOpGrad : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
......@@ -126,9 +148,12 @@ class UnpoolOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Input(X@GRAD) should not be null.");
PADDLE_ENFORCE_EQ(
ctx->HasInput("X"), true,
platform::errors::NotFound("Input(X) of UnpoolOpGradOp is not found."));
PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true,
platform::errors::NotFound(
"Input(X@GRAD) of UnpoolOpGradOp is not found."));
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
};
......@@ -136,10 +161,9 @@ class UnpoolOpGrad : public framework::OperatorWithKernel {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
unpool, ops::UnpoolOp, ops::Unpool2dOpMaker,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(unpool, ops::UnpoolOp, ops::Unpool2dOpMaker,
ops::UnpoolOpGradMaker<paddle::framework::OpDesc>,
ops::UnpoolOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(unpool_grad, ops::UnpoolOpGrad);
REGISTER_OP_CPU_KERNEL(
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/var_conv_2d_op.h"
#include <memory>
#include <vector>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
......@@ -57,18 +58,24 @@ void VarConv2dOpMaker::Make() {
}
void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("X"),
"X(Input) of VarConv2dOP should not be null.");
PADDLE_ENFORCE(ctx->HasInput("W"),
"W(Input) of VarConv2dOP should not be null.");
PADDLE_ENFORCE(ctx->HasInput("ROW"),
"Input(ROW) of VarConv2dOP should not be null.");
PADDLE_ENFORCE(ctx->HasInput("COLUMN"),
"Input(COLUMN) of VarConv2dOP should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Out(Output) of VarConv2dOP should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Col"),
"Col(Output) of VarConv2dOP should not be null.");
PADDLE_ENFORCE_EQ(
ctx->HasInput("X"), true,
platform::errors::NotFound("X(Input) of VarConv2dOP is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("W"), true,
platform::errors::NotFound("W(Input) of VarConv2dOP is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("ROW"), true,
platform::errors::NotFound("Input(ROW) of VarConv2dOP is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("COLUMN"), true,
platform::errors::NotFound("Input(COLUMN) of VarConv2dOP is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("Out"), true,
platform::errors::NotFound("Out(Output) of VarConv2dOP is not found."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("Col"), true,
platform::errors::NotFound("Col(Output) of VarConv2dOP is not found."));
auto x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(x_dims.size(), 2,
......@@ -91,7 +98,10 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const {
framework::Variable* x_var =
boost::get<framework::Variable*>(ctx->GetInputVarPtrs("X")[0]);
const auto& x_lod = x_var->Get<LoDTensor>().lod();
PADDLE_ENFORCE(!x_lod.empty(), "The Input(X) must hold lod info.");
PADDLE_ENFORCE_EQ(
!x_lod.empty(), true,
platform::errors::InvalidArgument("The Input(X) Tensor of VarConv2dOP "
"does not contain LoD information."));
PADDLE_ENFORCE_GE(x_lod.size(), 1, "The Input(X)'s lod info is corrupted.");
PADDLE_ENFORCE_EQ(
......@@ -101,12 +111,18 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const {
framework::Variable* row_var =
boost::get<framework::Variable*>(ctx->GetInputVarPtrs("ROW")[0]);
const auto& row_lod = row_var->Get<LoDTensor>().lod();
PADDLE_ENFORCE(!row_lod.empty(), "The Input(ROW) must hold lod info.");
PADDLE_ENFORCE_EQ(!row_lod.empty(), true,
platform::errors::InvalidArgument(
"The Input(ROW) Tensor of VarConv2dOP does not "
"contain LoD information."));
framework::Variable* col_var =
boost::get<framework::Variable*>(ctx->GetInputVarPtrs("COLUMN")[0]);
const auto& col_lod = col_var->Get<LoDTensor>().lod();
PADDLE_ENFORCE(!col_lod.empty(), "The Input(COLUMN) must hold lod info.");
PADDLE_ENFORCE_EQ(!col_lod.empty(), true,
platform::errors::InvalidArgument(
"The Input(COLUMN) Tensor of VarConv2dOP does not "
"contain LoD information."));
} else {
std::vector<int64_t> out_dims_vec{-1};
out_dims_vec.push_back(1);
......@@ -280,13 +296,40 @@ class CPUVarConv2dOPKernel : public framework::OpKernel<T> {
}
};
template <typename T>
class VarConv2dGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
std::unique_ptr<T> Apply() const override {
auto* op = new T();
op->SetType(this->ForwardOpType() + "_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("W", this->Input("W"));
op->SetInput("ROW", this->Input("ROW"));
op->SetInput("COLUMN", this->Input("COLUMN"));
op->SetInput("Col", this->Output("Col"));
op->SetInput("Out", this->Output("Out"));
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetOutput(framework::GradVarName("W"), this->InputGrad("W"));
op->SetAttrMap(this->Attrs());
return std::unique_ptr<T>(op);
}
};
void VarConv2dOpGrad::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequencePadGradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("W"),
"Input(W) of SequencePadGradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) of SequencePadGradOp should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
platform::errors::NotFound(
"Input(X) of SequencePadGradOp is not found."));
PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true,
platform::errors::NotFound(
"Input(W) of SequencePadGradOp is not found."));
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true,
platform::errors::NotFound(
"Input(Out@GRAD) of SequencePadGradOp is not found."));
if (ctx->HasOutput(framework::GradVarName("X"))) {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
......@@ -416,10 +459,9 @@ class CPUVarConv2dOPGradKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plt = paddle::platform;
namespace frm = paddle::framework;
REGISTER_OPERATOR(
var_conv_2d, ops::VarConv2dOP, ops::VarConv2dOpMaker,
paddle::framework::DefaultGradOpMaker<paddle::framework::OpDesc, true>,
paddle::framework::DefaultGradOpMaker<paddle::imperative::OpBase, true>);
REGISTER_OPERATOR(var_conv_2d, ops::VarConv2dOP, ops::VarConv2dOpMaker,
ops::VarConv2dGradMaker<paddle::framework::OpDesc>,
ops::VarConv2dGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(var_conv_2d_grad, ops::VarConv2dOpGrad);
REGISTER_OP_CPU_KERNEL(var_conv_2d,
......
......@@ -28,6 +28,17 @@ class TestMKLDNNReluDim2(TestRelu):
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
def setUp(self):
......@@ -35,6 +46,17 @@ class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNTanhDim2(TestTanh):
def setUp(self):
......@@ -42,6 +64,17 @@ class TestMKLDNNTanhDim2(TestTanh):
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNSqrtDim2(TestSqrt):
def setUp(self):
......@@ -49,12 +82,34 @@ class TestMKLDNNSqrtDim2(TestSqrt):
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNAbsDim2(TestAbs):
def setUp(self):
super(TestMKLDNNAbsDim2, self).setUp()
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNReluDim4(TestRelu):
def setUp(self):
......@@ -69,6 +124,17 @@ class TestMKLDNNReluDim4(TestRelu):
self.outputs = {'Out': out}
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
def setUp(self):
......@@ -83,6 +149,17 @@ class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
self.outputs = {'Out': out}
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNTanhDim4(TestTanh):
def setUp(self):
......@@ -94,6 +171,17 @@ class TestMKLDNNTanhDim4(TestTanh):
self.outputs = {'Out': np.tanh(self.inputs['X'])}
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNSqrtDim4(TestSqrt):
def setUp(self):
......@@ -105,6 +193,17 @@ class TestMKLDNNSqrtDim4(TestSqrt):
self.outputs = {'Out': np.sqrt(self.inputs['X'])}
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNAbsDim4(TestAbs):
def setUp(self):
......@@ -117,6 +216,17 @@ class TestMKLDNNAbsDim4(TestAbs):
self.outputs = {'Out': np.abs(self.inputs['X'])}
self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
# Check if primitives already exist in backward
class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase):
......
......@@ -36,7 +36,8 @@ class TestConcatOp(OpTest):
self.outputs = {'Out': self.output}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
#--------------------test concat s8 in with axis 0--------------------
......
......@@ -24,6 +24,10 @@ class TestMKLDNNConcatOp(TestConcatOp):
self.attrs["use_mkldnn"] = True
self._cpu_only = True
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=(self.attrs["use_mkldnn"] == False))
def test_check_grad(self):
pass
......@@ -37,6 +41,10 @@ class TestMKLDNNConcatOp2(TestConcatOp2):
self.attrs["use_mkldnn"] = True
self._cpu_only = True
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=(self.attrs["use_mkldnn"] == False))
def test_check_grad(self):
pass
......@@ -50,6 +58,10 @@ class TestMKLDNNConcatOp3(TestConcatOp3):
self.attrs["use_mkldnn"] = True
self._cpu_only = True
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=(self.attrs["use_mkldnn"] == False))
def test_check_grad(self):
pass
......
......@@ -146,7 +146,9 @@ class TestConv2dInt8Op(TestConv2dOp):
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output_with_place(core.CPUPlace(), atol=0)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output_with_place(
core.CPUPlace(), atol=0, check_dygraph=False)
def test_check_grad(self):
pass
......
......@@ -44,7 +44,8 @@ class TestDeQuantizeOp(OpTest):
self.attrs = {'Scale': self.scale, }
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def set_scale(self):
pass
......
......@@ -53,7 +53,8 @@ class TestFCMKLDNNOp(OpTest):
}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad_normal(self):
pass
......
......@@ -25,7 +25,13 @@ class TestLRNMKLDNNOp(TestLRNOp):
return attrs
def test_check_output(self):
self.check_output(atol=0.002)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(atol=0.002, check_dygraph=False)
def test_check_grad_normal(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.01, check_dygraph=False)
class TestLRNMKLDNNOpWithIsTest(TestLRNMKLDNNOp):
......@@ -37,7 +43,8 @@ class TestLRNMKLDNNOpWithIsTest(TestLRNMKLDNNOp):
def test_check_grad_normal(self):
def check_raise_is_test():
try:
self.check_grad(['X'], 'Out', max_relative_error=0.01)
self.check_grad(
['X'], 'Out', max_relative_error=0.01, check_dygraph=False)
except Exception as e:
t = \
"is_test attribute should be set to False in training phase."
......
......@@ -73,7 +73,9 @@ class TestMKLDNNMulOpS8S8(OpTest):
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output_with_place(core.CPUPlace(), atol=0)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output_with_place(
core.CPUPlace(), atol=0, check_dygraph=False)
def test_check_grad_normal(self):
pass
......
......@@ -43,7 +43,9 @@ class TestPool2dMKLDNNInt8_Op(TestPool2D_Op):
self.outputs = {'Out': output}
def test_check_output(self):
self.check_output_with_place(core.CPUPlace(), atol=1e-5)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output_with_place(
core.CPUPlace(), atol=1e-5, check_dygraph=False)
def test_check_grad(self):
pass
......
......@@ -47,7 +47,8 @@ class TestQuantizeOp(OpTest):
}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def set_scale(self):
pass
......
......@@ -52,7 +52,8 @@ class TestReQuantizeOp(OpTest):
self.attrs = {'Scale_in': self.scale_in, 'Scale_out': self.scale_out}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def set_scale(self):
pass
......
......@@ -22,7 +22,62 @@ from paddle.fluid.tests.unittests.test_softmax_op import *
from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
def stable_softmax(x):
"""Compute the softmax of vector x in a numerically stable way."""
shiftx = x - np.max(x).clip(-64.)
exps = np.exp(shiftx)
return exps / np.sum(exps)
class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def get_x_shape(self):
return [10, 10]
def get_axis(self):
return -1
def setUp(self):
self.op_type = "softmax"
self.use_cudnn = False
self.use_mkldnn = False
self.dtype = np.float32
self.init_kernel_type()
self.shape = self.get_x_shape()
self.axis = self.get_axis()
x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
out = np.apply_along_axis(stable_softmax, self.axis, x)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
self.attrs = {
'axis': self.axis,
'use_cudnn': self.use_cudnn,
'use_mkldnn': self.use_mkldnn
}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5, check_dygraph=False)
else:
self.check_output(check_dygraph=False)
def test_check_grad(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.use_cudnn or self.dtype == np.float16:
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_grad_with_place(
place, ["X"],
"Out",
max_relative_error=0.01,
check_dygraph=False)
else:
self.check_grad(
["X"], "Out", max_relative_error=0.01, check_dygraph=False)
def init_kernel_type(self):
self.use_mkldnn = True
......
......@@ -17,11 +17,32 @@ from __future__ import print_function
import unittest
from paddle.fluid.tests.unittests.test_sum_op import TestSumOp
import numpy as np
class TestMKLDNN(TestSumOp):
def init_kernel_type(self):
def setUp(self):
self.op_type = "sum"
self.init_kernel_type()
self.use_mkldnn = True
x0 = np.random.random((3, 4)).astype(self.dtype)
x1 = np.random.random((3, 4)).astype(self.dtype)
x2 = np.random.random((3, 4)).astype(self.dtype)
self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
y = x0 + x1 + x2
self.outputs = {'Out': y}
self.attrs = {'use_mkldnn': self.use_mkldnn}
def init_kernel_type(self):
self.dtype = np.float32
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(['x0'], 'Out', check_dygraph=False)
if __name__ == '__main__':
......
......@@ -48,8 +48,9 @@ class TestTransposeOp(OpTest):
self.op_type = "transpose2"
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output_with_place(
core.CPUPlace(), 1e-5, no_check_set=['XShape'])
core.CPUPlace(), 1e-5, no_check_set=['XShape'], check_dygraph=False)
def initTestCase(self):
self.shape = (2, 3, 4, 5)
......
......@@ -17,14 +17,40 @@ from __future__ import print_function
import unittest
from paddle.fluid.tests.unittests.test_transpose_op import TestTransposeOp
import numpy as np
class TestTransposeMKLDNN(TestTransposeOp):
def setUp(self):
self.init_op_type()
self.initTestCase()
self.inputs = {'X': np.random.random(self.shape).astype("float32")}
self.attrs = {
'axis': list(self.axis),
'use_mkldnn': self.use_mkldnn,
}
self.outputs = {
'XShape': np.random.random(self.shape).astype("float32"),
'Out': self.inputs['X'].transpose(self.axis)
}
def init_op_type(self):
self.op_type = "transpose2"
self.use_mkldnn = True
return
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(no_check_set=['XShape'], check_dygraph=False)
def test_check_grad(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(['X'], 'Out', check_dygraph=False)
def initTestCase(self):
self.shape = (3, 4)
self.axis = (1, 0)
class TestCase0MKLDNN(TestTransposeMKLDNN):
def initTestCase(self):
......
......@@ -38,7 +38,7 @@ class TestNGRAPHIncrementOp(OpTest):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
self.check_grad(['X'], 'Out', check_dygraph=False)
if __name__ == "__main__":
......
......@@ -32,6 +32,7 @@ from paddle.fluid.op import Operator
from paddle.fluid.executor import Executor
from paddle.fluid.framework import Program, OpProtoHolder, Variable
from testsuite import create_op, set_input, append_input_output, append_loss_ops
from paddle.fluid import unique_name
def randomize_probability(batch_size, class_num, dtype='float32'):
......@@ -260,51 +261,109 @@ class OpTest(unittest.TestCase):
else:
return fluid.dygraph.base.to_variable(value)
def append_input_output_for_dygraph(self, op_proto, np_list, is_input,
if_return_inputs_grad_dict, block):
def create_var(np_value, name, is_input, if_return_inputs_grad_dict):
np_value_temp = np_value
has_lod = False
lod_temp = None
if isinstance(np_value, tuple):
np_value_temp = np_value[0]
has_lod = True
lod_temp = np_value[1]
if is_input:
v = self._create_var_from_numpy(np_value_temp)
if if_return_inputs_grad_dict:
v.stop_gradient = False
if has_lod:
v._ivar.value().get_tensor().set_recursive_sequence_lengths(
lod_temp)
else:
v = block.create_var(
name=name,
dtype=np_value_temp.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False)
return v
# prepare variable for input or output
var_dict = defaultdict(list)
if if_return_inputs_grad_dict:
inputs_grad_dict = defaultdict()
proto_list = op_proto.inputs if is_input else op_proto.outputs
for var_proto in proto_list:
name = var_proto.name
if (name not in np_list) and var_proto.dispensable:
continue
if name not in np_list:
assert var_proto.intermediate, "{} not found".format(name)
v = block.create_var(
dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)
var_dict[name].append(v)
if if_return_inputs_grad_dict:
inputs_grad_dict[name] = v
continue
if var_proto.duplicable:
assert isinstance(
np_list[name],
list), "Duplicable {} should be set as list".format(name)
var_list = []
slot_name = name
for (name, np_value) in np_list[name]:
v = create_var(np_value, name, is_input,
if_return_inputs_grad_dict)
var_list.append(v)
if if_return_inputs_grad_dict:
inputs_grad_dict[name] = v
var_dict[slot_name] = var_list
else:
nplist_value_temp = None
name_temp = None
if isinstance(np_list[name], list):
nplist_value_temp = np_list[name][0]
name_temp = name
else:
nplist_value_temp = np_list[name]
name_temp = unique_name.generate("%s_out" % (name))
v = create_var(nplist_value_temp, name_temp, is_input,
if_return_inputs_grad_dict)
var_dict[name].append(v)
if if_return_inputs_grad_dict:
inputs_grad_dict[name] = v
if if_return_inputs_grad_dict:
return var_dict, inputs_grad_dict
else:
return var_dict
def _calc_dygraph_output(self, place, parallel=False, no_check_set=None):
with fluid.dygraph.base.guard(place=place):
block = fluid.default_main_program().global_block()
# prepare input variable
inputs = defaultdict(list)
for name, np_value in six.iteritems(self.inputs):
if not isinstance(np_value, list):
np_value = [np_value]
op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
for i in range(len(np_value)):
inputs[name].append(
self._create_var_from_numpy(np_value[i]))
# prepare input variable
inputs = self.append_input_output_for_dygraph(op_proto, self.inputs,
True, False, block)
# prepare output variable
outputs = defaultdict(list)
for name, np_value in six.iteritems(self.outputs):
if not isinstance(np_value, list):
np_value = [np_value]
for i in range(len(np_value)):
value = np_value[i]
if isinstance(value, tuple):
v = block.create_var(
name="%s_out%d" % (name, i),
dtype=value[0].dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False)
v._ivar.value().get_tensor(
).set_recursive_sequence_lengths(value[1])
else:
v = block.create_var(
name="%s_out%d" % (name, i),
dtype=value.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False)
outputs[name].append(v)
outputs = self.append_input_output_for_dygraph(
op_proto, self.outputs, False, False, block)
# prepare attributes
attrs_outputs = {}
if hasattr(self, "attrs"):
for attrs_name in self.attrs:
if self.attrs[attrs_name] is not None:
attrs_outputs[attrs_name] = self.attrs[attrs_name]
block.append_op(
type=self.op_type,
inputs=inputs,
outputs=outputs,
attrs=self.attrs)
attrs=attrs_outputs if hasattr(self, "attrs") else None)
return outputs
def _calc_output(self,
......@@ -725,7 +784,7 @@ class OpTest(unittest.TestCase):
atol,
no_check_set=None,
equal_nan=False,
check_dygraph=False,
check_dygraph=True,
inplace_atol=None):
if check_dygraph:
dygraph_outs = self._calc_dygraph_output(
......@@ -737,6 +796,18 @@ class OpTest(unittest.TestCase):
if no_check_set is not None and out_name in no_check_set:
continue
def find_imperative_actual(target_name, dygraph_outs, place):
with fluid.dygraph.base.guard(place=place):
for name in dygraph_outs:
if name == target_name:
return dygraph_outs[name][0]
var_list = dygraph_outs[name]
for i, var in enumerate(var_list):
if var.name == target_name:
return dygraph_outs[name][i]
self.assertTrue(False, "Found failed {} {}".format(
dygraph_outs.keys(), target_name))
def find_actual(target_name, fetch_list):
found = [
i for i, var_name in enumerate(fetch_list)
......@@ -755,7 +826,8 @@ class OpTest(unittest.TestCase):
for item in sub_out:
sub_out_name, expect = item[0], item[1]
if check_dygraph:
imperative_actual = dygraph_outs[sub_out_name][0]
imperative_actual = find_imperative_actual(
sub_out_name, dygraph_outs, place)
imperative_actual_t = np.array(
imperative_actual._ivar.value().get_tensor())
idx = find_actual(sub_out_name, fetch_list)
......@@ -782,15 +854,17 @@ class OpTest(unittest.TestCase):
actual.recursive_sequence_lengths(), expect[1],
"Output (" + sub_out_name +
") has different lod at " + str(place))
if check_dygraph:
self.assertListEqual(
imperative_actual._ivar.value().get_tensor()
.recursive_sequence_lengths(), expect[1],
"Output (" + out_name + ") has different lod at " +
str(place) + " in dygraph mode")
if check_dygraph:
self.assertListEqual(
imperative_actual._ivar.value().get_tensor()
.recursive_sequence_lengths(), expect[1],
"Output (" + out_name +
") has different lod at " + str(place) +
" in dygraph mode")
else:
if check_dygraph:
imperative_actual = dygraph_outs[out_name][0]
imperative_actual = find_imperative_actual(
out_name, dygraph_outs, place)
imperative_actual_t = np.array(
imperative_actual._ivar.value().get_tensor())
idx = find_actual(out_name, fetch_list)
......@@ -805,16 +879,22 @@ class OpTest(unittest.TestCase):
"\nExpect " + str(expect_t) + "\n" + "But Got" +
str(actual_t) + " in class " + self.__class__.__name__)
if check_dygraph:
self.assertTrue(
np.allclose(
imperative_actual_t,
expect_t,
atol=atol,
equal_nan=equal_nan),
"Output (" + out_name + ") has diff at " + str(place) +
"\nExpect " + str(expect_t) + "\n" + "But Got" +
str(imperative_actual_t) + " in class " +
self.__class__.__name__)
if six.moves.reduce(
lambda x, y: x * y, imperative_actual_t.shape,
1) == 0 and six.moves.reduce(
lambda x, y: x * y, expect_t.shape, 1) == 0:
pass
else:
self.assertTrue(
np.allclose(
imperative_actual_t,
expect_t,
atol=atol,
equal_nan=equal_nan),
"Output (" + out_name + ") has diff at " +
str(place) + "\nExpect " + str(expect_t) + "\n" +
"But Got" + str(imperative_actual_t) + " in class "
+ self.__class__.__name__)
if isinstance(expect, tuple):
self.assertListEqual(actual.recursive_sequence_lengths(),
expect[1], "Output (" + out_name +
......@@ -909,7 +989,7 @@ class OpTest(unittest.TestCase):
atol=1e-5,
no_check_set=None,
equal_nan=False,
check_dygraph=False,
check_dygraph=True,
inplace_atol=None,
check_compile_vs_runtime=False):
places = self._get_places()
......@@ -957,13 +1037,14 @@ class OpTest(unittest.TestCase):
numeric_grad_delta=0.005,
in_place=False,
max_relative_error=0.005,
user_defined_grads=None):
user_defined_grads=None,
check_dygraph=True):
places = self._get_places()
for place in places:
self.check_grad_with_place(place, inputs_to_check, output_names,
no_grad_set, numeric_grad_delta,
in_place, max_relative_error,
user_defined_grads)
user_defined_grads, check_dygraph)
def check_grad_with_place(self,
place,
......@@ -973,7 +1054,8 @@ class OpTest(unittest.TestCase):
numeric_grad_delta=0.005,
in_place=False,
max_relative_error=0.005,
user_defined_grads=None):
user_defined_grads=None,
check_dygraph=True):
self.scope = core.Scope()
op_inputs = self.inputs if hasattr(self, "inputs") else dict()
op_outputs = self.outputs if hasattr(self, "outputs") else dict()
......@@ -1009,11 +1091,118 @@ class OpTest(unittest.TestCase):
]
analytic_grads = self._get_gradient(inputs_to_check, place,
output_names, no_grad_set)
self._assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
max_relative_error,
"Gradient Check On %s" % str(place))
if check_dygraph:
dygraph_grad = self._get_dygraph_grad(inputs_to_check, place,
output_names, no_grad_set)
self._assert_is_close(numeric_grads, dygraph_grad, inputs_to_check,
max_relative_error,
"Gradient Check On %s" % str(place))
def _find_var_in_dygraph(self, output_vars, name):
if name in output_vars:
return output_vars[name]
else:
for output_vars_index in output_vars:
for output_vars_selected in output_vars[output_vars_index]:
if output_vars_selected.name == name:
return output_vars_selected
def _get_dygraph_grad(self,
inputs_to_check,
place,
output_names,
no_grad_set=None):
with fluid.dygraph.base.guard(place=place):
block = fluid.default_main_program().global_block()
op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
# prepare input variable
inputs, inputs_grad_dict = self.append_input_output_for_dygraph(
op_proto, self.inputs, True, True, block)
# prepare output variable
outputs = self.append_input_output_for_dygraph(
op_proto, self.outputs, False, False, block)
# prepare attributes
attrs_outputs = {}
if hasattr(self, "attrs"):
for attrs_name in self.attrs:
if self.attrs[attrs_name] is not None:
attrs_outputs[attrs_name] = self.attrs[attrs_name]
block.append_op(
type=self.op_type,
inputs=inputs,
outputs=outputs,
attrs=attrs_outputs if hasattr(self, "attrs") else None)
outputs_valid = {}
for output_name in output_names:
outputs_valid[output_name] = self._find_var_in_dygraph(
outputs, output_name)
if len(outputs_valid) == 1:
loss = block.create_var(
dtype=self.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False,
shape=[1])
for outputs_valid_key in outputs_valid:
block.append_op(
type="mean",
inputs={"X": outputs_valid[outputs_valid_key]},
outputs={"Out": [loss]},
attrs=None)
else:
avg_sum = []
for cur_loss in outputs_valid:
cur_avg_loss = block.create_var(
dtype=self.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False)
block.append_op(
type="mean",
inputs={"X": outputs_valid[cur_loss]},
outputs={"Out": [cur_avg_loss]},
attrs=None)
avg_sum.append(cur_avg_loss)
loss_sum = block.create_var(
dtype=self.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False,
shape=[1])
block.append_op(
type='sum',
inputs={"X": avg_sum},
outputs={"Out": loss_sum},
attrs=None)
loss = block.create_var(
dtype=self.dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=False,
shape=[1])
block.append_op(
type='scale',
inputs={"X": loss_sum},
outputs={"Out": loss},
attrs={'scale': 1.0 / float(len(avg_sum))})
loss.backward()
fetch_list_grad = []
for inputs_to_check_name in inputs_to_check:
a = inputs_grad_dict[inputs_to_check_name].gradient()
fetch_list_grad.append(a)
return fetch_list_grad
@staticmethod
def _numpy_to_lod_tensor(np_value, lod, place):
tensor = core.LoDTensor()
......
......@@ -319,34 +319,44 @@ class TestConv2dOp(OpTest):
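The new `_get_dygraph_grad` helper above drives the dygraph side of `check_grad`: it replays the operator imperatively, reduces each checked output to a scalar with `mean`, sums and rescales those means into one loss, calls `backward()`, and reads the input gradients via `.gradient()`. A simplified sketch of that flow for a single-input, single-output op follows; the helper name and the callable argument are illustrative, not part of the patch.

```python
import numpy as np

import paddle.fluid as fluid


def dygraph_grad_sketch(op_callable, np_x, place=fluid.CPUPlace()):
    """Illustrative only: mirrors the loss construction in _get_dygraph_grad."""
    with fluid.dygraph.guard(place):
        x = fluid.dygraph.to_variable(np_x)
        x.stop_gradient = False          # gradients w.r.t. x are wanted
        out = op_callable(x)             # forward pass in imperative mode
        loss = fluid.layers.mean(out)    # scalar surrogate loss over the output
        loss.backward()                  # autograd through the op
        return x.gradient()              # numpy array holding dLoss/dX


# Example: the gradient of mean(2 * x) w.r.t. x is 2 / x.size for every element.
grad = dygraph_grad_sketch(lambda v: fluid.layers.scale(v, scale=2.0),
                           np.ones([3, 4], dtype="float32"))
```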
def test_check_output(self):
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
self.check_output_with_place(place, atol=1e-5)
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output_with_place(
place, atol=1e-5, check_dygraph=(self.use_mkldnn == False))
def test_check_grad(self):
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place, {'Input', 'Filter'}, 'Output', max_relative_error=0.02)
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.02,
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_filter(self):
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
no_grad_set=set(['Filter']),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_input(self):
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
no_grad_set=set(['Input']),
check_dygraph=(self.use_mkldnn == False))
def init_test_case(self):
self.pad = [0, 0]
......@@ -739,17 +749,24 @@ class TestConv2dOp_v2(OpTest):
self.use_cuda)
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
self.check_output_with_place(place, atol=1e-5)
self.check_output_with_place(
place, atol=1e-5, check_dygraph=(self.use_mkldnn == False))
def test_check_grad(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
self.check_grad_with_place(
place, {'Input', 'Filter'}, 'Output', max_relative_error=0.02)
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.02,
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_filter(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
......@@ -757,9 +774,11 @@ class TestConv2dOp_v2(OpTest):
place, ['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
no_grad_set=set(['Filter']),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_input(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cuda() else core.CPUPlace()
......@@ -767,7 +786,8 @@ class TestConv2dOp_v2(OpTest):
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
no_grad_set=set(['Input']),
check_dygraph=(self.use_mkldnn == False))
def init_test_case(self):
self.pad = [0, 0]
......
......@@ -142,11 +142,13 @@ class TestConv2dTransposeOp(OpTest):
self.outputs = {'Output': output}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
self.check_output_with_place(
place, atol=1e-5, check_dygraph=(self.use_mkldnn == False))
else:
self.check_output()
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_input(self):
if self.use_cudnn:
......
......@@ -271,35 +271,45 @@ class TestConv3dOp(OpTest):
return core.is_compiled_with_cuda() and self.use_cudnn
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace()
self.check_output_with_place(place, atol=1e-5)
self.check_output_with_place(
place, atol=1e-5, check_dygraph=(self.use_mkldnn == False))
def test_check_grad(self):
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place, {'Input', 'Filter'}, 'Output', max_relative_error=0.03)
place, {'Input', 'Filter'},
'Output',
max_relative_error=0.03,
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_filter(self):
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
no_grad_set=set(['Filter']),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_input(self):
if self.dtype == np.float16:
return
place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad_with_place(
place, ['Input'],
place, ['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
no_grad_set=set(['Input']),
check_dygraph=(self.use_mkldnn == False))
def init_test_case(self):
self.pad = [0, 0, 0]
......@@ -560,7 +570,7 @@ class TestConv3dOp_2(OpTest):
return
place = core.CUDAPlace(0) if self.has_cudnn() else core.CPUPlace()
self.check_grad_with_place(
place, ['Input'],
place, ['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
......
......@@ -42,24 +42,40 @@ class TestElementwiseAddOp(OpTest):
self.outputs = {'Out': self.out}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_normal(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
self.check_grad(
['X', 'Y'],
'Out',
max_relative_error=0.005,
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_x(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
['Y'],
'Out',
max_relative_error=0.005,
no_grad_set=set("X"),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_y(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
['X'],
'Out',
max_relative_error=0.005,
no_grad_set=set('Y'),
check_dygraph=(self.use_mkldnn == False))
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
......@@ -78,10 +94,12 @@ class TestFP16ElementwiseAddOp(TestElementwiseAddOp):
self.dtype = np.float16
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
self.check_output_with_place(
place, atol=1e-3, check_dygraph=(self.use_mkldnn == False))
class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
......
......@@ -43,16 +43,29 @@ class ElementwiseMulOp(OpTest):
self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out')
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_x(self):
self.check_grad(['Y'], 'Out', no_grad_set=set("X"))
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['Y'],
'Out',
no_grad_set=set("X"),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_y(self):
self.check_grad(['X'], 'Out', no_grad_set=set('Y'))
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'],
'Out',
no_grad_set=set('Y'),
check_dygraph=(self.use_mkldnn == False))
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
......@@ -74,6 +87,7 @@ class TestElementwiseMulOp_scalar(ElementwiseMulOp):
'Y': np.random.rand(1).astype(np.float32)
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
class TestElementwiseMulOp_Vector(ElementwiseMulOp):
......@@ -84,6 +98,7 @@ class TestElementwiseMulOp_Vector(ElementwiseMulOp):
'Y': np.random.random((32, )).astype("float64")
}
self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
self.init_kernel_type()
class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp):
......@@ -108,6 +123,7 @@ class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp):
self.outputs = {
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 3, 1)
}
self.init_kernel_type()
class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp):
......@@ -121,6 +137,7 @@ class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp):
self.outputs = {
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 1, 4)
}
self.init_kernel_type()
class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
......@@ -135,6 +152,7 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
self.outputs = {
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 3, 4, 1)
}
self.init_kernel_type()
class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp):
......@@ -145,6 +163,7 @@ class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp):
'Y': np.random.rand(2, 1, 4).astype(np.float64)
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp):
......@@ -155,6 +174,7 @@ class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp):
'Y': np.random.rand(2, 3, 1, 5).astype(np.float64)
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
self.init_kernel_type()
class TestElementwiseMulOpFp16(ElementwiseMulOp):
......
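Editor's note (not part of the commit): every `test_check_*` method in the elementwise_mul hunks switches off the dygraph comparison only when the MKL-DNN kernel is in use, with subclasses flipping `use_mkldnn` through `init_kernel_type`. A minimal sketch of that gating, again assuming `op_test.OpTest`; names and shapes are illustrative.

    # Illustrative sketch only.
    import numpy as np
    from op_test import OpTest


    class ExampleMulOpTest(OpTest):
        """Shows the use_mkldnn -> check_dygraph gate from the diff."""

        def setUp(self):
            self.op_type = "elementwise_mul"
            self.use_mkldnn = False
            self.init_kernel_type()  # MKL-DNN subclasses set use_mkldnn = True
            x = np.random.uniform(0.1, 1, [13, 17]).astype(np.float64)
            y = np.random.uniform(0.1, 1, [13, 17]).astype(np.float64)
            self.inputs = {'X': x, 'Y': y}
            self.outputs = {'Out': np.multiply(x, y)}
            self.attrs = {'axis': -1, 'use_mkldnn': self.use_mkldnn}

        def init_kernel_type(self):
            pass

        def test_check_output(self):
            # dygraph mode cannot run MKL-DNN kernels yet, so the extra
            # dygraph check is enabled only for the plain kernels.
            self.check_output(check_dygraph=(self.use_mkldnn == False))

        def test_check_grad(self):
            self.check_grad(
                ['X', 'Y'], 'Out',
                check_dygraph=(self.use_mkldnn == False))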
......@@ -66,7 +66,7 @@ class TestFillAnyLikeOpOverflow(TestFillAnyLikeOp):
def test_check_output(self):
exception = None
try:
self.check_output()
self.check_output(check_dygraph=False)
except core.EnforceNotMet as ex:
exception = ex
self.assertIsNotNone(exception)
......
......@@ -45,12 +45,15 @@ class TestFusedEmbeddingSeqPoolOp(OpTest):
}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support lod in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
# TODO(wangzhongpu): support lod in dygraph mode
if ver.mkl() == "ON" and 'Linux' in platform.platform():
self.attrs = {'is_sparse': False}
self.check_grad(['W'], 'Out', no_grad_set=('Ids'))
self.check_grad(
['W'], 'Out', no_grad_set=('Ids'), check_dygraph=False)
class TestLookupTableOpWithPadding(TestFusedEmbeddingSeqPoolOp):
......@@ -74,14 +77,17 @@ class TestLookupTableOpWithPadding(TestFusedEmbeddingSeqPoolOp):
np.array(output), [len(self.lod[0]), 2 * self.emb_size])
}
self.attrs = {'padding_idx': int(padding_idx)}
self.check_output()
# TODO(wangzhongpu): support lod in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if ver.mkl() == "ON" and 'Linux' in platform.platform():
ids = np.squeeze(self.ids, axis=2)
padding_idx = np.random.choice(ids.flatten(), 1)[0]
self.attrs = {'padding_idx': int(padding_idx), 'is_sparse': False}
self.check_grad(['W'], 'Out', no_grad_set=('Ids'))
# TODO(wangzhongpu): support lod in dygraph mode
self.check_grad(
['W'], 'Out', no_grad_set=('Ids'), check_dygraph=False)
class TestFusedEmbeddingSeqPoolApi(unittest.TestCase):
......
......@@ -34,10 +34,12 @@ class TestLodResetOpByAttr(OpTest):
self.outputs = {'Out': (x, [target_lod])}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support lod in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
self.check_grad(["X"], "Out")
# TODO(wangzhongpu): support lod in dygraph mode
self.check_grad(["X"], "Out", check_dygraph=False)
class TestLodResetOpByInput(OpTest):
......@@ -56,10 +58,12 @@ class TestLodResetOpByInput(OpTest):
self.outputs = {'Out': (x, [target_lod])}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support lod in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
self.check_grad(["X"], "Out", no_grad_set=set("Y"))
# TODO(wangzhongpu): support lod in dygraph mode
self.check_grad(["X"], "Out", no_grad_set=set("Y"), check_dygraph=False)
class TestLodResetOpBoth(OpTest):
......@@ -78,10 +82,12 @@ class TestLodResetOpBoth(OpTest):
self.outputs = {'Out': (x, [target_lod_in])}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support lod in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
self.check_grad(["X"], "Out", no_grad_set=set("Y"))
# TODO(wangzhongpu): support lod in dygraph mode
self.check_grad(["X"], "Out", no_grad_set=set("Y"), check_dygraph=False)
class TestLodResetOpYIsLoDTensor(OpTest):
......@@ -95,10 +101,12 @@ class TestLodResetOpYIsLoDTensor(OpTest):
self.outputs = {'Out': (x, target_lod)}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support lod in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
self.check_grad(["X"], "Out", no_grad_set=set("Y"))
# TODO(wangzhongpu): support lod in dygraph mode
self.check_grad(["X"], "Out", no_grad_set=set("Y"), check_dygraph=False)
class TestLodAppendOpByAttr(OpTest):
......@@ -116,10 +124,12 @@ class TestLodAppendOpByAttr(OpTest):
self.outputs = {'Out': (x, out_lod)}
def test_check_output(self):
self.check_output()
# TODO(wangzhongpu): support lod in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
self.check_grad(["X"], "Out")
# TODO(wangzhongpu): support lod in dygraph mode
self.check_grad(["X"], "Out", check_dygraph=False)
if __name__ == '__main__':
......
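Editor's note (not part of the commit): the lod_reset and fused_embedding_seq_pool hunks follow a different rule. Tests whose tensors carry LoD disable the dygraph comparison unconditionally, because op_test cannot feed LoD through the dygraph path yet (see the TODO comments). A minimal sketch under that assumption; values are illustrative.

    # Illustrative sketch only.
    import numpy as np
    from op_test import OpTest


    class ExampleLodResetOpTest(OpTest):
        """LoD-carrying inputs keep check_dygraph=False for now."""

        def setUp(self):
            self.op_type = "lod_reset"
            x = np.random.random((10, 20)).astype("float64")
            # A (tensor, LoD) tuple attaches level-of-detail info in op_test.
            self.inputs = {'X': (x, [[3, 2, 5]])}
            target_lod = [7, 3]
            self.attrs = {'target_lod': target_lod}
            self.outputs = {'Out': (x, [target_lod])}

        def test_check_output(self):
            # LoD is not handled by the dygraph path of op_test yet.
            self.check_output(check_dygraph=False)

        def test_check_grad(self):
            self.check_grad(["X"], "Out", check_dygraph=False)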
......@@ -171,18 +171,21 @@ class TestCUDNNLstmOp(OpTest):
}
def test_output_with_place(self):
# depend on the scope structure
if self.has_cuda():
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
self.check_output_with_place(place, atol=1e-5, check_dygraph=False)
def test_grad_with_place(self):
# depend on the scope structure
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'W', 'InitH', 'InitC']),
['Out', 'last_h', 'last_c'],
max_relative_error=0.02)
max_relative_error=0.02,
check_dygraph=False)
def has_cuda(self):
return core.is_compiled_with_cuda()
......
......@@ -275,21 +275,32 @@ class TestPool2D_Op(OpTest):
return core.is_compiled_with_cuda() and self.use_cudnn
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.has_cudnn():
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
self.check_output_with_place(
place, atol=1e-5, check_dygraph=(self.use_mkldnn == False))
else:
self.check_output()
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.has_cudnn() and self.pool_type != "max":
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, set(['X']), 'Out', max_relative_error=0.07)
place,
set(['X']),
'Out',
max_relative_error=0.07,
check_dygraph=(self.use_mkldnn == False))
elif self.pool_type != "max":
self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
self.check_grad(
set(['X']),
'Out',
max_relative_error=0.07,
check_dygraph=(self.use_mkldnn == False))
def init_data_format(self):
self.data_format = "NCHW"
......@@ -418,17 +429,26 @@ def create_test_cudnn_fp16_class(parent, check_grad=True):
self.dtype = np.float16
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
self.check_output_with_place(
place,
atol=1e-3,
check_dygraph=(self.use_mkldnn == False))
def test_check_grad(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
place = core.CUDAPlace(0)
if core.is_float16_supported(
place) and self.pool_type != "max" and check_grad:
self.check_grad_with_place(
place, set(['X']), 'Out', max_relative_error=0.07)
place,
set(['X']),
'Out',
max_relative_error=0.07,
check_dygraph=(self.use_mkldnn == False))
cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16Op")
TestCUDNNFp16Case.__name__ = cls_name
......
......@@ -62,20 +62,30 @@ class TestSoftmaxOp(OpTest):
pass
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
self.check_output_with_place(
place, atol=1e-5, check_dygraph=(self.use_mkldnn == False))
else:
self.check_output()
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.use_cudnn or self.dtype == np.float16:
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_grad_with_place(
place, ["X"], "Out", max_relative_error=0.01)
place, ["X"],
"Out",
max_relative_error=0.01,
check_dygraph=(self.use_mkldnn == False))
else:
self.check_grad(["X"], "Out", max_relative_error=0.01)
self.check_grad(
["X"],
"Out",
max_relative_error=0.01,
check_dygraph=(self.use_mkldnn == False))
class TestSoftmaxOpError(OpTest):
......