From 57a3b8b69e750e47487a24d5c6888fc122a63fa5 Mon Sep 17 00:00:00 2001
From: wanghaoshuang <wanghaoshuang@baidu.com>
Date: Mon, 18 Sep 2017 15:18:24 +0800
Subject: [PATCH] 1. Implement GPUCrop kernel instead of eigen. 2. Fix unitest

---
 paddle/operators/crop_op.cc                   | 26 +++----
 paddle/operators/crop_op.cu                   |  8 +--
 paddle/operators/crop_op.h                    |  7 +-
 python/paddle/v2/framework/tests/op_test.py   | 10 +--
 .../paddle/v2/framework/tests/test_crop_op.py | 69 +++++++++----------
 5 files changed, 53 insertions(+), 67 deletions(-)
diff --git a/paddle/operators/crop_op.cc b/paddle/operators/crop_op.cc
index 9f4a3152e41..09fa13dfbbf 100644
--- a/paddle/operators/crop_op.cc
+++ b/paddle/operators/crop_op.cc
@@ -19,6 +19,7 @@ namespace paddle {
 namespace operators {
 
 using framework::Tensor;
+using framework::LoDTensor;
 
 class CropOp : public framework::OperatorWithKernel {
  public:
@@ -26,8 +27,8 @@ class CropOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto x_dim = ctx.Input<Tensor>("X")->dims();
-    auto Y = ctx.Input<Tensor>("Y");
+    auto x_dim = ctx.Input<LoDTensor>("X")->dims();
+    auto Y = ctx.Input<LoDTensor>("Y");
     if (Y == nullptr) {
       auto shape = Attr<std::vector<int>>("shape");
       PADDLE_ENFORCE_EQ(
@@ -37,9 +38,9 @@ class CropOp : public framework::OperatorWithKernel {
       for (size_t i = 0; i < shape.size(); ++i) {
         tensor_shape[i] = (int64_t)shape[i];
       }
-      ctx.Output<Tensor>("Out")->Resize(framework::make_ddim(tensor_shape));
+      ctx.Output<LoDTensor>("Out")->Resize(framework::make_ddim(tensor_shape));
     } else {
-      ctx.Output<Tensor>("Out")->Resize(Y->dims());
+      ctx.Output<LoDTensor>("Out")->Resize(Y->dims());
     }
   }
 };
@@ -112,8 +113,8 @@ class CropOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                             "Input(Out@GRAD) should not be null");
-    auto x_dims = ctx.Input<Tensor>("X")->dims();
-    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto x_dims = ctx.Input<LoDTensor>("X")->dims();
+    auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
     if (x_grad != nullptr) {
       x_grad->Resize(x_dims);
     }
@@ -141,23 +142,17 @@ template <typename T>
 class CropCPUKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
-    LOG(INFO) << "CropCPUKernel step1";
-    auto *x = context.Input<Tensor>("X");
-    LOG(INFO) << "CropCPUKernel step2";
-    auto *out = context.Output<Tensor>("Out");
-    LOG(INFO) << "CropCPUKernel step3";
+    auto *x = context.Input<LoDTensor>("X");
+    auto *out = context.Output<LoDTensor>("Out");
     auto x_data = x->data<T>();
-    T *out_data = out->mutable_data<T>(paddle::platform::CPUPlace());
-    LOG(INFO) << "CropCPUKernel step4";
+    T *out_data = out->mutable_data<T>(context.GetPlace());
     auto x_dims = x->dims();
     auto out_dims = out->dims();
-    LOG(INFO) << "CropCPUKernel step5";
     int64_t out_count = framework::product(out_dims);
     std::vector<int64_t> x_shape = framework::vectorize(x_dims);
     std::vector<int64_t> out_shape = framework::vectorize(out_dims);
 
     auto offsets = context.op().Attr<std::vector<int>>("offsets");
-    LOG(INFO) << "CropCPUKernel step6";
     PADDLE_ENFORCE_EQ(
         x_dims.size(), offsets.size(),
         "Offsets size should be equal to dimension size of input tensor.");
@@ -171,7 +166,6 @@ class CropCPUKernel : public framework::OpKernel {
     for (int64_t i = 0; i < out_count; ++i) {
       out_data[i] = x_data[transIndex(out_shape, x_shape, crop_rules, i)];
     }
-    LOG(INFO) << "CropCPUKernel step7";
   }
 };
 
diff --git a/paddle/operators/crop_op.cu b/paddle/operators/crop_op.cu
index f39478858a8..1715b2eaf9c 100644
--- a/paddle/operators/crop_op.cu
+++ b/paddle/operators/crop_op.cu
@@ -20,6 +20,7 @@ namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
 
 template <typename T, int D>
 __global__ void CropKernel(const int N, const int64_t* out_shape,
@@ -48,9 +49,8 @@ template <typename T, int D>
 void CropCUDAFunctoin(const framework::ExecutionContext& context) {
   PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()),
                  "It must use GPUPlace.");
-  LOG(INFO) << "CropCUDAFunctoin step1";
-  auto* x = context.Input<Tensor>("X");
-  auto* out = context.Output<Tensor>("Out");
+  auto* x = context.Input<LoDTensor>("X");
+  auto* out = context.Output<LoDTensor>("Out");
   auto x_data = x->data<T>();
   T* out_data = out->mutable_data<T>(paddle::platform::GPUPlace());
   auto x_dims = x->dims();
@@ -100,7 +100,7 @@ template <typename T>
 class CropOpCUDAKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    size_t rank = context.Input<Tensor>("X")->dims().size();
+    size_t rank = context.Input<LoDTensor>("X")->dims().size();
     switch (rank) {
       case 1:
         CropCUDAFunctoin<T, 1>(context);
diff --git a/paddle/operators/crop_op.h b/paddle/operators/crop_op.h
index 40bd0246749..7f041737a7e 100644
--- a/paddle/operators/crop_op.h
+++ b/paddle/operators/crop_op.h
@@ -25,11 +25,12 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor,
 using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
 
 using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
 
 template <typename Place, typename T, size_t D>
 void CropGradFunction(const framework::ExecutionContext& context) {
-  auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
-  auto* d_x = context.Output<Tensor>(framework::GradVarName("X"));
+  auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
+  auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
   if (d_x != nullptr) {
     d_x->mutable_data<T>(context.GetPlace());
     auto d_x_dims = d_x->dims();
@@ -52,7 +53,7 @@ class CropGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     size_t rank =
-        context.Input<Tensor>(framework::GradVarName("Out"))->dims().size();
+        context.Input<LoDTensor>(framework::GradVarName("Out"))->dims().size();
     switch (rank) {
       case 1:
         CropGradFunction<Place, T, 1>(context);
diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py
index 8e111af4678..a0533efacdc 100644
--- a/python/paddle/v2/framework/tests/op_test.py
+++ b/python/paddle/v2/framework/tests/op_test.py
@@ -64,7 +64,6 @@ def set_input(scope, op, inputs, place):
                 tensor.set_dims(in_array.shape)
                 tensor.set(in_array, place)
                 if isinstance(in_val, tuple):
-                    print "set lod"
                     tensor.set_lod(in_val[1])
 
 
@@ -189,10 +188,8 @@ class OpTest(unittest.TestCase):
         self.op.infer_shape(self.scope)
         ctx = core.DeviceContext.create(place)
         self.op.run(self.scope, ctx)
-        print "finish self.op.run"
+
         for out_name, out_dup in Operator.get_op_outputs(self.op.type()):
-            print "finish Operator.get_op_outputs"
-            print "out_dup=%s; out_name=%s" % (out_dup, out_name)
             if out_dup:
                 sub_out = self.outputs[out_name]
                 for sub_out_name in sub_out:
@@ -204,17 +201,12 @@ class OpTest(unittest.TestCase):
                             actual, expect, atol=1e-05),
                         "output name: " + out_name + "has diff")
             else:
-                v = self.scope.find_var(out_name)
-                print "var=%s" % v
-                print "tensor=%s" % v.get_tensor()
                 actual = np.array(self.scope.find_var(out_name).get_tensor())
-                print "actual=%s" % actual
                 expect = self.outputs[out_name]
                 self.assertTrue(
                     np.allclose(
                         actual, expect, atol=1e-05),
                     "output name: " + out_name + "has diff")
-                print "finish check in %s" % place
 
     def check_output(self):
         places = [core.CPUPlace()]
diff --git a/python/paddle/v2/framework/tests/test_crop_op.py b/python/paddle/v2/framework/tests/test_crop_op.py
index 45f13d84e51..62c883bdc13 100644
--- a/python/paddle/v2/framework/tests/test_crop_op.py
+++ b/python/paddle/v2/framework/tests/test_crop_op.py
@@ -47,45 +47,44 @@ class TestCropOp(OpTest):
 
     def initTestCase(self):
         self.x_shape = (8, 8)
-        self.crop_shape = [2, 2]
+        self.crop_shape = (2, 2)
         self.offsets = [1, 2]
 
     def test_check_output(self):
         self.check_output()
-        print "finish check_output"
-
-    #def test_check_grad_normal(self):
-    #    self.check_grad(['X'], 'Out', max_relative_error=0.006)
-
-    #class TestCase1(TestCropOp):
-    #    def initTestCase(self):
-    #        self.x_shape = (16, 16, 16)
-    #        self.crop_shape = [2, 2, 3]
-    #        self.offsets = [1, 5, 3]
-    #
-    #
-    #class TestCase2(TestCropOp):
-    #    def initTestCase(self):
-    #        self.x_shape = (4, 4)
-    #        self.crop_shape = [4, 4]
-    #        self.offsets = [0, 0]
-    #
-    #
-    #class TestCase3(TestCropOp):
-    #    def initTestCase(self):
-    #        self.x_shape = (16, 16, 16)
-    #        self.crop_shape = [2, 2, 3]
-    #        self.offsets = [1, 5, 3]
-    #        self.crop_by_input = True
-    #
-    #
-    #class TestCase4(TestCropOp):
-    #    def initTestCase(self):
-    #        self.x_shape = (4, 4)
-    #        self.crop_shape = [4, 4]
-    #        self.offsets = [0, 0]
-    #        self.crop_by_input = True
-    #
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out', max_relative_error=0.006)
+
+
+class TestCase1(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (16, 8, 32)
+        self.crop_shape = [2, 2, 3]
+        self.offsets = [1, 5, 3]
+
+
+class TestCase2(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (4, 8)
+        self.crop_shape = [4, 8]
+        self.offsets = [0, 0]
+
+
+class TestCase3(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (4, 8, 16)
+        self.crop_shape = [2, 2, 3]
+        self.offsets = [1, 5, 3]
+        self.crop_by_input = True
+
+
+class TestCase4(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (4, 4)
+        self.crop_shape = [4, 4]
+        self.offsets = [0, 0]
+        self.crop_by_input = True
 
 
 if __name__ == '__main__':
-- 
GitLab