Commit 57a3b8b6 authored by wanghaoshuang

1. Implement GPUCrop kernel instead of Eigen.

2. Fix unittest.
Parent 57011b20
@@ -19,6 +19,7 @@ namespace paddle {
 namespace operators {
 using framework::Tensor;
+using framework::LoDTensor;
 
 class CropOp : public framework::OperatorWithKernel {
  public:
@@ -26,8 +27,8 @@ class CropOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto x_dim = ctx.Input<Tensor>("X")->dims();
-    auto Y = ctx.Input<Tensor>("Y");
+    auto x_dim = ctx.Input<LoDTensor>("X")->dims();
+    auto Y = ctx.Input<LoDTensor>("Y");
     if (Y == nullptr) {
       auto shape = Attr<std::vector<int>>("shape");
       PADDLE_ENFORCE_EQ(
@@ -37,9 +38,9 @@ class CropOp : public framework::OperatorWithKernel {
       for (size_t i = 0; i < shape.size(); ++i) {
         tensor_shape[i] = (int64_t)shape[i];
       }
-      ctx.Output<Tensor>("Out")->Resize(framework::make_ddim(tensor_shape));
+      ctx.Output<LoDTensor>("Out")->Resize(framework::make_ddim(tensor_shape));
     } else {
-      ctx.Output<Tensor>("Out")->Resize(Y->dims());
+      ctx.Output<LoDTensor>("Out")->Resize(Y->dims());
     }
   }
 };
@@ -112,8 +113,8 @@ class CropOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                             "Input(Out@GRAD) should not be null");
-    auto x_dims = ctx.Input<Tensor>("X")->dims();
-    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto x_dims = ctx.Input<LoDTensor>("X")->dims();
+    auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
     if (x_grad != nullptr) {
       x_grad->Resize(x_dims);
     }
@@ -141,23 +142,17 @@ template <typename T>
 class CropCPUKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
-    LOG(INFO) << "CropCPUKernel step1";
-    auto *x = context.Input<Tensor>("X");
-    LOG(INFO) << "CropCPUKernel step2";
-    auto *out = context.Output<Tensor>("Out");
-    LOG(INFO) << "CropCPUKernel step3";
+    auto *x = context.Input<LoDTensor>("X");
+    auto *out = context.Output<LoDTensor>("Out");
     auto x_data = x->data<T>();
-    T *out_data = out->mutable_data<T>(paddle::platform::CPUPlace());
-    LOG(INFO) << "CropCPUKernel step4";
+    T *out_data = out->mutable_data<T>(context.GetPlace());
     auto x_dims = x->dims();
     auto out_dims = out->dims();
-    LOG(INFO) << "CropCPUKernel step5";
     int64_t out_count = framework::product(out_dims);
     std::vector<int64_t> x_shape = framework::vectorize(x_dims);
     std::vector<int64_t> out_shape = framework::vectorize(out_dims);
     auto offsets = context.op().Attr<std::vector<int>>("offsets");
-    LOG(INFO) << "CropCPUKernel step6";
     PADDLE_ENFORCE_EQ(
         x_dims.size(), offsets.size(),
         "Offsets size should be equal to dimension size of input tensor.");
@@ -171,7 +166,6 @@ class CropCPUKernel : public framework::OpKernel {
     for (int64_t i = 0; i < out_count; ++i) {
       out_data[i] = x_data[transIndex(out_shape, x_shape, crop_rules, i)];
     }
-    LOG(INFO) << "CropCPUKernel step7";
   }
 };
...
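The transIndex call in CropCPUKernel above maps each flat output index to the flat input index it copies from; its body is outside this diff's context. Below is a minimal standalone C++ sketch of that translation, assuming row-major layout and assuming the diff's crop_rules argument boils down to per-dimension offsets; trans_index is a hypothetical stand-in, not Paddle's actual helper.

    // trans_index: map a flat output index to the flat input index,
    // shifting each coordinate by its crop offset (assumed semantics).
    #include <cassert>
    #include <cstdint>
    #include <vector>

    int64_t trans_index(const std::vector<int64_t>& out_shape,
                        const std::vector<int64_t>& x_shape,
                        const std::vector<int>& offsets, int64_t out_idx) {
      int64_t x_idx = 0;
      int64_t x_stride = 1;
      int64_t out_stride = 1;
      // Walk dimensions from innermost to outermost (row-major layout).
      for (int d = static_cast<int>(out_shape.size()) - 1; d >= 0; --d) {
        int64_t coord = (out_idx / out_stride) % out_shape[d];
        x_idx += (coord + offsets[d]) * x_stride;
        out_stride *= out_shape[d];
        x_stride *= x_shape[d];
      }
      return x_idx;
    }

    int main() {
      // Crop a 2x2 window at offset (1, 2) from a 4x4 input: output
      // element (0, 0) must come from input element (1, 2), flat index 6.
      std::vector<int64_t> x_shape{4, 4}, out_shape{2, 2};
      std::vector<int> offsets{1, 2};
      assert(trans_index(out_shape, x_shape, offsets, 0) == 6);
      return 0;
    }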
@@ -20,6 +20,7 @@ namespace paddle {
 namespace operators {
 using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
 
 template <typename T, int D>
 __global__ void CropKernel(const int N, const int64_t* out_shape,
@@ -48,9 +49,8 @@ template <typename T, int D>
 void CropCUDAFunctoin(const framework::ExecutionContext& context) {
   PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()),
                  "It must use GPUPlace.");
-  LOG(INFO) << "CropCUDAFunctoin step1";
-  auto* x = context.Input<Tensor>("X");
-  auto* out = context.Output<Tensor>("Out");
+  auto* x = context.Input<LoDTensor>("X");
+  auto* out = context.Output<LoDTensor>("Out");
   auto x_data = x->data<T>();
   T* out_data = out->mutable_data<T>(paddle::platform::GPUPlace());
   auto x_dims = x->dims();
@@ -100,7 +100,7 @@ template <typename T>
 class CropOpCUDAKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    size_t rank = context.Input<Tensor>("X")->dims().size();
+    size_t rank = context.Input<LoDTensor>("X")->dims().size();
     switch (rank) {
       case 1:
         CropCUDAFunctoin<T, 1>(context);
...
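Per the commit message, the GPU path now uses the handwritten CropKernel above instead of an Eigen expression; the kernel body itself is collapsed in this view. The following is a minimal CUDA sketch of such a kernel, assuming one thread per output element and the same row-major index arithmetic as the CPU path; the names, argument layout, and launch configuration are illustrative, not Paddle's actual API.

    #include <cstdint>
    #include <cstdio>
    #include <cuda_runtime.h>

    // One thread per output element; each thread decodes its output
    // coordinates, shifts them by the crop offsets, and reads the input.
    template <typename T, int D>
    __global__ void CropKernelSketch(const int n, const int64_t* out_shape,
                                     const int64_t* x_shape,
                                     const int* offsets, const T* x, T* out) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i >= n) return;
      int64_t x_idx = 0, x_stride = 1, rem = i;
      for (int d = D - 1; d >= 0; --d) {  // innermost dimension first
        int64_t coord = rem % out_shape[d];
        rem /= out_shape[d];
        x_idx += (coord + offsets[d]) * x_stride;
        x_stride *= x_shape[d];
      }
      out[i] = x[x_idx];
    }

    int main() {
      // Crop a 2x2 window at offset (1, 2) from a 4x4 input holding 0..15;
      // the expected output is 6 7 10 11.
      const int64_t h_x_shape[2] = {4, 4}, h_out_shape[2] = {2, 2};
      const int h_offsets[2] = {1, 2};
      float h_x[16], h_out[4];
      for (int i = 0; i < 16; ++i) h_x[i] = static_cast<float>(i);
      float *d_x, *d_out;
      int64_t *d_xs, *d_os;
      int* d_off;
      cudaMalloc(&d_x, sizeof(h_x));
      cudaMalloc(&d_out, sizeof(h_out));
      cudaMalloc(&d_xs, sizeof(h_x_shape));
      cudaMalloc(&d_os, sizeof(h_out_shape));
      cudaMalloc(&d_off, sizeof(h_offsets));
      cudaMemcpy(d_x, h_x, sizeof(h_x), cudaMemcpyHostToDevice);
      cudaMemcpy(d_xs, h_x_shape, sizeof(h_x_shape), cudaMemcpyHostToDevice);
      cudaMemcpy(d_os, h_out_shape, sizeof(h_out_shape), cudaMemcpyHostToDevice);
      cudaMemcpy(d_off, h_offsets, sizeof(h_offsets), cudaMemcpyHostToDevice);
      CropKernelSketch<float, 2><<<1, 32>>>(4, d_os, d_xs, d_off, d_x, d_out);
      cudaMemcpy(h_out, d_out, sizeof(h_out), cudaMemcpyDeviceToHost);
      printf("%g %g %g %g\n", h_out[0], h_out[1], h_out[2], h_out[3]);
      return 0;
    }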
@@ -25,11 +25,12 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor,
 using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
 using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
 
 template <typename Place, typename T, size_t D>
 void CropGradFunction(const framework::ExecutionContext& context) {
-  auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
-  auto* d_x = context.Output<Tensor>(framework::GradVarName("X"));
+  auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
+  auto* d_x = context.Output<LoDTensor>(framework::GradVarName("X"));
   if (d_x != nullptr) {
     d_x->mutable_data<T>(context.GetPlace());
     auto d_x_dims = d_x->dims();
@@ -52,7 +53,7 @@ class CropGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     size_t rank =
-        context.Input<Tensor>(framework::GradVarName("Out"))->dims().size();
+        context.Input<LoDTensor>(framework::GradVarName("Out"))->dims().size();
     switch (rank) {
       case 1:
         CropGradFunction<Place, T, 1>(context);
...
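CropGradFunction above appears to compute the gradient of crop with an Eigen expression (its body is collapsed here); mathematically, grad-of-crop is zero-padding: d_x is zero everywhere except the cropped window, which receives d_out unchanged. A plain C++ sketch of that scatter, under the stated assumption; crop_grad is a hypothetical helper, not part of the operator.

    #include <cstdint>
    #include <vector>

    // Zero-initialize d_x, then scatter each d_out element back to the
    // input position it was cropped from (same index math as the forward).
    void crop_grad(const std::vector<int64_t>& x_shape,
                   const std::vector<int64_t>& out_shape,
                   const std::vector<int>& offsets,
                   const std::vector<float>& d_out,
                   std::vector<float>* d_x) {
      int64_t x_count = 1, out_count = 1;
      for (int64_t s : x_shape) x_count *= s;
      for (int64_t s : out_shape) out_count *= s;
      d_x->assign(x_count, 0.0f);  // gradient vanishes outside the window
      for (int64_t i = 0; i < out_count; ++i) {
        int64_t x_idx = 0, x_stride = 1, rem = i;
        for (int d = static_cast<int>(out_shape.size()) - 1; d >= 0; --d) {
          int64_t coord = rem % out_shape[d];
          rem /= out_shape[d];
          x_idx += (coord + offsets[d]) * x_stride;
          x_stride *= x_shape[d];
        }
        (*d_x)[x_idx] = d_out[i];  // crop is injective, so plain assignment
      }
    }

Expressing this with an Eigen pad expression, as the header does, yields the same result declaratively and lets the Place template parameter dispatch the computation to CPU or GPU.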
@@ -64,7 +64,6 @@ def set_input(scope, op, inputs, place):
             tensor.set_dims(in_array.shape)
             tensor.set(in_array, place)
             if isinstance(in_val, tuple):
-                print "set lod"
                 tensor.set_lod(in_val[1])
@@ -189,10 +188,8 @@ class OpTest(unittest.TestCase):
         self.op.infer_shape(self.scope)
         ctx = core.DeviceContext.create(place)
         self.op.run(self.scope, ctx)
-        print "finish self.op.run"
 
         for out_name, out_dup in Operator.get_op_outputs(self.op.type()):
-            print "finish Operator.get_op_outputs"
-            print "out_dup=%s; out_name=%s" % (out_dup, out_name)
             if out_dup:
                 sub_out = self.outputs[out_name]
                 for sub_out_name in sub_out:
@@ -204,17 +201,12 @@ class OpTest(unittest.TestCase):
                             actual, expect, atol=1e-05),
                         "output name: " + out_name + "has diff")
             else:
-                v = self.scope.find_var(out_name)
-                print "var=%s" % v
-                print "tensor=%s" % v.get_tensor()
                 actual = np.array(self.scope.find_var(out_name).get_tensor())
-                print "actual=%s" % actual
                 expect = self.outputs[out_name]
                 self.assertTrue(
                     np.allclose(
                         actual, expect, atol=1e-05),
                     "output name: " + out_name + "has diff")
-        print "finish check in %s" % place
 
     def check_output(self):
         places = [core.CPUPlace()]
...
@@ -47,45 +47,44 @@ class TestCropOp(OpTest):
     def initTestCase(self):
         self.x_shape = (8, 8)
-        self.crop_shape = [2, 2]
+        self.crop_shape = (2, 2)
         self.offsets = [1, 2]
 
     def test_check_output(self):
         self.check_output()
-        print "finish check_output"
 
-    #def test_check_grad_normal(self):
-    #    self.check_grad(['X'], 'Out', max_relative_error=0.006)
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out', max_relative_error=0.006)
 
 
-#class TestCase1(TestCropOp):
-#    def initTestCase(self):
-#        self.x_shape = (16, 16, 16)
-#        self.crop_shape = [2, 2, 3]
-#        self.offsets = [1, 5, 3]
-#
-#
-#class TestCase2(TestCropOp):
-#    def initTestCase(self):
-#        self.x_shape = (4, 4)
-#        self.crop_shape = [4, 4]
-#        self.offsets = [0, 0]
-#
-#
-#class TestCase3(TestCropOp):
-#    def initTestCase(self):
-#        self.x_shape = (16, 16, 16)
-#        self.crop_shape = [2, 2, 3]
-#        self.offsets = [1, 5, 3]
-#        self.crop_by_input = True
-#
-#
-#class TestCase4(TestCropOp):
-#    def initTestCase(self):
-#        self.x_shape = (4, 4)
-#        self.crop_shape = [4, 4]
-#        self.offsets = [0, 0]
-#        self.crop_by_input = True
-#
+class TestCase1(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (16, 8, 32)
+        self.crop_shape = [2, 2, 3]
+        self.offsets = [1, 5, 3]
+
+
+class TestCase2(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (4, 8)
+        self.crop_shape = [4, 8]
+        self.offsets = [0, 0]
+
+
+class TestCase3(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (4, 8, 16)
+        self.crop_shape = [2, 2, 3]
+        self.offsets = [1, 5, 3]
+        self.crop_by_input = True
+
+
+class TestCase4(TestCropOp):
+    def initTestCase(self):
+        self.x_shape = (4, 4)
+        self.crop_shape = [4, 4]
+        self.offsets = [0, 0]
+        self.crop_by_input = True
+
 
 if __name__ == '__main__':
...