From e89bf25b15d47b669f889667738957bb0c3dfee1 Mon Sep 17 00:00:00 2001
From: houj04 <35131887+houj04@users.noreply.github.com>
Date: Tue, 22 Feb 2022 12:55:41 +0800
Subject: [PATCH] update unittests for nearest_interp_v2_op_xpu: 'sync' from
 gpu. test=kunlun (#39768)

---
 paddle/fluid/operators/interpolate_v2_op.h    |   7 +
 .../fluid/operators/interpolate_v2_op_xpu.cc  |  18 +-
 .../xpu/test_nearest_interp_v2_op_xpu.py      | 202 +++++++++++++++---
 3 files changed, 177 insertions(+), 50 deletions(-)
diff --git a/paddle/fluid/operators/interpolate_v2_op.h b/paddle/fluid/operators/interpolate_v2_op.h
index 66ab1e14390..f99d3f6c324 100644
--- a/paddle/fluid/operators/interpolate_v2_op.h
+++ b/paddle/fluid/operators/interpolate_v2_op.h
@@ -65,6 +65,13 @@ inline std::vector<T> get_new_data_from_tensor(const Tensor* new_data_tensor) {
                                       &cpu_starts_tensor);
     new_data = cpu_starts_tensor.data<T>();
   }
+#endif
+#ifdef PADDLE_WITH_XPU
+  if (platform::is_xpu_place(new_data_tensor->place())) {
+    paddle::framework::TensorCopySync(*new_data_tensor, platform::CPUPlace(),
+                                      &cpu_starts_tensor);
+    new_data = cpu_starts_tensor.data<T>();
+  }
 #endif
   vec_new_data = std::vector<T>(new_data, new_data + new_data_tensor->numel());
   return vec_new_data;
diff --git a/paddle/fluid/operators/interpolate_v2_op_xpu.cc b/paddle/fluid/operators/interpolate_v2_op_xpu.cc
index 66314cb7445..850dbe025b9 100644
--- a/paddle/fluid/operators/interpolate_v2_op_xpu.cc
+++ b/paddle/fluid/operators/interpolate_v2_op_xpu.cc
@@ -14,7 +14,7 @@
 #include <vector>
 
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/interpolate_op.h"
+#include "paddle/fluid/operators/interpolate_v2_op.h"
 
 #ifdef PADDLE_WITH_XPU
 
@@ -41,18 +41,6 @@ inline std::vector<int> get_new_shape_xpu(
   return vec_new_shape;
 }
 
-template <typename T>
-inline std::vector<T> get_new_data_from_tensor_xpu(
-    const Tensor* new_data_tensor) {
-  std::vector<T> vec_new_data;
-  framework::Tensor cpu_starts_tensor;
-  paddle::framework::TensorCopySync(*new_data_tensor, platform::CPUPlace(),
-                                    &cpu_starts_tensor);
-  auto* new_data = cpu_starts_tensor.data<T>();
-  vec_new_data = std::vector<T>(new_data, new_data + new_data_tensor->numel());
-  return vec_new_data;
-}
-
 template <typename T>
 class InterpolateV2XPUKernel : public framework::OpKernel<T> {
  public:
@@ -90,7 +78,7 @@ class InterpolateV2XPUKernel : public framework::OpKernel<T> {
       auto scale_tensor = ctx.Input<Tensor>("Scale");
       auto scale = ctx.Attr<std::vector<float>>("scale");
       if (scale_tensor != nullptr) {
-        auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
+        auto scale_data = get_new_data_from_tensor<float>(scale_tensor);
         if (scale_data.size() > 1) {
           scale_h = scale_data[0];
           scale_w = scale_data[1];
@@ -202,7 +190,7 @@ class InterpolateV2GradXPUKernel : public framework::OpKernel<T> {
       auto scale_tensor = ctx.Input<Tensor>("Scale");
       auto scale = ctx.Attr<std::vector<float>>("scale");
       if (scale_tensor != nullptr) {
-        auto scale_data = get_new_data_from_tensor_xpu<float>(scale_tensor);
+        auto scale_data = get_new_data_from_tensor<float>(scale_tensor);
         if (scale_data.size() > 1) {
           scale_h = scale_data[0];
           scale_w = scale_data[1];
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py
index 8de8125166f..8c1ce68e9d0 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_nearest_interp_v2_op_xpu.py
@@ -81,7 +81,80 @@ def nearest_neighbor_interp_np(X,
 
     if data_layout == "NHWC":
         out = np.transpose(out, (0, 2, 3, 1))  # NCHW => NHWC
+    # out = np.expand_dims(out, 2)
+    return out.astype(X.dtype)
+
+
+def nearest_neighbor_interp3d_np(X,
+                                 out_d,
+                                 out_h,
+                                 out_w,
+                                 scale_d=0,
+                                 scale_h=0,
+                                 scale_w=0,
+                                 out_size=None,
+                                 actual_shape=None,
+                                 align_corners=True,
+                                 data_layout='NCHW'):
+    """nearest neighbor interpolation implement in shape [N, C, H, W]"""
+    if data_layout == "NHWC":
+        X = np.transpose(X, (0, 4, 1, 2, 3))  # NDHWC => NCDHW
+    if out_size is not None:
+        out_d = out_size[0]
+        out_h = out_size[1]
+        out_w = out_size[2]
+    if actual_shape is not None:
+        out_d = actual_shape[0]
+        out_h = actual_shape[1]
+        out_w = actual_shape[2]
+    n, c, in_d, in_h, in_w = X.shape
 
+    ratio_d = ratio_h = ratio_w = 0.0
+    if (out_d > 1):
+        if (align_corners):
+            ratio_d = (in_d - 1.0) / (out_d - 1.0)
+        else:
+            if scale_d > 0:
+                ratio_d = 1.0 / scale_d
+            else:
+                ratio_d = 1.0 * in_d / out_d
+    if (out_h > 1):
+        if (align_corners):
+            ratio_h = (in_h - 1.0) / (out_h - 1.0)
+        else:
+            if scale_h > 0:
+                ratio_h = 1.0 / scale_h
+            else:
+                ratio_h = 1.0 * in_h / out_h
+    if (out_w > 1):
+        if (align_corners):
+            ratio_w = (in_w - 1.0) / (out_w - 1.0)
+        else:
+            if scale_w > 0:
+                ratio_w = 1.0 / scale_w
+            else:
+                ratio_w = 1.0 * in_w / out_w
+    out = np.zeros((n, c, out_d, out_h, out_w))
+
+    if align_corners:
+        for d in range(out_d):
+            in_d = int(ratio_d * d + 0.5)
+            for i in range(out_h):
+                in_i = int(ratio_h * i + 0.5)
+                for j in range(out_w):
+                    in_j = int(ratio_w * j + 0.5)
+                    out[:, :, d, i, j] = X[:, :, in_d, in_i, in_j]
+    else:
+        for d in range(out_d):
+            in_d = int(ratio_d * d)
+            for i in range(out_h):
+                in_i = int(ratio_h * i)
+                for j in range(out_w):
+                    in_j = int(ratio_w * j)
+                    out[:, :, d, i, j] = X[:, :, in_d, in_i, in_j]
+
+    if data_layout == "NDHWC":
+        out = np.transpose(out, (0, 2, 3, 4, 1))  # NCDHW => NDHWC
     return out.astype(X.dtype)
 
 
@@ -90,46 +163,86 @@ class TestNearestInterpOp(XPUOpTest):
         self.use_xpu = True
         self.out_size = None
         self.actual_shape = None
+        self.data_layout = 'NCHW'
         self.init_test_case()
         self.op_type = "nearest_interp_v2"
-        self.shape_by_1Dtensor = False
-        self.scale_by_1Dtensor = False
-        self.attrs = {
-            'interp_method': self.interp_method,
-            'align_corners': self.align_corners,
-        }
-
         input_np = np.random.random(self.input_shape).astype("float32")
-        self.inputs = {'X': input_np}
 
-        if self.scale_by_1Dtensor:
-            self.inputs['Scale'] = np.array([self.scale]).astype("float32")
-        elif self.scale:
+        if self.data_layout == "NCHW" and len(self.input_shape) == 4:
+            in_d = 1
+            in_h = self.input_shape[2]
+            in_w = self.input_shape[3]
+        else:
+            in_d = 1
+            in_h = self.input_shape[1]
+            in_w = self.input_shape[2]
+
+        if self.data_layout == "NCDHW" and len(self.input_shape) == 5:
+            in_d = self.input_shape[2]
+            in_h = self.input_shape[3]
+            in_w = self.input_shape[4]
+        else:
+            in_d = self.input_shape[1]
+            in_h = self.input_shape[2]
+            in_w = self.input_shape[3]
+        scale_d = 0
+        scale_h = 0
+        scale_w = 0
+        if self.scale:
             if isinstance(self.scale, float) or isinstance(self.scale, int):
                 if self.scale > 0:
-                    scale_h = scale_w = float(self.scale)
+                    scale_d = scale_h = scale_w = float(self.scale)
             if isinstance(self.scale, list) and len(self.scale) == 1:
-                scale_w = scale_h = self.scale[0]
+                scale_d = scale_w = scale_h = self.scale[0]
             elif isinstance(self.scale, list) and len(self.scale) > 1:
-                scale_w = self.scale[1]
-                scale_h = self.scale[0]
-            out_h = int(self.input_shape[2] * scale_h)
-            out_w = int(self.input_shape[3] * scale_w)
+                if len(self.scale) == 5:
+                    scale_w = self.scale[2]
+                    scale_h = self.scale[1]
+                    scale_d = self.scale[0]
+                else:
+                    scale_w = self.scale[1]
+                    scale_h = self.scale[0]
+
+            out_h = int(in_h * scale_h)
+            out_w = int(in_w * scale_w)
+            out_d = int(in_d * scale_d)
         else:
+            if len(self.input_shape) == 5:
+                out_d = self.out_d
             out_h = self.out_h
             out_w = self.out_w
 
-        if self.shape_by_1Dtensor:
+        if len(self.input_shape) == 4:
+            output_np = nearest_neighbor_interp_np(
+                input_np, out_h, out_w, scale_h, scale_w, self.out_size,
+                self.actual_shape, self.align_corners, self.data_layout)
+        elif len(self.input_shape) == 5:
+            output_np = nearest_neighbor_interp3d_np(
+                input_np, out_d, out_h, out_w, scale_d, scale_h, scale_w,
+                self.out_size, self.actual_shape, self.align_corners,
+                self.data_layout)
+        self.inputs = {'X': input_np}
+        if self.out_size is not None:
             self.inputs['OutSize'] = self.out_size
-        elif self.out_size is not None:
-            size_tensor = []
-            for index, ele in enumerate(self.out_size):
-                size_tensor.append(("x" + str(index), np.ones(
-                    (1)).astype('int32') * ele))
-            self.inputs['SizeTensor'] = size_tensor
-
-        self.attrs['out_h'] = self.out_h
-        self.attrs['out_w'] = self.out_w
+        if self.actual_shape is not None:
+            self.inputs['OutSize'] = self.actual_shape
+        if len(self.input_shape) == 5:
+            self.attrs = {
+                'out_d': self.out_d,
+                'out_h': self.out_h,
+                'out_w': self.out_w,
+                'interp_method': self.interp_method,
+                'align_corners': self.align_corners,
+                'data_layout': self.data_layout
+            }
+        else:
+            self.attrs = {
+                'out_h': self.out_h,
+                'out_w': self.out_w,
+                'interp_method': self.interp_method,
+                'align_corners': self.align_corners,
+                'data_layout': self.data_layout
+            }
         if self.scale:
             if isinstance(self.scale, float) or isinstance(self.scale, int):
                 if self.scale > 0:
@@ -137,9 +250,6 @@ class TestNearestInterpOp(XPUOpTest):
             if isinstance(self.scale, list) and len(self.scale) == 1:
                 self.scale = [self.scale[0], self.scale[0]]
             self.attrs['scale'] = self.scale
-        output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, 0, 0,
-                                               self.out_size, self.actual_shape,
-                                               self.align_corners)
         self.outputs = {'Out': output_np}
 
     def test_check_output(self):
@@ -154,22 +264,26 @@ class TestNearestInterpOp(XPUOpTest):
 
     def init_test_case(self):
         self.interp_method = 'nearest'
-        self.input_shape = [2, 5, 4, 4]
-        self.out_h = 3
-        self.out_w = 3
+        self.input_shape = [2, 3, 4, 5]
+        self.out_h = 2
+        self.out_w = 2
         self.scale = 0.
-        self.out_size = [3, 3]
+        self.out_size = np.array([3, 3]).astype("int32")
         self.align_corners = True
 
 
+"""
+# case copied form gpu but disabled in xpu: not support 5-dim input_shape
 class TestNearestNeighborInterpCase1(TestNearestInterpOp):
     def init_test_case(self):
         self.interp_method = 'nearest'
-        self.input_shape = [4, 1, 7, 8]
+        self.input_shape = [4, 1, 1, 7, 8]
+        self.out_d = 1
         self.out_h = 1
         self.out_w = 1
         self.scale = 0.
         self.align_corners = True
+"""
 
 
 class TestNearestNeighborInterpCase2(TestNearestInterpOp):
@@ -246,6 +360,8 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
         self.align_corners = True
 
 
+"""
+# case copied form gpu but disabled in xpu: not support NHWC data_layout
 class TestNearestNeighborInterpDataLayout(TestNearestInterpOp):
     def init_test_case(self):
         self.interp_method = 'nearest'
@@ -256,6 +372,7 @@ class TestNearestNeighborInterpDataLayout(TestNearestInterpOp):
         self.out_size = np.array([3, 8]).astype("int32")
         self.align_corners = True
         self.data_layout = "NHWC"
+"""
 
 
 class TestNearestInterpWithoutCorners(TestNearestInterpOp):
@@ -296,6 +413,21 @@ class TestNearestNeighborInterpScale3(TestNearestInterpOp):
         self.align_corners = True
 
 
+"""
+# case copied form gpu but disabled in xpu: not support 5-dim input_shape
+class TestNearestNeighbor3DInterp(TestNearestInterpOp):
+    def init_test_case(self):
+        self.interp_method = 'nearest'
+        self.input_shape = [3, 2, 4, 7, 5]
+        self.out_d = 8
+        self.out_h = 64
+        self.out_w = 32
+        self.scale = [4.0, 2.0, 3.0]
+        self.out_size = np.array([8, 66, 40]).astype("int32")
+        self.align_corners = True
+"""
+
+
 class TestNearestInterpOp_attr_tensor(XPUOpTest):
     def setUp(self):
         self.use_xpu = True
-- 
GitLab