From dabf181ab30a022dad40a4b2e3dc3100809e3a2a Mon Sep 17 00:00:00 2001
From: zhupengyang <1165938320@qq.com>
Date: Tue, 7 Jan 2020 20:33:57 +0800
Subject: [PATCH] [NPU] Add host kernels, enhance reshape unit test (#2733)

test=develop
---
 lite/kernels/npu/bridges/engine.cc         |  6 ++--
 lite/kernels/npu/bridges/reshape_op.cc     | 30 ++---------------
 lite/tests/kernels/CMakeLists.txt          |  2 +-
 lite/tests/kernels/reshape_compute_test.cc | 38 ++++++++++++----------
 4 files changed, 29 insertions(+), 47 deletions(-)
diff --git a/lite/kernels/npu/bridges/engine.cc b/lite/kernels/npu/bridges/engine.cc
index e7e35831dd..546a235148 100644
--- a/lite/kernels/npu/bridges/engine.cc
+++ b/lite/kernels/npu/bridges/engine.cc
@@ -57,9 +57,11 @@ int Engine::BuildOriginProgram() {
       VLOG(3) << "The attr '" << kKernelTypeAttr
               << "' not found, pick the first kernel for " << op_type;
 #if defined(LITE_WITH_ARM)
-      auto kernels = op->CreateKernels({Place{TARGET(kARM)}});
+      auto kernels =
+          op->CreateKernels({Place{TARGET(kARM)}, Place{TARGET(kHost)}});
 #elif defined(LITE_WITH_X86)
-      auto kernels = op->CreateKernels({Place{TARGET(kX86)}});
+      auto kernels =
+          op->CreateKernels({Place{TARGET(kX86)}, Place{TARGET(kHost)}});
 #endif
       CHECK_GT(kernels.size(), 0) << "No kernels found for " << op_type;
       picked_kernel = std::move(kernels.front());
diff --git a/lite/kernels/npu/bridges/reshape_op.cc b/lite/kernels/npu/bridges/reshape_op.cc
index 14f2560066..332572a413 100644
--- a/lite/kernels/npu/bridges/reshape_op.cc
+++ b/lite/kernels/npu/bridges/reshape_op.cc
@@ -34,14 +34,11 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Get input and output vars and op attributes
   auto x_name = op_info->Input("X").front();
   auto x_type = kernel->GetInputDeclType("X");
-  CHECK(x_type->precision() == PRECISION(kFloat));
-  CHECK(x_type->layout() == DATALAYOUT(kNCHW));
   auto x = scope->FindMutableTensor(x_name);
   auto x_dims = x->dims();
+
   auto out_name = op_info->Output("Out").front();
   auto out_type = kernel->GetOutputDeclType("Out");
-  CHECK(out_type->precision() == PRECISION(kFloat));
-  CHECK(out_type->layout() == DATALAYOUT(kNCHW));
 
   // X node
   std::shared_ptr<Node> x_node = nullptr;
@@ -81,6 +78,7 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
         LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, "
                         "but Shape has "
                      << out_shape.size();
+        return FAILED;
       }
       actual_shape_node =
           graph->Add(actual_shape_name,
@@ -95,34 +93,12 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, "
                       "but shape has "
                    << out_shape.size();
+      return FAILED;
     }
     reshape_op->set_attr_shape(
         ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end()));
   }
 
-  // XShape node
-  if (op_type == "reshape2") {
-    // Append an extra reshape node to calc XShape
-    std::vector<int64_t> xshape_dims(x_dims.size() + 1, 1);
-    for (size_t i = 0; i < x_dims.size(); i++) {
-      xshape_dims[i + 1] = x_dims[i];
-    }
-    if (xshape_dims.size() > 4) {
-      LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, "
-                      "but XShape has "
-                   << xshape_dims.size();
-      return FAILED;
-    }
-    auto xshape_name = op_info->Output("XShape").front();
-    // auto xshape_type = kernel->GetOutputDeclType("XShape");
-    // CHECK(xshape_type->precision() == PRECISION(kFloat));
-    // CHECK(xshape_type->layout() == DATALAYOUT(kNCHW));
-    auto xshape_node = graph->Add<ge::op::Reshape>(xshape_name);
-    auto xshape_op = xshape_node->data<ge::op::Reshape>();
-    xshape_op->set_input_tensor(*x_node->data());
-    xshape_op->set_attr_shape(
-        ge::AttrValue::LIST_INT(xshape_dims.begin(), xshape_dims.end()));
-  }
   return REBUILD_WHEN_SHAPE_CHANGED;
 }
 
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
index 0a1ff3906f..c089f7099c 100644
--- a/lite/tests/kernels/CMakeLists.txt
+++ b/lite/tests/kernels/CMakeLists.txt
@@ -26,7 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
     #lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${npu_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
diff --git a/lite/tests/kernels/reshape_compute_test.cc b/lite/tests/kernels/reshape_compute_test.cc
index 85cd724148..b82c291a41 100644
--- a/lite/tests/kernels/reshape_compute_test.cc
+++ b/lite/tests/kernels/reshape_compute_test.cc
@@ -16,6 +16,7 @@
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/core/arena/framework.h"
+#include "lite/tests/utils/fill_data.h"
 
 namespace paddle {
 namespace lite {
@@ -29,19 +30,19 @@ class ReshapeComputeTester : public arena::TestCase {
   std::string xshape_ = "xshape";
   std::vector<std::string> shape_tensor_vct_;
   std::string shape_tensor_;
-  DDim x_dims_;
+  DDim dims_;
   std::vector<int> shape_;
   bool inplace_ = false;
 
  public:
   ReshapeComputeTester(const Place& place,
                        const std::string& alias,
-                       DDim x_dims,
+                       DDim dims,
                        std::vector<int> shape,
                        bool is_shape_tensor_vct = false,
                        bool is_shape_tensor = false,
                        bool is_shape = true)
-      : TestCase(place, alias), x_dims_(x_dims) {
+      : TestCase(place, alias), dims_(dims) {
     if (is_shape_tensor_vct) {
       for (size_t i = 0; i < shape.size(); i++) {
         shape_tensor_vct_.emplace_back(op_type_ + "/shape" + std::to_string(i));
@@ -60,7 +61,6 @@ class ReshapeComputeTester : public arena::TestCase {
     CHECK(out);
 
     auto* x = scope->FindTensor(input_);
-    auto x_dims = x->dims();
 
     std::vector<int> out_shape;
     if (shape_tensor_vct_.size() > 0) {
@@ -86,8 +86,8 @@ class ReshapeComputeTester : public arena::TestCase {
         CHECK_EQ(unk_dim_idx, -1);
         unk_dim_idx = i;
       } else if (out_shape[i] == 0) {
-        CHECK_LE(i, x_dims.size());
-        final_out_shape[i] = x_dims[i];
+        CHECK_LE(i, dims_.size());
+        final_out_shape[i] = dims_[i];
       } else if (out_shape[i] > 0) {
         final_out_shape[i] = out_shape[i];
       } else {
@@ -97,18 +97,18 @@ class ReshapeComputeTester : public arena::TestCase {
     }
 
     if (unk_dim_idx > -1) {
-      final_out_shape[unk_dim_idx] = x_dims.production() / cap;
+      final_out_shape[unk_dim_idx] = dims_.production() / cap;
     }
 
     out->Resize(final_out_shape);
 
     auto x_data = x->data<float>();
     auto out_data = out->mutable_data<float>();
-    memcpy(out_data, x_data, sizeof(float) * x_dims.production());
+    memcpy(out_data, x_data, sizeof(float) * dims_.production());
 
     if (op_type_ == "reshape2") {
       auto* xshape = scope->NewTensor(xshape_);
-      auto xshape_dims = x_dims.Vectorize();
+      auto xshape_dims = dims_.Vectorize();
       xshape_dims.insert(xshape_dims.begin(), 0);
       xshape->Resize(xshape_dims);
     }
@@ -134,11 +134,9 @@ class ReshapeComputeTester : public arena::TestCase {
   }
 
   void PrepareData() override {
-    std::vector<float> data(x_dims_.production());
-    for (int i = 0; i < x_dims_.production(); i++) {
-      data[i] = i * 1.1;
-    }
-    SetCommonTensor(input_, x_dims_, data.data());
+    std::vector<float> din(dims_.production());
+    fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
+    SetCommonTensor(input_, dims_, din.data());
 
     if (shape_tensor_vct_.size() > 0) {
       for (size_t i = 0; i < shape_.size(); i++) {
@@ -161,13 +159,16 @@ TEST(Reshape, precision) {
   LOG(INFO) << "test Reshape op";
   float abs_error = 2e-5;
   Place place;
-#ifdef LITE_WITH_XPU
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // Using fp16 in NPU
+#elif defined(LITE_WITH_XPU)
   place = TARGET(kXPU);
 #else
   return;
 #endif
 
-  DDim x_dims{{2, 3, 4, 5}};
+  DDim dims{{2, 3, 4, 5}};
   std::vector<std::vector<int>> shapes{{5, 4, 3, 2},
                                        {2, 3, 20},
                                        {2, 60},
@@ -176,8 +177,11 @@ TEST(Reshape, precision) {
                                        {0, 0, 20},
                                        {0, 0, -1}};
   for (auto shape : shapes) {
+#ifdef LITE_WITH_NPU
+    if (dims.size() > 4 || shape.size() > 4) continue;
+#endif
     std::unique_ptr<arena::TestCase> tester(
-        new ReshapeComputeTester(place, "def", x_dims, shape));
+        new ReshapeComputeTester(place, "def", dims, shape));
     arena::Arena arena(std::move(tester), place, abs_error);
     arena.TestPrecision({"xshape"});
   }
-- 
GitLab