[NPU] dropout op bridge and ut (#2745)

b678e43c · zhupengyang · GitHub · b30dc65b · b678e43c · b678e43c
5 changed files
--- a/lite/kernels/npu/bridges/CMakeLists.txt
+++ b/lite/kernels/npu/bridges/CMakeLists.txt
@@ -40,6 +40,7 @@ lite_cc_library(subgraph_bridge_sqrt_op_npu SRCS sqrt_op.cc DEPS ${npu_subgraph_
 lite_cc_library(subgraph_bridge_reduce_mean_op_npu SRCS reduce_mean_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_unsqueeze_op_npu SRCS unsqueeze_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_argmax_op_npu SRCS argmax_op.cc DEPS ${npu_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_subgraph_bridge_deps})
 set(npu_subgraph_bridges
        subgraph_bridge_registry
@@ -67,6 +68,7 @@ set(npu_subgraph_bridges
        subgraph_bridge_reduce_mean_op_npu
        subgraph_bridge_unsqueeze_op_npu
        subgraph_bridge_argmax_op_npu
+        subgraph_bridge_dropout_op_npu
        CACHE INTERNAL "npu_subgraph_bridges")
 message(STATUS "+++++ npu_subgraph_bridges: ${npu_subgraph_bridges}")
--- a/lite/kernels/npu/bridges/dropout_op.cc
+++ b/lite/kernels/npu/bridges/dropout_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "lite/kernels/npu/bridges/graph.h"
+#include "lite/kernels/npu/bridges/registry.h"
+#include "lite/kernels/npu/bridges/utility.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace npu {
+// Converts a `dropout` op into a HiAI `ge::op::Scale` node in the NPU
+// subgraph.
+//
+// The op is modelled as a multiplication of X by a single constant:
+//   - dropout_implementation == "upscale_in_train": the constant is 1
+//   - any other value:                              the constant is
+//                                                   (1 - dropout_prob)
+// Only the "Out" output is produced here; no mask node is created.
+//
+// @param ctx    Opaque pointer to the subgraph::npu::Graph being built.
+// @param op     The dropout op to convert; provides op_info and scope.
+// @param kernel Kernel used to query the declared types of "X" and "Out".
+// @return REBUILD_WHEN_SHAPE_CHANGED once the nodes have been added.
+int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  // `kernel` is dereferenced below, so validate it like the other arguments
+  // instead of crashing on a null pointer without a diagnostic.
+  CHECK(kernel != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto scope = op->scope();
+  VLOG(3) << "[NPU] Converting " + op_type + "...";
+
+  // Get input, output and op attributes
+  auto x_name = op_info->Input("X").front();
+  auto x_type = kernel->GetInputDeclType("X");
+  CHECK(x_type->precision() == PRECISION(kFloat));
+  auto x = scope->FindMutableTensor(x_name);
+  // Fail with a clear message if the input tensor is missing from the scope
+  // rather than dereferencing a null pointer on the next line.
+  CHECK(x != nullptr);
+  auto x_dims = x->dims();
+  auto x_rank = x_dims.size();
+  // Inputs with fewer than two dimensions are rejected by this bridge.
+  CHECK_GE(x_rank, 2);
+
+  auto out_name = op_info->Output("Out").front();
+  auto out_type = kernel->GetOutputDeclType("Out");
+  CHECK(out_type->precision() == PRECISION(kFloat));
+
+  auto dropout_implementation =
+      op_info->GetAttr<std::string>("dropout_implementation");
+  // Default factor is the keep probability; "upscale_in_train" overrides it
+  // with 1 so X passes through unchanged.
+  auto scale = 1 - op_info->GetAttr<float>("dropout_prob");
+  if (dropout_implementation == "upscale_in_train") {
+    scale = 1.f;
+  }
+
+  // HiAI only supports [n, c, 1, 1] for the shape of scale.
+  // The channel entry is the third-from-last dim of X, or 1 for rank-2 input.
+  // NOTE(review): presumably this matches how CvtShape lays X out as 4-D;
+  // confirm against utility.h.
+  std::vector<int64_t> scale_shape = {
+      1, x_rank < 3 ? 1 : x_dims[x_rank - 3], 1, 1};
+
+  // X node: reuse it if an earlier op already added it to the graph.
+  std::shared_ptr<Node> x_node = nullptr;
+  if (graph->Has(x_name)) {
+    x_node = graph->Get(x_name);
+  } else {
+    x_node = graph->Add(x_name, *x, CvtShape(x_dims));
+  }
+
+  // Scale node, registered under the op's output name.
+  auto scale_node = graph->Add<ge::op::Scale>(out_name);
+  auto scale_op = scale_node->data<ge::op::Scale>();
+  scale_op->set_input_x(*x_node->data());
+  scale_op->set_attr_axis(1);
+
+  // Add filter node (filled with scale) and feed it to the Scale op.
+  auto filter_node = graph->Add(out_name + "/filter", scale, scale_shape);
+  scale_op->set_input_filter(*filter_node->data());
+  return REBUILD_WHEN_SHAPE_CHANGED;
+}
+}  // namespace npu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+REGISTER_SUBGRAPH_BRIDGE(dropout,  // op type handled by this bridge
+                         kNPU,  // target the bridge is registered for
+                         paddle::lite::subgraph::npu::DropoutConverter);  // converter callback; macro semantics live in registry.h (not shown here)
--- a/lite/kernels/npu/bridges/paddle_use_bridges.h
+++ b/lite/kernels/npu/bridges/paddle_use_bridges.h
@@ -28,6 +28,7 @@ USE_SUBGRAPH_BRIDGE(conv2d, kNPU);
 USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kNPU);
 USE_SUBGRAPH_BRIDGE(conv2d_transpose, kNPU);
+USE_SUBGRAPH_BRIDGE(dropout, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_add, kNPU);
 USE_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_sub, kNPU);

--- a/lite/tests/kernels/CMakeLists.txt
+++ b/lite/tests/kernels/CMakeLists.txt
@@ -28,7 +28,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
    lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
    lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
    lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
    lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
    lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
    lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})

--- a/lite/tests/kernels/dropout_compute_test.cc
+++ b/lite/tests/kernels/dropout_compute_test.cc
@@ -41,14 +41,10 @@ class DropoutComputeTester : public arena::TestCase {
                       const std::string& alias,
                       DDim dims,
                       float dropout_prob,
-                       bool fix_seed,
-                       int seed,
                       std::string dropout_implementation)
      : TestCase(place, alias),
        dims_(dims),
        dropout_prob_(dropout_prob),
-        fix_seed_(fix_seed),
-        seed_(seed),
        dropout_implementation_(dropout_implementation) {}
  void RunBaseline(Scope* scope) override {
@@ -95,7 +91,10 @@ TEST(Dropout, precision) {
  LOG(INFO) << "test dropout op";
  float abs_error = 2e-5;
  Place place;
-#if defined(LITE_WITH_XPU)
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // Using fp16 in NPU
+#elif defined(LITE_WITH_XPU)
  place = TARGET(kXPU);
 #else
  return;
@@ -106,14 +105,11 @@ TEST(Dropout, precision) {
    for (auto dropout_prob : {0., 0.5, 1.}) {
      for (auto dropout_implementation :
           {"downgrade_in_infer", "upscale_in_train"}) {
-        std::unique_ptr<arena::TestCase> tester(
+#ifdef LITE_WITH_NPU
-            new DropoutComputeTester(place,
+        if (dims.size() < 2) continue;
-                                     "def",
+#endif
-                                     DDim(dims),
+        std::unique_ptr<arena::TestCase> tester(new DropoutComputeTester(
-                                     dropout_prob,
+            place, "def", DDim(dims), dropout_prob, dropout_implementation));
-                                     true,
-                                     1,
-                                     dropout_implementation));
        arena::Arena arena(std::move(tester), place, abs_error);
        arena.TestPrecision({"mask"});
      }