Unverified commit b678e43c, authored by zhupengyang, committed by GitHub

[NPU] dropout op bridge and ut (#2745)
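At inference time dropout is deterministic, so the bridge in this patch lowers Paddle's dropout op onto a single HiAI ge::op::Scale node: with dropout_implementation == "downgrade_in_infer" the output is x * (1 - dropout_prob), and with "upscale_in_train" it is a pass-through (scale = 1). A minimal sketch of that reference semantics (plain C++; the infer_dropout helper is hypothetical, not part of this patch):

#include <string>
#include <vector>

// Inference-time dropout, mirroring the scale chosen by the bridge below.
// `impl` is the dropout_implementation attribute; `p` is dropout_prob.
std::vector<float> infer_dropout(const std::vector<float>& x,
                                 float p,
                                 const std::string& impl) {
  const float scale = (impl == "upscale_in_train") ? 1.0f : 1.0f - p;
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] * scale;
  return out;
}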

Parent b30dc65b
@@ -40,6 +40,7 @@ lite_cc_library(subgraph_bridge_sqrt_op_npu SRCS sqrt_op.cc DEPS ${npu_subgraph_
 lite_cc_library(subgraph_bridge_reduce_mean_op_npu SRCS reduce_mean_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_unsqueeze_op_npu SRCS unsqueeze_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_argmax_op_npu SRCS argmax_op.cc DEPS ${npu_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_subgraph_bridge_deps})

 set(npu_subgraph_bridges
         subgraph_bridge_registry
@@ -67,6 +68,7 @@ set(npu_subgraph_bridges
         subgraph_bridge_reduce_mean_op_npu
         subgraph_bridge_unsqueeze_op_npu
         subgraph_bridge_argmax_op_npu
+        subgraph_bridge_dropout_op_npu
         CACHE INTERNAL "npu_subgraph_bridges")

 message(STATUS "+++++ npu_subgraph_bridges: ${npu_subgraph_bridges}")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {

int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Get input, output and op attributes
  auto x_name = op_info->Input("X").front();
  auto x_type = kernel->GetInputDeclType("X");
  CHECK(x_type->precision() == PRECISION(kFloat));
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto x_rank = x_dims.size();
  CHECK_GE(x_rank, 2);
  auto out_name = op_info->Output("Out").front();
  auto out_type = kernel->GetOutputDeclType("Out");
  CHECK(out_type->precision() == PRECISION(kFloat));
  auto dropout_implementation =
      op_info->GetAttr<std::string>("dropout_implementation");
  auto scale = 1 - op_info->GetAttr<float>("dropout_prob");
  if (dropout_implementation == "upscale_in_train") {
    scale = 1.f;
  }
  // HiAI only supports [n, c, 1, 1] for the shape of the scale filter
  std::vector<int64_t> scale_shape = {
      1, x_rank < 3 ? 1 : x_dims[x_rank - 3], 1, 1};

  // X node
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x, CvtShape(x_dims));
  }

  // Scale node
  auto scale_node = graph->Add<ge::op::Scale>(out_name);
  auto scale_op = scale_node->data<ge::op::Scale>();
  scale_op->set_input_x(*x_node->data());
  scale_op->set_attr_axis(1);

  // Filter node (filled with the constant scale)
  auto filter_node = graph->Add(out_name + "/filter", scale, scale_shape);
  scale_op->set_input_filter(*filter_node->data());
  return REBUILD_WHEN_SHAPE_CHANGED;
}

}  // namespace npu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle

REGISTER_SUBGRAPH_BRIDGE(dropout,
                         kNPU,
                         paddle::lite::subgraph::npu::DropoutConverter);
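As a hand-worked check of the conversion above (an illustration, not code from the patch): for x_dims = {1, 3, 4, 4} and dropout_prob = 0.25 with downgrade_in_infer, the bridge emits a Scale node with axis = 1 and a constant filter of shape {1, 3, 1, 1} filled with 0.75, so each element of X is multiplied by 0.75. A sketch of that filter construction, with MakeScaleFilter as a hypothetical helper:

#include <cstdint>
#include <vector>

// Hypothetical helper mirroring how the bridge shapes its constant filter:
// {1, c, 1, 1}, where c is the third-from-last input dim (1 if rank < 3),
// with every element set to the same scale.
std::vector<float> MakeScaleFilter(const std::vector<int64_t>& x_dims,
                                   float scale) {
  const size_t rank = x_dims.size();
  const int64_t c = rank < 3 ? 1 : x_dims[rank - 3];
  // c values, broadcast by the Scale op over the whole tensor.
  return std::vector<float>(static_cast<size_t>(c), scale);
}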
@@ -28,6 +28,7 @@ USE_SUBGRAPH_BRIDGE(conv2d, kNPU);
 USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kNPU);
 USE_SUBGRAPH_BRIDGE(conv2d_transpose, kNPU);
+USE_SUBGRAPH_BRIDGE(dropout, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_add, kNPU);
 USE_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, kNPU);
 USE_SUBGRAPH_BRIDGE(elementwise_sub, kNPU);
......
@@ -28,7 +28,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
 lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
@@ -41,14 +41,10 @@ class DropoutComputeTester : public arena::TestCase {
                        const std::string& alias,
                        DDim dims,
                        float dropout_prob,
-                       bool fix_seed,
-                       int seed,
                        std::string dropout_implementation)
       : TestCase(place, alias),
         dims_(dims),
         dropout_prob_(dropout_prob),
-        fix_seed_(fix_seed),
-        seed_(seed),
         dropout_implementation_(dropout_implementation) {}

   void RunBaseline(Scope* scope) override {
@@ -95,7 +91,10 @@ TEST(Dropout, precision) {
   LOG(INFO) << "test dropout op";
   float abs_error = 2e-5;
   Place place;
-#if defined(LITE_WITH_XPU)
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // Using fp16 in NPU
+#elif defined(LITE_WITH_XPU)
   place = TARGET(kXPU);
 #else
   return;
@@ -106,14 +105,11 @@ TEST(Dropout, precision) {
   for (auto dropout_prob : {0., 0.5, 1.}) {
     for (auto dropout_implementation :
          {"downgrade_in_infer", "upscale_in_train"}) {
-      std::unique_ptr<arena::TestCase> tester(
-          new DropoutComputeTester(place,
-                                   "def",
-                                   DDim(dims),
-                                   dropout_prob,
-                                   true,
-                                   1,
-                                   dropout_implementation));
+#ifdef LITE_WITH_NPU
+      if (dims.size() < 2) continue;
+#endif
+      std::unique_ptr<arena::TestCase> tester(new DropoutComputeTester(
+          place, "def", DDim(dims), dropout_prob, dropout_implementation));
       arena::Arena arena(std::move(tester), place, abs_error);
       arena.TestPrecision({"mask"});
     }
......
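With this hunk the dropout test also covers NPU: the fp16 path widens the tolerance to 1e-2, inputs of rank < 2 are skipped to match the CHECK_GE(x_rank, 2) in the bridge, and the fixed-seed parameters are dropped because inference-mode dropout never actually masks. For reference, the simplified tester invocation now reads (restating the hunk above; the exclusion semantics of TestPrecision is my reading, not stated in this diff):

std::unique_ptr<arena::TestCase> tester(new DropoutComputeTester(
    place, "def", DDim(dims), dropout_prob, dropout_implementation));
arena::Arena arena(std::move(tester), place, abs_error);
// "mask" is presumably listed so the nondeterministic mask output is
// skipped during the precision comparison.
arena.TestPrecision({"mask"});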