From dabf181ab30a022dad40a4b2e3dc3100809e3a2a Mon Sep 17 00:00:00 2001 From: zhupengyang <1165938320@qq.com> Date: Tue, 7 Jan 2020 20:33:57 +0800 Subject: [PATCH] [NPU] add host kernels, enhance reshape ut (#2733) test=develop --- lite/kernels/npu/bridges/engine.cc | 6 ++-- lite/kernels/npu/bridges/reshape_op.cc | 30 ++--------------- lite/tests/kernels/CMakeLists.txt | 2 +- lite/tests/kernels/reshape_compute_test.cc | 38 ++++++++++++---------- 4 files changed, 29 insertions(+), 47 deletions(-) diff --git a/lite/kernels/npu/bridges/engine.cc b/lite/kernels/npu/bridges/engine.cc index e7e35831dd..546a235148 100644 --- a/lite/kernels/npu/bridges/engine.cc +++ b/lite/kernels/npu/bridges/engine.cc @@ -57,9 +57,11 @@ int Engine::BuildOriginProgram() { VLOG(3) << "The attr '" << kKernelTypeAttr << "' not found, pick the first kernel for " << op_type; #if defined(LITE_WITH_ARM) - auto kernels = op->CreateKernels({Place{TARGET(kARM)}}); + auto kernels = + op->CreateKernels({Place{TARGET(kARM)}, Place{TARGET(kHost)}}); #elif defined(LITE_WITH_X86) - auto kernels = op->CreateKernels({Place{TARGET(kX86)}}); + auto kernels = + op->CreateKernels({Place{TARGET(kX86)}, Place{TARGET(kHost)}}); #endif CHECK_GT(kernels.size(), 0) << "No kernels found for " << op_type; picked_kernel = std::move(kernels.front()); diff --git a/lite/kernels/npu/bridges/reshape_op.cc b/lite/kernels/npu/bridges/reshape_op.cc index 14f2560066..332572a413 100644 --- a/lite/kernels/npu/bridges/reshape_op.cc +++ b/lite/kernels/npu/bridges/reshape_op.cc @@ -34,14 +34,11 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { // Get input and output vars and op attributes auto x_name = op_info->Input("X").front(); auto x_type = kernel->GetInputDeclType("X"); - CHECK(x_type->precision() == PRECISION(kFloat)); - CHECK(x_type->layout() == DATALAYOUT(kNCHW)); auto x = scope->FindMutableTensor(x_name); auto x_dims = x->dims(); + auto out_name = op_info->Output("Out").front(); auto out_type = kernel->GetOutputDeclType("Out"); - CHECK(out_type->precision() == PRECISION(kFloat)); - CHECK(out_type->layout() == DATALAYOUT(kNCHW)); // X node std::shared_ptr x_node = nullptr; @@ -81,6 +78,7 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, " "but Shape has " << out_shape.size(); + return FAILED; } actual_shape_node = graph->Add(actual_shape_name, @@ -95,34 +93,12 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, " "but shape has " << out_shape.size(); + return FAILED; } reshape_op->set_attr_shape( ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end())); } - // XShape node - if (op_type == "reshape2") { - // Append an extra reshape node to calc XShape - std::vector xshape_dims(x_dims.size() + 1, 1); - for (size_t i = 0; i < x_dims.size(); i++) { - xshape_dims[i + 1] = x_dims[i]; - } - if (xshape_dims.size() > 4) { - LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, " - "but XShape has " - << xshape_dims.size(); - return FAILED; - } - auto xshape_name = op_info->Output("XShape").front(); - // auto xshape_type = kernel->GetOutputDeclType("XShape"); - // CHECK(xshape_type->precision() == PRECISION(kFloat)); - // CHECK(xshape_type->layout() == DATALAYOUT(kNCHW)); - auto xshape_node = graph->Add(xshape_name); - auto xshape_op = xshape_node->data(); - xshape_op->set_input_tensor(*x_node->data()); - xshape_op->set_attr_shape( - ge::AttrValue::LIST_INT(xshape_dims.begin(), xshape_dims.end())); - } return REBUILD_WHEN_SHAPE_CHANGED; } diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 0a1ff3906f..c089f7099c 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -26,7 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH #lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${npu_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tests/kernels/reshape_compute_test.cc b/lite/tests/kernels/reshape_compute_test.cc index 85cd724148..b82c291a41 100644 --- a/lite/tests/kernels/reshape_compute_test.cc +++ b/lite/tests/kernels/reshape_compute_test.cc @@ -16,6 +16,7 @@ #include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" namespace paddle { namespace lite { @@ -29,19 +30,19 @@ class ReshapeComputeTester : public arena::TestCase { std::string xshape_ = "xshape"; std::vector shape_tensor_vct_; std::string shape_tensor_; - DDim x_dims_; + DDim dims_; std::vector shape_; bool inplace_ = false; public: ReshapeComputeTester(const Place& place, const std::string& alias, - DDim x_dims, + DDim dims, std::vector shape, bool is_shape_tensor_vct = false, bool is_shape_tensor = false, bool is_shape = true) - : TestCase(place, alias), x_dims_(x_dims) { + : TestCase(place, alias), dims_(dims) { if (is_shape_tensor_vct) { for (size_t i = 0; i < shape.size(); i++) { shape_tensor_vct_.emplace_back(op_type_ + "/shape" + std::to_string(i)); @@ -60,7 +61,6 @@ class ReshapeComputeTester : public arena::TestCase { CHECK(out); auto* x = scope->FindTensor(input_); - auto x_dims = x->dims(); std::vector out_shape; if (shape_tensor_vct_.size() > 0) { @@ -86,8 +86,8 @@ class ReshapeComputeTester : public arena::TestCase { CHECK_EQ(unk_dim_idx, -1); unk_dim_idx = i; } else if (out_shape[i] == 0) { - CHECK_LE(i, x_dims.size()); - final_out_shape[i] = x_dims[i]; + CHECK_LE(i, dims_.size()); + final_out_shape[i] = dims_[i]; } else if (out_shape[i] > 0) { final_out_shape[i] = out_shape[i]; } else { @@ -97,18 +97,18 @@ class ReshapeComputeTester : public arena::TestCase { } if (unk_dim_idx > -1) { - final_out_shape[unk_dim_idx] = x_dims.production() / cap; + final_out_shape[unk_dim_idx] = dims_.production() / cap; } out->Resize(final_out_shape); auto x_data = x->data(); auto out_data = out->mutable_data(); - memcpy(out_data, x_data, sizeof(float) * x_dims.production()); + memcpy(out_data, x_data, sizeof(float) * dims_.production()); if (op_type_ == "reshape2") { auto* xshape = scope->NewTensor(xshape_); - auto xshape_dims = x_dims.Vectorize(); + auto xshape_dims = dims_.Vectorize(); xshape_dims.insert(xshape_dims.begin(), 0); xshape->Resize(xshape_dims); } @@ -134,11 +134,9 @@ class ReshapeComputeTester : public arena::TestCase { } void PrepareData() override { - std::vector data(x_dims_.production()); - for (int i = 0; i < x_dims_.production(); i++) { - data[i] = i * 1.1; - } - SetCommonTensor(input_, x_dims_, data.data()); + std::vector din(dims_.production()); + fill_data_rand(din.data(), -1.f, 1.f, dims_.production()); + SetCommonTensor(input_, dims_, din.data()); if (shape_tensor_vct_.size() > 0) { for (size_t i = 0; i < shape_.size(); i++) { @@ -161,13 +159,16 @@ TEST(Reshape, precision) { LOG(INFO) << "test Reshape op"; float abs_error = 2e-5; Place place; -#ifdef LITE_WITH_XPU +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 1e-2; // Using fp16 in NPU +#elif defined(LITE_WITH_XPU) place = TARGET(kXPU); #else return; #endif - DDim x_dims{{2, 3, 4, 5}}; + DDim dims{{2, 3, 4, 5}}; std::vector> shapes{{5, 4, 3, 2}, {2, 3, 20}, {2, 60}, @@ -176,8 +177,11 @@ TEST(Reshape, precision) { {0, 0, 20}, {0, 0, -1}}; for (auto shape : shapes) { +#ifdef LITE_WITH_NPU + if (dims.size() > 4 || shape.size() > 4) continue; +#endif std::unique_ptr tester( - new ReshapeComputeTester(place, "def", x_dims, shape)); + new ReshapeComputeTester(place, "def", dims, shape)); arena::Arena arena(std::move(tester), place, abs_error); arena.TestPrecision({"xshape"}); } -- GitLab