提交 dabf181a 编写于 作者: Z zhupengyang 提交者: GitHub

[NPU] Add host kernels, enhance reshape unit test (#2733)

test=develop
上级 2c01e3a9
...@@ -57,9 +57,11 @@ int Engine::BuildOriginProgram() { ...@@ -57,9 +57,11 @@ int Engine::BuildOriginProgram() {
VLOG(3) << "The attr '" << kKernelTypeAttr VLOG(3) << "The attr '" << kKernelTypeAttr
<< "' not found, pick the first kernel for " << op_type; << "' not found, pick the first kernel for " << op_type;
#if defined(LITE_WITH_ARM) #if defined(LITE_WITH_ARM)
auto kernels = op->CreateKernels({Place{TARGET(kARM)}}); auto kernels =
op->CreateKernels({Place{TARGET(kARM)}, Place{TARGET(kHost)}});
#elif defined(LITE_WITH_X86) #elif defined(LITE_WITH_X86)
auto kernels = op->CreateKernels({Place{TARGET(kX86)}}); auto kernels =
op->CreateKernels({Place{TARGET(kX86)}, Place{TARGET(kHost)}});
#endif #endif
CHECK_GT(kernels.size(), 0) << "No kernels found for " << op_type; CHECK_GT(kernels.size(), 0) << "No kernels found for " << op_type;
picked_kernel = std::move(kernels.front()); picked_kernel = std::move(kernels.front());
......
...@@ -34,14 +34,11 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -34,14 +34,11 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Get input and output vars and op attributes // Get input and output vars and op attributes
auto x_name = op_info->Input("X").front(); auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X"); auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->precision() == PRECISION(kFloat));
CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name); auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims(); auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front(); auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out"); auto out_type = kernel->GetOutputDeclType("Out");
CHECK(out_type->precision() == PRECISION(kFloat));
CHECK(out_type->layout() == DATALAYOUT(kNCHW));
// X node // X node
std::shared_ptr<Node> x_node = nullptr; std::shared_ptr<Node> x_node = nullptr;
...@@ -81,6 +78,7 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -81,6 +78,7 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, " LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, "
"but Shape has " "but Shape has "
<< out_shape.size(); << out_shape.size();
return FAILED;
} }
actual_shape_node = actual_shape_node =
graph->Add(actual_shape_name, graph->Add(actual_shape_name,
...@@ -95,34 +93,12 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { ...@@ -95,34 +93,12 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, " LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, "
"but shape has " "but shape has "
<< out_shape.size(); << out_shape.size();
return FAILED;
} }
reshape_op->set_attr_shape( reshape_op->set_attr_shape(
ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end())); ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end()));
} }
// XShape node
if (op_type == "reshape2") {
// Append an extra reshape node to calc XShape
std::vector<int64_t> xshape_dims(x_dims.size() + 1, 1);
for (size_t i = 0; i < x_dims.size(); i++) {
xshape_dims[i + 1] = x_dims[i];
}
if (xshape_dims.size() > 4) {
LOG(WARNING) << "[NPU] HiAI DDK only supports less than 4 dimensions, "
"but XShape has "
<< xshape_dims.size();
return FAILED;
}
auto xshape_name = op_info->Output("XShape").front();
// auto xshape_type = kernel->GetOutputDeclType("XShape");
// CHECK(xshape_type->precision() == PRECISION(kFloat));
// CHECK(xshape_type->layout() == DATALAYOUT(kNCHW));
auto xshape_node = graph->Add<ge::op::Reshape>(xshape_name);
auto xshape_op = xshape_node->data<ge::op::Reshape>();
xshape_op->set_input_tensor(*x_node->data());
xshape_op->set_attr_shape(
ge::AttrValue::LIST_INT(xshape_dims.begin(), xshape_dims.end()));
}
return REBUILD_WHEN_SHAPE_CHANGED; return REBUILD_WHEN_SHAPE_CHANGED;
} }
......
...@@ -26,7 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH ...@@ -26,7 +26,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
#lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${npu_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${npu_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h" #include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h" #include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -29,19 +30,19 @@ class ReshapeComputeTester : public arena::TestCase { ...@@ -29,19 +30,19 @@ class ReshapeComputeTester : public arena::TestCase {
std::string xshape_ = "xshape"; std::string xshape_ = "xshape";
std::vector<std::string> shape_tensor_vct_; std::vector<std::string> shape_tensor_vct_;
std::string shape_tensor_; std::string shape_tensor_;
DDim x_dims_; DDim dims_;
std::vector<int> shape_; std::vector<int> shape_;
bool inplace_ = false; bool inplace_ = false;
public: public:
ReshapeComputeTester(const Place& place, ReshapeComputeTester(const Place& place,
const std::string& alias, const std::string& alias,
DDim x_dims, DDim dims,
std::vector<int> shape, std::vector<int> shape,
bool is_shape_tensor_vct = false, bool is_shape_tensor_vct = false,
bool is_shape_tensor = false, bool is_shape_tensor = false,
bool is_shape = true) bool is_shape = true)
: TestCase(place, alias), x_dims_(x_dims) { : TestCase(place, alias), dims_(dims) {
if (is_shape_tensor_vct) { if (is_shape_tensor_vct) {
for (size_t i = 0; i < shape.size(); i++) { for (size_t i = 0; i < shape.size(); i++) {
shape_tensor_vct_.emplace_back(op_type_ + "/shape" + std::to_string(i)); shape_tensor_vct_.emplace_back(op_type_ + "/shape" + std::to_string(i));
...@@ -60,7 +61,6 @@ class ReshapeComputeTester : public arena::TestCase { ...@@ -60,7 +61,6 @@ class ReshapeComputeTester : public arena::TestCase {
CHECK(out); CHECK(out);
auto* x = scope->FindTensor(input_); auto* x = scope->FindTensor(input_);
auto x_dims = x->dims();
std::vector<int> out_shape; std::vector<int> out_shape;
if (shape_tensor_vct_.size() > 0) { if (shape_tensor_vct_.size() > 0) {
...@@ -86,8 +86,8 @@ class ReshapeComputeTester : public arena::TestCase { ...@@ -86,8 +86,8 @@ class ReshapeComputeTester : public arena::TestCase {
CHECK_EQ(unk_dim_idx, -1); CHECK_EQ(unk_dim_idx, -1);
unk_dim_idx = i; unk_dim_idx = i;
} else if (out_shape[i] == 0) { } else if (out_shape[i] == 0) {
CHECK_LE(i, x_dims.size()); CHECK_LE(i, dims_.size());
final_out_shape[i] = x_dims[i]; final_out_shape[i] = dims_[i];
} else if (out_shape[i] > 0) { } else if (out_shape[i] > 0) {
final_out_shape[i] = out_shape[i]; final_out_shape[i] = out_shape[i];
} else { } else {
...@@ -97,18 +97,18 @@ class ReshapeComputeTester : public arena::TestCase { ...@@ -97,18 +97,18 @@ class ReshapeComputeTester : public arena::TestCase {
} }
if (unk_dim_idx > -1) { if (unk_dim_idx > -1) {
final_out_shape[unk_dim_idx] = x_dims.production() / cap; final_out_shape[unk_dim_idx] = dims_.production() / cap;
} }
out->Resize(final_out_shape); out->Resize(final_out_shape);
auto x_data = x->data<float>(); auto x_data = x->data<float>();
auto out_data = out->mutable_data<float>(); auto out_data = out->mutable_data<float>();
memcpy(out_data, x_data, sizeof(float) * x_dims.production()); memcpy(out_data, x_data, sizeof(float) * dims_.production());
if (op_type_ == "reshape2") { if (op_type_ == "reshape2") {
auto* xshape = scope->NewTensor(xshape_); auto* xshape = scope->NewTensor(xshape_);
auto xshape_dims = x_dims.Vectorize(); auto xshape_dims = dims_.Vectorize();
xshape_dims.insert(xshape_dims.begin(), 0); xshape_dims.insert(xshape_dims.begin(), 0);
xshape->Resize(xshape_dims); xshape->Resize(xshape_dims);
} }
...@@ -134,11 +134,9 @@ class ReshapeComputeTester : public arena::TestCase { ...@@ -134,11 +134,9 @@ class ReshapeComputeTester : public arena::TestCase {
} }
void PrepareData() override { void PrepareData() override {
std::vector<float> data(x_dims_.production()); std::vector<float> din(dims_.production());
for (int i = 0; i < x_dims_.production(); i++) { fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
data[i] = i * 1.1; SetCommonTensor(input_, dims_, din.data());
}
SetCommonTensor(input_, x_dims_, data.data());
if (shape_tensor_vct_.size() > 0) { if (shape_tensor_vct_.size() > 0) {
for (size_t i = 0; i < shape_.size(); i++) { for (size_t i = 0; i < shape_.size(); i++) {
...@@ -161,13 +159,16 @@ TEST(Reshape, precision) { ...@@ -161,13 +159,16 @@ TEST(Reshape, precision) {
LOG(INFO) << "test Reshape op"; LOG(INFO) << "test Reshape op";
float abs_error = 2e-5; float abs_error = 2e-5;
Place place; Place place;
#ifdef LITE_WITH_XPU #if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // Using fp16 in NPU
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU); place = TARGET(kXPU);
#else #else
return; return;
#endif #endif
DDim x_dims{{2, 3, 4, 5}}; DDim dims{{2, 3, 4, 5}};
std::vector<std::vector<int>> shapes{{5, 4, 3, 2}, std::vector<std::vector<int>> shapes{{5, 4, 3, 2},
{2, 3, 20}, {2, 3, 20},
{2, 60}, {2, 60},
...@@ -176,8 +177,11 @@ TEST(Reshape, precision) { ...@@ -176,8 +177,11 @@ TEST(Reshape, precision) {
{0, 0, 20}, {0, 0, 20},
{0, 0, -1}}; {0, 0, -1}};
for (auto shape : shapes) { for (auto shape : shapes) {
#ifdef LITE_WITH_NPU
if (dims.size() > 4 || shape.size() > 4) continue;
#endif
std::unique_ptr<arena::TestCase> tester( std::unique_ptr<arena::TestCase> tester(
new ReshapeComputeTester(place, "def", x_dims, shape)); new ReshapeComputeTester(place, "def", dims, shape));
arena::Arena arena(std::move(tester), place, abs_error); arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision({"xshape"}); arena.TestPrecision({"xshape"});
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册