From 1716db63ef5611aa52c60a572787057f5b983c7e Mon Sep 17 00:00:00 2001 From: zhupengyang Date: Wed, 9 Sep 2020 10:43:37 +0800 Subject: [PATCH] [xpu] fix xpu unittests: cast, transpose, pool, layer_norm, elementwise, multiclass_nms (#4259) --- lite/kernels/xpu/bridges/cast_op.cc | 3 ++- lite/kernels/xpu/subgraph_compute.cc | 13 ++++++--- lite/tests/kernels/cast_compute_test.cc | 4 +-- .../tests/kernels/elementwise_compute_test.cc | 10 +++++-- lite/tests/kernels/layer_norm_compute_test.cc | 4 +-- .../kernels/multiclass_nms_compute_test.cc | 2 -- lite/tests/kernels/pool_compute_test.cc | 11 ++++++-- lite/tests/kernels/transpose_compute_test.cc | 27 ++++++++++++++----- 8 files changed, 52 insertions(+), 22 deletions(-) diff --git a/lite/kernels/xpu/bridges/cast_op.cc b/lite/kernels/xpu/bridges/cast_op.cc index 056822feb5..a87beb93db 100644 --- a/lite/kernels/xpu/bridges/cast_op.cc +++ b/lite/kernels/xpu/bridges/cast_op.cc @@ -88,7 +88,8 @@ int CastConverter(void* ctx, OpLite* op, KernelBase* kernel) { // Cast node graph->Add( out_name, - graph->builder_.CreateCast(*x_node->data(), CvtPrecisionType(out_ptype))); + graph->builder_.CreateCast(*x_node->data(), CvtPrecisionType(out_ptype)), + PrecisionType(out_ptype)); return SUCCESS; } diff --git a/lite/kernels/xpu/subgraph_compute.cc b/lite/kernels/xpu/subgraph_compute.cc index 52d960118b..2e550bfc2d 100644 --- a/lite/kernels/xpu/subgraph_compute.cc +++ b/lite/kernels/xpu/subgraph_compute.cc @@ -54,6 +54,7 @@ bool SubgraphEngine::BuildDeviceProgram() { return false; } } + // Collect the input and output nodes of the XPU IR graph std::vector device_inodes; std::vector device_onodes; @@ -62,16 +63,20 @@ bool SubgraphEngine::BuildDeviceProgram() { CHECK(graph.Get(input_names_[i])->is_data()); device_inodes.push_back(graph.Get(input_names_[i])->data().get()); } - std::vector valid_output_names; for (size_t i = 0; i < output_names_.size(); i++) { if (graph.Has(output_names_[i])) { device_onodes.push_back(graph.Get(output_names_[i])->data().get()); - valid_output_names.push_back(output_names_[i]); + } else { + // update output_names_ and origin_otensors because some outputs may be + // useless + output_names_.erase(output_names_.begin() + i); + origin_otensors_.erase(origin_otensors_.begin() + i); + i--; } } - // update output_names_ because some outputs may be useless - output_names_ = valid_output_names; CHECK_GT(output_names_.size(), 0); + CHECK_EQ(output_names_.size(), origin_otensors_.size()); + // Build the XPU IR graph to the XPU runtime for inference device_program_ = lite::xpu::Device::Global().Build( &graph.builder_, &graph.params_, &device_onodes); diff --git a/lite/tests/kernels/cast_compute_test.cc b/lite/tests/kernels/cast_compute_test.cc index 7acccc015e..e0edb3c54e 100644 --- a/lite/tests/kernels/cast_compute_test.cc +++ b/lite/tests/kernels/cast_compute_test.cc @@ -135,8 +135,8 @@ TEST(Cast, precision) { float abs_error = 2e-5; #if defined(LITE_WITH_ARM) place = TARGET(kARM); -// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) -// place = TARGET(kXPU); +#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) + place = TARGET(kXPU); #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) place = TARGET(kHuaweiAscendNPU); abs_error = 1e-2; // precision_mode default is force_fp16 diff --git a/lite/tests/kernels/elementwise_compute_test.cc b/lite/tests/kernels/elementwise_compute_test.cc index e62a991494..a4904080ab 100644 --- a/lite/tests/kernels/elementwise_compute_test.cc +++ b/lite/tests/kernels/elementwise_compute_test.cc @@ -182,6 +182,12 @@ void TestElt(Place place, std::vector y_shape, int axis, std::string act_type = "") { +#if defined(LITE_WITH_XPU) + if ((y_shape.size() != 1 && x_shape.size() != y_shape.size()) || + elt_type != std::string("add") || !act_type.empty()) { + return; + } +#endif std::unique_ptr tester(new ElementwiseComputeTester( place, "def", elt_type, x_shape, y_shape, axis, act_type)); arena::Arena arena(std::move(tester), place, abs_error); @@ -231,8 +237,8 @@ TEST(Elementwise, precision) { abs_error = 1e-2; // precision_mode default is force_fp16 #elif defined(LITE_WITH_ARM) place = TARGET(kARM); -// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) -// place = TARGET(kXPU); +#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) + place = TARGET(kXPU); #else return; #endif diff --git a/lite/tests/kernels/layer_norm_compute_test.cc b/lite/tests/kernels/layer_norm_compute_test.cc index fa483049d9..686fb7e912 100644 --- a/lite/tests/kernels/layer_norm_compute_test.cc +++ b/lite/tests/kernels/layer_norm_compute_test.cc @@ -156,8 +156,8 @@ TEST(LayerNorm, precision) { #elif defined(LITE_WITH_ARM) place = TARGET(kARM); abs_error = 6e-5; -// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) -// place = TARGET(kXPU); +#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) + place = TARGET(kXPU); #else return; #endif diff --git a/lite/tests/kernels/multiclass_nms_compute_test.cc b/lite/tests/kernels/multiclass_nms_compute_test.cc index febee0d08e..30ced1bf93 100644 --- a/lite/tests/kernels/multiclass_nms_compute_test.cc +++ b/lite/tests/kernels/multiclass_nms_compute_test.cc @@ -478,8 +478,6 @@ TEST(multiclass_nms, precision) { Place place; #if defined(LITE_WITH_ARM) place = TARGET(kHost); -// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) -// place = TARGET(kXPU); #else return; #endif diff --git a/lite/tests/kernels/pool_compute_test.cc b/lite/tests/kernels/pool_compute_test.cc index 3e2d638aff..67029c3f27 100644 --- a/lite/tests/kernels/pool_compute_test.cc +++ b/lite/tests/kernels/pool_compute_test.cc @@ -313,6 +313,7 @@ void TestPoolPaddings(Place place, float abs_error = 2e-5) { for (auto pooling_type : {"max", "avg"}) { TestPoolHelper( place, abs_error, {2, 3, 6, 7}, pooling_type, {1, 1}, {0, 0}, {2, 2}); +#if !defined(LITE_WITH_XPU) TestPoolHelper( place, abs_error, {2, 3, 6, 7}, pooling_type, {1, 1}, {1, 1}, {2, 2}); TestPoolHelper(place, @@ -336,6 +337,7 @@ void TestPoolPaddings(Place place, float abs_error = 2e-5) { {1, 1}, {1, 0, 0, 1}, {2, 2}); +#endif } } @@ -349,6 +351,7 @@ void TestPoolKsize(Place place, float abs_error = 2e-5) { {1, 1}, {0, 0}, {ksize, ksize}); +#if !defined(LITE_WITH_XPU) TestPoolHelper(place, abs_error, {2, 3, 6, 7}, @@ -356,12 +359,16 @@ void TestPoolKsize(Place place, float abs_error = 2e-5) { {2, 2}, {1, 1}, {ksize, ksize}); +#endif } } } void TestPoolCeilMode(Place place, float abs_error = 2e-5) { for (auto pooling_type : {"max", "avg"}) { +#if defined(LITE_WITH_XPU) + if (pooling_type == std::string("max")) continue; +#endif TestPoolHelper(place, abs_error, {2, 3, 6, 6}, @@ -384,8 +391,8 @@ TEST(Pool, precision) { #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) place = TARGET(kHuaweiAscendNPU); abs_error = 1e-2; // precision_mode default is force_fp16 -// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) // NOLINT -// place = TARGET(kXPU); +#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) // NOLINT + place = TARGET(kXPU); #else return; #endif diff --git a/lite/tests/kernels/transpose_compute_test.cc b/lite/tests/kernels/transpose_compute_test.cc index b07272b997..ee297c82f9 100644 --- a/lite/tests/kernels/transpose_compute_test.cc +++ b/lite/tests/kernels/transpose_compute_test.cc @@ -127,7 +127,12 @@ class TransposeComputeTester : public arena::TestCase { void TestTranspose2D(Place place, float abs_error) { DDim x_dims{{4, 5}}; - std::vector> axes{{0, 1}, {1, 0}}; + std::vector> axes { +#if !defined(LITE_WITH_XPU) + {0, 1}, +#endif + {1, 0}, + }; for (auto axis : axes) { std::unique_ptr tester( new TransposeComputeTester(place, "def", x_dims, axis)); @@ -138,8 +143,12 @@ void TestTranspose2D(Place place, float abs_error) { void TestTranspose3D(Place place, float abs_error) { DDim x_dims{{3, 4, 5}}; - std::vector> axes{ - {0, 1, 2}, {0, 2, 1}, {1, 0, 2}, {2, 1, 0}}; + std::vector> axes { +#if !defined(LITE_WITH_XPU) + {0, 1, 2}, +#endif + {0, 2, 1}, {1, 0, 2}, {2, 1, 0}, + }; for (auto axis : axes) { std::unique_ptr tester( new TransposeComputeTester(place, "def", x_dims, axis)); @@ -150,8 +159,12 @@ void TestTranspose3D(Place place, float abs_error) { void TestTranspose4D(Place place, float abs_error) { DDim x_dims{{2, 3, 4, 5}}; - std::vector> axes{ - {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {3, 1, 2, 0}, {3, 1, 0, 2}}; + std::vector> axes { +#if !defined(LITE_WITH_XPU) + {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {3, 1, 2, 0}, {3, 1, 0, 2}, +#endif + {0, 2, 3, 1}, {0, 3, 1, 2}, + }; for (auto axis : axes) { std::unique_ptr tester( new TransposeComputeTester(place, "def", x_dims, axis)); @@ -170,8 +183,8 @@ TEST(Transpose, precision) { #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU) place = TARGET(kHuaweiAscendNPU); abs_error = 1e-2; // precision_mode default is force_fp16 -// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) // NOLINT -// place = TARGET(kXPU); +#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) + place = TARGET(kXPU); #else return; #endif -- GitLab