diff --git a/lite/kernels/xpu/bridges/cast_op.cc b/lite/kernels/xpu/bridges/cast_op.cc
index 056822feb54b3859afa49c75d9fc8ccb19a48520..a87beb93db184d233e691d5e44a30e26780d8098 100644
--- a/lite/kernels/xpu/bridges/cast_op.cc
+++ b/lite/kernels/xpu/bridges/cast_op.cc
@@ -88,7 +88,8 @@ int CastConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Cast node
   graph->Add(
       out_name,
-      graph->builder_.CreateCast(*x_node->data(), CvtPrecisionType(out_ptype)));
+      graph->builder_.CreateCast(*x_node->data(), CvtPrecisionType(out_ptype)),
+      PrecisionType(out_ptype));
   return SUCCESS;
 }

diff --git a/lite/kernels/xpu/subgraph_compute.cc b/lite/kernels/xpu/subgraph_compute.cc
index 52d960118b42a7a3c36bf805d4b6464e4a7c937a..2e550bfc2d128e72b6b846cb1d9087ccd23bdf3d 100644
--- a/lite/kernels/xpu/subgraph_compute.cc
+++ b/lite/kernels/xpu/subgraph_compute.cc
@@ -54,6 +54,7 @@ bool SubgraphEngine::BuildDeviceProgram() {
       return false;
     }
   }

+  // Collect the input and output nodes of the XPU IR graph
   std::vector<xtcl::xExpr*> device_inodes;
   std::vector<xtcl::xExpr*> device_onodes;
@@ -62,16 +63,20 @@ bool SubgraphEngine::BuildDeviceProgram() {
     CHECK(graph.Get(input_names_[i])->is_data());
     device_inodes.push_back(graph.Get(input_names_[i])->data().get());
   }

-  std::vector<std::string> valid_output_names;
   for (size_t i = 0; i < output_names_.size(); i++) {
     if (graph.Has(output_names_[i])) {
       device_onodes.push_back(graph.Get(output_names_[i])->data().get());
-      valid_output_names.push_back(output_names_[i]);
+    } else {
+      // update output_names_ and origin_otensors because some outputs may be
+      // useless
+      output_names_.erase(output_names_.begin() + i);
+      origin_otensors_.erase(origin_otensors_.begin() + i);
+      i--;
     }
   }
-  // update output_names_ because some outputs may be useless
-  output_names_ = valid_output_names;
   CHECK_GT(output_names_.size(), 0);
+  CHECK_EQ(output_names_.size(), origin_otensors_.size());
+  // Build the XPU IR graph to the XPU runtime for inference
   device_program_ = lite::xpu::Device::Global().Build(
       &graph.builder_, &graph.params_, &device_onodes);
diff --git a/lite/tests/kernels/cast_compute_test.cc b/lite/tests/kernels/cast_compute_test.cc
index 7acccc015e8afbdaab18c711db5aa9b703b6d470..e0edb3c54e38b2e4387a5886ae6f74facd5752ba 100644
--- a/lite/tests/kernels/cast_compute_test.cc
+++ b/lite/tests/kernels/cast_compute_test.cc
@@ -135,8 +135,8 @@ TEST(Cast, precision) {
   float abs_error = 2e-5;
 #if defined(LITE_WITH_ARM)
   place = TARGET(kARM);
-// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
-// place = TARGET(kXPU);
+#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
+  place = TARGET(kXPU);
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
   place = TARGET(kHuaweiAscendNPU);
   abs_error = 1e-2;  // precision_mode default is force_fp16
diff --git a/lite/tests/kernels/elementwise_compute_test.cc b/lite/tests/kernels/elementwise_compute_test.cc
index e62a991494b6035a1d9295f1d5f24d2f35bdb9ab..a4904080abfb565ba61f85c914adae55bcee3b8c 100644
--- a/lite/tests/kernels/elementwise_compute_test.cc
+++ b/lite/tests/kernels/elementwise_compute_test.cc
@@ -182,6 +182,12 @@ void TestElt(Place place,
              std::vector<int64_t> y_shape,
              int axis,
              std::string act_type = "") {
+#if defined(LITE_WITH_XPU)
+  if ((y_shape.size() != 1 && x_shape.size() != y_shape.size()) ||
+      elt_type != std::string("add") || !act_type.empty()) {
+    return;
+  }
+#endif
   std::unique_ptr<arena::TestCase> tester(new ElementwiseComputeTester(
       place, "def", elt_type, x_shape, y_shape, axis, act_type));
   arena::Arena arena(std::move(tester), place, abs_error);
@@ -231,8 +237,8 @@ TEST(Elementwise, precision) {
   abs_error = 1e-2;  // precision_mode default is force_fp16
 #elif defined(LITE_WITH_ARM)
   place = TARGET(kARM);
-// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
-// place = TARGET(kXPU);
+#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
+  place = TARGET(kXPU);
 #else
   return;
 #endif
diff --git a/lite/tests/kernels/layer_norm_compute_test.cc b/lite/tests/kernels/layer_norm_compute_test.cc
index fa483049d9043ac57d6f05ad328fd75ad25edcc6..686fb7e9128bbc89d0528cee6b0bdbbf007d33fe 100644
--- a/lite/tests/kernels/layer_norm_compute_test.cc
+++ b/lite/tests/kernels/layer_norm_compute_test.cc
@@ -156,8 +156,8 @@ TEST(LayerNorm, precision) {
 #elif defined(LITE_WITH_ARM)
   place = TARGET(kARM);
   abs_error = 6e-5;
-// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
-// place = TARGET(kXPU);
+#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
+  place = TARGET(kXPU);
 #else
   return;
 #endif
diff --git a/lite/tests/kernels/multiclass_nms_compute_test.cc b/lite/tests/kernels/multiclass_nms_compute_test.cc
index febee0d08e7533124768e32e7b671f20b2f30025..30ced1bf9384c802b0bfeab6f023ad7ba7dd415f 100644
--- a/lite/tests/kernels/multiclass_nms_compute_test.cc
+++ b/lite/tests/kernels/multiclass_nms_compute_test.cc
@@ -478,8 +478,6 @@ TEST(multiclass_nms, precision) {
   Place place;
 #if defined(LITE_WITH_ARM)
   place = TARGET(kHost);
-// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
-// place = TARGET(kXPU);
 #else
   return;
 #endif
diff --git a/lite/tests/kernels/pool_compute_test.cc b/lite/tests/kernels/pool_compute_test.cc
index 3e2d638affe7924b96b22d3a9b9332d199f6ec6c..67029c3f27f934c20858adea6a139e6e986f2052 100644
--- a/lite/tests/kernels/pool_compute_test.cc
+++ b/lite/tests/kernels/pool_compute_test.cc
@@ -313,6 +313,7 @@ void TestPoolPaddings(Place place, float abs_error = 2e-5) {
   for (auto pooling_type : {"max", "avg"}) {
     TestPoolHelper(
         place, abs_error, {2, 3, 6, 7}, pooling_type, {1, 1}, {0, 0}, {2, 2});
+#if !defined(LITE_WITH_XPU)
     TestPoolHelper(
         place, abs_error, {2, 3, 6, 7}, pooling_type, {1, 1}, {1, 1}, {2, 2});
     TestPoolHelper(place,
@@ -336,6 +337,7 @@ void TestPoolPaddings(Place place, float abs_error = 2e-5) {
                    {1, 1},
                    {1, 0, 0, 1},
                    {2, 2});
+#endif
   }
 }

@@ -349,6 +351,7 @@ void TestPoolKsize(Place place, float abs_error = 2e-5) {
                      {1, 1},
                      {0, 0},
                      {ksize, ksize});
+#if !defined(LITE_WITH_XPU)
       TestPoolHelper(place,
                      abs_error,
                      {2, 3, 6, 7},
@@ -356,12 +359,16 @@ void TestPoolKsize(Place place, float abs_error = 2e-5) {
                      {2, 2},
                      {1, 1},
                      {ksize, ksize});
+#endif
     }
   }
 }

 void TestPoolCeilMode(Place place, float abs_error = 2e-5) {
   for (auto pooling_type : {"max", "avg"}) {
+#if defined(LITE_WITH_XPU)
+    if (pooling_type == std::string("max")) continue;
+#endif
     TestPoolHelper(place,
                    abs_error,
                    {2, 3, 6, 6},
@@ -384,8 +391,8 @@ TEST(Pool, precision) {
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
   place = TARGET(kHuaweiAscendNPU);
   abs_error = 1e-2;  // precision_mode default is force_fp16
-// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)  // NOLINT
-// place = TARGET(kXPU);
+#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)  // NOLINT
+  place = TARGET(kXPU);
 #else
   return;
 #endif
diff --git a/lite/tests/kernels/transpose_compute_test.cc b/lite/tests/kernels/transpose_compute_test.cc
index b07272b99703f95bed278246fdf04be6696ec572..ee297c82f958a213fbf5cdc20c8c1dd9788a66f0 100644
--- a/lite/tests/kernels/transpose_compute_test.cc
+++ b/lite/tests/kernels/transpose_compute_test.cc
@@ -127,7 +127,12 @@ class TransposeComputeTester : public arena::TestCase {

 void TestTranspose2D(Place place, float abs_error) {
   DDim x_dims{{4, 5}};
-  std::vector<std::vector<int>> axes{{0, 1}, {1, 0}};
+  std::vector<std::vector<int>> axes {
+#if !defined(LITE_WITH_XPU)
+    {0, 1},
+#endif
+    {1, 0},
+  };
   for (auto axis : axes) {
     std::unique_ptr<arena::TestCase> tester(
         new TransposeComputeTester(place, "def", x_dims, axis));
@@ -138,8 +143,12 @@ void TestTranspose2D(Place place, float abs_error) {

 void TestTranspose3D(Place place, float abs_error) {
   DDim x_dims{{3, 4, 5}};
-  std::vector<std::vector<int>> axes{
-      {0, 1, 2}, {0, 2, 1}, {1, 0, 2}, {2, 1, 0}};
+  std::vector<std::vector<int>> axes {
+#if !defined(LITE_WITH_XPU)
+    {0, 1, 2},
+#endif
+    {0, 2, 1}, {1, 0, 2}, {2, 1, 0},
+  };
   for (auto axis : axes) {
     std::unique_ptr<arena::TestCase> tester(
         new TransposeComputeTester(place, "def", x_dims, axis));
@@ -150,8 +159,12 @@ void TestTranspose3D(Place place, float abs_error) {

 void TestTranspose4D(Place place, float abs_error) {
   DDim x_dims{{2, 3, 4, 5}};
-  std::vector<std::vector<int>> axes{
-      {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {3, 1, 2, 0}, {3, 1, 0, 2}};
+  std::vector<std::vector<int>> axes {
+#if !defined(LITE_WITH_XPU)
+    {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {3, 1, 2, 0}, {3, 1, 0, 2},
+#endif
+    {0, 2, 3, 1}, {0, 3, 1, 2},
+  };
   for (auto axis : axes) {
     std::unique_ptr<arena::TestCase> tester(
         new TransposeComputeTester(place, "def", x_dims, axis));
@@ -170,8 +183,8 @@ TEST(Transpose, precision) {
 #elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
   place = TARGET(kHuaweiAscendNPU);
   abs_error = 1e-2;  // precision_mode default is force_fp16
-// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)  // NOLINT
-// place = TARGET(kXPU);
+#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
+  place = TARGET(kXPU);
 #else
   return;
 #endif
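
The subgraph_compute.cc hunk above erases entries from output_names_ and origin_otensors_ in the same loop so the two parallel vectors stay aligned (hence the added CHECK_EQ). Below is a minimal standalone sketch of that erase-while-iterating pattern; it is not part of the patch, and all names and values in it are illustrative only.

#include <cassert>
#include <set>
#include <string>
#include <vector>

int main() {
  // Parallel vectors, standing in for output_names_ and origin_otensors_.
  std::vector<std::string> output_names = {"a", "b", "c"};
  std::vector<int> origin_otensors = {10, 20, 30};  // stand-in for tensors
  // Outputs that actually exist in the built graph (hypothetical set).
  std::set<std::string> graph_outputs = {"a", "c"};

  for (size_t i = 0; i < output_names.size(); i++) {
    if (graph_outputs.count(output_names[i]) == 0) {
      // Drop the useless output from both vectors; step back so the element
      // shifted into slot i is still checked on the next iteration.
      output_names.erase(output_names.begin() + i);
      origin_otensors.erase(origin_otensors.begin() + i);
      i--;
    }
  }
  // Both vectors shrink together, mirroring the CHECK_EQ added in the patch.
  assert(output_names.size() == origin_otensors.size());
  assert(output_names.size() == 2);  // "b" was removed
  return 0;
}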