diff --git a/lite/kernels/npu/bridges/fc_op_test.cc b/lite/kernels/npu/bridges/fc_op_test.cc deleted file mode 100644 index 77015236e2eed847d0ec0ea5c06e646e5893f29a..0000000000000000000000000000000000000000 --- a/lite/kernels/npu/bridges/fc_op_test.cc +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "lite/operators/fc_op.h" -#include -#include "lite/core/op_registry.h" -#include "lite/kernels/npu/bridges/registry.h" -#include "lite/kernels/npu/bridges/test_helper.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace npu { -namespace bridges { - -void fc_ref(const std::shared_ptr op) { - Scope* scope = op->scope(); - const OpInfo* op_info = op->op_info(); - auto input = - scope->FindVar(op_info->Input("Input").front())->GetMutable(); - auto w = scope->FindVar(op_info->Input("W").front())->GetMutable(); - auto out = - scope->FindVar(op_info->Output("Out").front())->GetMutable(); - int32_t in_num_col_dims = op_info->GetAttr("in_num_col_dims"); - Tensor* bias = nullptr; - float* bias_data = nullptr; - if (op_info->HasInput("Bias")) { - auto bias_var_names = op_info->Input("Bias"); - if (bias_var_names.size() > 0) { - auto bias_var_name = bias_var_names.front(); - bias = scope->FindVar(bias_var_name)->GetMutable(); - bias_data = bias->mutable_data(); - } - } - auto input_data = input->data(); - auto w_data = w->mutable_data(); - auto 
out_data = out->mutable_data(); - auto in_mat_dims = input->dims().Flatten2D(in_num_col_dims); - int out_num_classes = w->dims()[1]; - const int M = in_mat_dims[0]; - const int K = in_mat_dims[1]; - const int N = out_num_classes; - for (int m = 0; m < M; ++m) { - for (int n = 0; n < N; ++n) { - out_data[m * N + n] = 0; - for (int k = 0; k < K; ++k) { - out_data[m * N + n] += input_data[m * K + k] * w_data[k * N + n]; - } - } - } - if (bias_data != nullptr) { - for (int m = 0; m < M; ++m) { - for (int n = 0; n < N; ++n) { - out_data[m * N + n] += bias_data[n]; - } - } - } -} - -void test_fc(const std::vector& input_shape, - const std::vector& w_shape, - int in_num_col_dims, - bool has_bias) { - CHECK_EQ(w_shape.size(), 2UL); - - const auto& bridges = lite::kernels::npu::bridges::Factory::Instance(); - const auto& supported_lists = bridges.AllFunctions(); - CHECK(bridges.HasType("fc")); - - Scope scope; - std::string input_var_name("Input"); - std::string w_var_name("W"); - std::string bias_var_name("Bias"); - std::string out_var_name("Out"); - std::string out_ref_var_name("out_ref"); - auto* input = scope.Var(input_var_name)->GetMutable(); - auto* w = scope.Var(w_var_name)->GetMutable(); - auto* out = scope.Var(out_var_name)->GetMutable(); - auto* out_ref = scope.Var(out_ref_var_name)->GetMutable(); - input->Resize(input_shape); - w->Resize(w_shape); - - FillTensor(input); - FillTensor(w); - - // create fc op - cpp::OpDesc fc_op_desc; - fc_op_desc.SetType("fc"); - fc_op_desc.SetInput("Input", {input_var_name}); - fc_op_desc.SetInput("W", {w_var_name}); - fc_op_desc.SetOutput("Out", {out_var_name}); - fc_op_desc.SetAttr("in_num_col_dims", static_cast(in_num_col_dims)); - if (has_bias) { - auto* bias = scope.Var(bias_var_name)->GetMutable(); - bias->Resize({w_shape[1]}); - FillTensor(bias); - fc_op_desc.SetInput("Bias", {bias_var_name}); - } - - auto fc_op = CreateOp(fc_op_desc, &scope); - LauchOp(fc_op, {input_var_name}, {out_var_name}); - 
out_ref->CopyDataFrom(*out); - - // compare results - fc_ref(fc_op); - auto* out_data = out->mutable_data(); - auto* out_ref_data = out_ref->mutable_data(); - for (int i = 0; i < out->dims().production(); i++) { - EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5); - } -} - -TEST(NPUBridges, fc) { - for (bool use_bias : {true, false}) { - test_fc({1, 8, 8, 1}, {8, 4}, 2, use_bias); - test_fc({1, 5, 5, 1}, {5, 7}, 2, use_bias); - test_fc({1, 4, 1, 1}, {4, 8}, 1, use_bias); - test_fc({1, 1024, 1, 1}, {1024, 1000}, 1, use_bias); - } -} - -} // namespace bridges -} // namespace npu -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_OP(fc); -USE_NPU_BRIDGE(fc); diff --git a/lite/kernels/npu/bridges/reshape_op_test.cc b/lite/kernels/npu/bridges/reshape_op_test.cc deleted file mode 100644 index d675b5cac2bc8975e6ed9f8521a700f579d0e2b7..0000000000000000000000000000000000000000 --- a/lite/kernels/npu/bridges/reshape_op_test.cc +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "lite/operators/reshape_op.h" -#include -#include -#include "lite/core/op_registry.h" -#include "lite/kernels/npu/bridges/registry.h" -#include "lite/kernels/npu/bridges/test_helper.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace npu { -namespace bridges { - -void reshape_ref(const std::shared_ptr op) { - auto scope = op->scope(); - auto op_info = op->op_info(); - auto op_type = op_info->Type(); - auto x = scope->FindVar(op_info->Input("X").front())->GetMutable(); - auto out = - scope->FindVar(op_info->Output("Out").front())->GetMutable(); - auto x_dims = x->dims(); - auto shape = op_info->GetAttr>("shape"); - auto inplace = op_info->GetAttr("inplace"); - if (op_info->HasInput("Shape")) { - auto actual_shape_var_names = op_info->Input("Shape"); - if (actual_shape_var_names.size() > 0) { - auto actual_shape = scope->FindVar(actual_shape_var_names.front()) - ->GetMutable(); - auto actual_shape_dims = actual_shape->dims(); - auto* actual_shape_data = actual_shape->data(); - shape = - std::vector(actual_shape_data, - actual_shape_data + actual_shape_dims.production()); - } - } - if (inplace) { - out->ShareDataWith(*x); - } else { - out->CopyDataFrom(*x); - } - auto out_dims = operators::ValidateShape(shape, x_dims); - out->Resize(out_dims); -} - -void test_reshape(const std::vector& x_shape, - const std::vector& shape, - const std::vector& act_shape, - bool inplace, - bool reshape2) { - // prepare input&output variables - Scope scope; - std::string x_var_name("x"); - std::string actual_shape_var_name("actual_shape"); - std::string out_var_name("out"); - std::string out_ref_var_name("out_ref"); - std::string xshape_var_name("xshape"); - std::string xshape_ref_var_name("xshape_ref"); - auto x = scope.Var(x_var_name)->GetMutable(); - auto actual_shape = scope.Var(actual_shape_var_name)->GetMutable(); - auto out = scope.Var(out_var_name)->GetMutable(); - auto out_ref = scope.Var(out_ref_var_name)->GetMutable(); - auto xshape = 
scope.Var(xshape_var_name)->GetMutable(); - auto xshape_ref = scope.Var(xshape_ref_var_name)->GetMutable(); - - x->Resize(x_shape); - - // initialize input&output data - FillTensor(x); - - // initialize op desc - cpp::OpDesc opdesc; - opdesc.SetType(reshape2 ? "reshape2" : "reshape"); - opdesc.SetInput("X", {x_var_name}); - opdesc.SetOutput("Out", {out_var_name}); - opdesc.SetAttr("shape", shape); - opdesc.SetAttr("inplace", inplace); - if (!act_shape.empty()) { - int64_t act_shape_size = act_shape.size(); - actual_shape->Resize({act_shape_size}); - memcpy(actual_shape->mutable_data(), - act_shape.data(), - act_shape_size * sizeof(int)); - opdesc.SetInput("Shape", {actual_shape_var_name}); - } - if (reshape2) { - opdesc.SetOutput("XShape", {xshape_var_name}); - } - - // create op and execute reference implementation - auto op = reshape2 ? CreateOp(opdesc, &scope) - : CreateOp(opdesc, &scope); - reshape_ref(op); - out_ref->CopyDataFrom(*out); - if (reshape2) { - xshape_ref->CopyDataFrom(*xshape); - } - - // convert op to NPU model, then run it on NPU - LauchOp(op, - {x_var_name}, - {out_var_name}); // TODO(hong19860320) support XShape for reshape2 - - // compare results - auto out_dims = out->dims(); - auto out_ref_dims = out_ref->dims(); - CHECK_EQ(out_dims.size(), out_ref_dims.size()); - for (int i = 0; i < out_dims.size(); i++) { - CHECK_EQ(out_dims[i], out_ref_dims[i]); - } - auto out_data = out->mutable_data(); - auto out_ref_data = out_ref->mutable_data(); - for (int i = 0; i < out->dims().production(); i++) { - VLOG(5) << i; - EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5); - } - // if (reshape2) { - // auto xshape_dims = xshape->dims(); - // auto xshape_ref_dims = xshape_ref->dims(); - // CHECK_EQ(xshape_dims.size(), xshape_ref_dims.size()); - // for (size_t i = 0; i < xshape_dims.size(); i++) { - // CHECK_EQ(xshape_dims[i], xshape_ref_dims[i]); - // } - // } -} - -TEST(NPUBridges, reshape) { -#if 1 - std::map, std::vector>> tests = { - {{1, 2, 4, 6}, - 
{{}, - {-1}, - {48}, - {-1, 48}, - {1, 48}, - {0, 48}, - {48, -1}, - {48, 1}, - {-1, 24}, - {2, 24}, - {24, 0}, - {-1, 0, 3, 2}, - {4, 2, 3, 2}, - {0, -1, 3, 2}, - {1, 8, 3, 2}}}}; - for (auto& i : tests) { - for (auto& shape : i.second) { - if (shape.empty()) { - continue; - } - for (auto& act_shape : i.second) { - for (auto& inplace : {true, false}) { - for (auto& reshape2 : {true, false}) { - std::stringstream ss; - ss << "x:{ "; - for (auto s : i.first) { - ss << s << " "; - } - ss << "} shape:{ "; - for (auto s : shape) { - ss << s << " "; - } - ss << "} act_shape:{ "; - for (auto s : act_shape) { - ss << s << " "; - } - VLOG(3) << ss.str() << "} inplace:" << inplace - << " reshape2:" << reshape2; - test_reshape(i.first, shape, act_shape, inplace, reshape2); - } - } - } - } - } -#else - test_reshape({2, 4, 6}, {-1, 0, 4, 3}, {}, true, true); - test_reshape({1, 232, 14, 14}, {-1, 2, 116, 14, 14}, {}, true, true); -#endif -} - -} // namespace bridges -} // namespace npu -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_OP(reshape); -USE_NPU_BRIDGE(reshape); - -USE_LITE_OP(reshape2); -USE_NPU_BRIDGE(reshape2); diff --git a/lite/kernels/npu/bridges/softmax_op_test.cc b/lite/kernels/npu/bridges/softmax_op_test.cc deleted file mode 100644 index 3401a0f89db88eca21fda9d5654b73fd348a5ed0..0000000000000000000000000000000000000000 --- a/lite/kernels/npu/bridges/softmax_op_test.cc +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "lite/operators/softmax_op.h" -#include -#include "lite/core/op_registry.h" -#include "lite/kernels/npu/bridges/registry.h" -#include "lite/kernels/npu/bridges/test_helper.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace npu { -namespace bridges { - -template -void softmax_ref(const std::shared_ptr op) { - Scope* scope = op->scope(); - const OpInfo* op_info = op->op_info(); - auto x = scope->FindVar(op_info->Input("X").front())->GetMutable(); - auto out = - scope->FindVar(op_info->Output("Out").front())->GetMutable(); - auto x_data = x->data(); - auto out_data = out->mutable_data(); - DDim x_dims = x->dims(); - - auto x_rank = x_dims.size(); - int axis = op_info->GetAttr("axis"); - if (axis < 0) { - axis += x_rank; - } - int axis_size = x_dims[axis]; - int outer_num = x_dims.Slice(0, axis).production(); - int inner_num = x_dims.Slice(axis + 1, x_rank).production(); - int compute_size = outer_num * inner_num; - for (int i = 0; i < compute_size; i++) { - int idx_inner = i % inner_num; - int idx_outer = (i / inner_num) * axis_size; - int start = idx_outer * inner_num + idx_inner; - int offset; - - offset = start; - dtype max_data = std::numeric_limits::lowest(); - for (int j = 0; j < axis_size; j++) { - max_data = x_data[offset] > max_data ? 
x_data[offset] : max_data; - offset += inner_num; - } - - offset = start; - dtype sum_data = (dtype)0; - for (int j = 0; j < axis_size; j++) { - out_data[offset] = exp(x_data[offset] - max_data); - sum_data += out_data[offset]; - offset += inner_num; - } - - offset = start; - for (int j = 0; j < axis_size; j++) { - out_data[offset] /= sum_data; - offset += inner_num; - } - } -} - -void test_softmax(const std::vector& input_shape, int axis) { - // prepare input&output variables - Scope scope; - std::string x_var_name = "x"; - std::string out_var_name = "out"; - std::string out_ref_var_name = "out_ref"; - auto* x = scope.Var(x_var_name)->GetMutable(); - auto* out = scope.Var(out_var_name)->GetMutable(); - auto* out_ref = scope.Var(out_ref_var_name)->GetMutable(); - x->Resize(input_shape); - - // initialize input&output data - FillTensor(x); - - // initialize op desc - cpp::OpDesc opdesc; - opdesc.SetType("softmax"); - opdesc.SetInput("X", {x_var_name}); - opdesc.SetOutput("Out", {out_var_name}); - opdesc.SetAttr("axis", axis); - - // create and convert op to NPU model, then run it on NPU - auto op = CreateOp(opdesc, &scope); - LauchOp(op, {x_var_name}, {out_var_name}); - out_ref->CopyDataFrom(*out); - - // execute reference implementation and save to output tensor - softmax_ref(op); - - // compare results - auto* out_data = out->mutable_data(); - auto* out_ref_data = out_ref->mutable_data(); - for (int i = 0; i < out->dims().production(); i++) { - EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2); - } -} - -TEST(NPUBridges, softmax) { - test_softmax({1, 4}, -1); - // Bug exists in HiAI DDK when the number of items > 16500 - // test_softmax({1, 16500}, -1); - test_softmax({1, 4}, 0); - test_softmax({1, 4}, 1); - test_softmax({3, 4}, -1); - test_softmax({3, 4}, 0); - test_softmax({3, 4}, 1); - test_softmax({1, 4, 7}, -1); - test_softmax({1, 4, 7}, 0); - // Bug exists in HiAI DDK when axis is 1 and iw > 1 - // test_softmax({1, 4, 7}, 1); - test_softmax({1, 4, 1}, 1); - 
test_softmax({1, 4, 7}, 2); - test_softmax({3, 4, 7}, -1); - test_softmax({3, 4, 7}, 0); - test_softmax({3, 4, 1}, 1); - test_softmax({3, 4, 7}, 2); - test_softmax({1, 4, 7, 9}, -1); - test_softmax({1, 4, 7, 9}, 0); - test_softmax({1, 4, 7, 9}, 1); - // Bug exists in HiAI DDK when axis is 2 and iw > 1 - // test_softmax({1, 4, 7, 9}, 2); - test_softmax({1, 4, 7, 1}, 2); - test_softmax({1, 4, 7, 9}, 3); - test_softmax({3, 4, 7, 9}, -1); - test_softmax({3, 4, 7, 9}, 0); - test_softmax({3, 4, 7, 9}, 1); - test_softmax({3, 4, 7, 1}, 2); - test_softmax({3, 4, 7, 9}, 3); -} - -} // namespace bridges -} // namespace npu -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_OP(softmax); -USE_NPU_BRIDGE(softmax); diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index b6acfb45c11fe5e033a4eab23246ada9cd3d8451..3d6eb9eb8ace0dde1fba92af88ab3af20a87a2ed 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -2,21 +2,21 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_ lite_cc_test(test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} 
${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_elementwise_compute SRCS elementwise_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework ${npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_argmax_compute SRCS 
argmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_argmax_compute SRCS argmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} 
${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_instance_norm_compute SRCS instance_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) @@ -31,28 +31,28 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_ lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels}
${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) if(LITE_BUILD_EXTRA) - lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - 
lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} 
${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_generate_proposals_compute SRCS 
generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) endif()