From 76410577e1b55a78facf312333188d828dbd2f21 Mon Sep 17 00:00:00 2001 From: hong19860320 <9973393+hong19860320@users.noreply.github.com> Date: Fri, 25 Sep 2020 12:47:18 +0800 Subject: [PATCH] [CI] Enable CI for Huawei kirin NPU, Rockchip NPU and MediaTek APU (#4408) --- lite/CMakeLists.txt | 25 +- lite/core/arena/CMakeLists.txt | 2 +- lite/tests/api/CMakeLists.txt | 93 +-- .../test_mobilenetv1_fp32_huawei_kirin_npu.cc | 101 ++++ lite/tests/api/test_mobilenetv1_int8_apu.cc | 160 ----- .../api/test_mobilenetv1_int8_mediatek_apu.cc | 102 ++++ lite/tests/api/test_mobilenetv1_int8_rknpu.cc | 127 ---- .../api/test_mobilenetv1_int8_rockchip_npu.cc | 102 ++++ .../test_mobilenetv2_fp32_huawei_kirin_npu.cc | 101 ++++ .../test_resnet50_fp32_huawei_kirin_npu.cc | 101 ++++ lite/tests/kernels/CMakeLists.txt | 168 +++--- lite/tools/ci_build.sh | 567 +++++++++++++----- 12 files changed, 1064 insertions(+), 585 deletions(-) create mode 100644 lite/tests/api/test_mobilenetv1_fp32_huawei_kirin_npu.cc delete mode 100644 lite/tests/api/test_mobilenetv1_int8_apu.cc create mode 100644 lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc delete mode 100644 lite/tests/api/test_mobilenetv1_int8_rknpu.cc create mode 100644 lite/tests/api/test_mobilenetv1_int8_rockchip_npu.cc create mode 100644 lite/tests/api/test_mobilenetv2_fp32_huawei_kirin_npu.cc create mode 100644 lite/tests/api/test_resnet50_fp32_huawei_kirin_npu.cc diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt index d69f6d6d9e..abb769261f 100755 --- a/lite/CMakeLists.txt +++ b/lite/CMakeLists.txt @@ -38,34 +38,31 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND NOT LITE_ON_TINY_PUBLISH) endif() if (WITH_TESTING) + set(LITE_URL_FOR_UNITTESTS "http://paddle-inference-dist.bj.bcebos.com/PaddleLite/models_and_data_for_unittests") + # models lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz") + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1.tar.gz") + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz") + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "inception_v4_simple.tar.gz") if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1_int16.tar.gz") - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "resnet50.tar.gz") - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "inception_v4_simple.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "MobileNetV1_quant.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "transformer_with_mask_fp32.tar.gz") - endif() - if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "mobilenet_v1_int8_for_mediatek_apu.tar.gz") + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "mobilenet_v1_int8_for_rockchip_npu.tar.gz") + else() lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "GoogleNet_inference.tar.gz") - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1.tar.gz") - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz") - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "inception_v4_simple.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "step_rnn.tar.gz") - - 
set(LITE_URL_FOR_UNITTESTS "http://paddle-inference-dist.bj.bcebos.com/PaddleLite/models_and_data_for_unittests") - # models lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "resnet50.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "bert.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "ernie.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "GoogLeNet.tar.gz") lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "VGG19.tar.gz") - # data - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "ILSVRC2012_small.tar.gz") - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "bert_data.tar.gz") endif() + # data + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "ILSVRC2012_small.tar.gz") + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "bert_data.tar.gz") endif() # ----------------------------- PUBLISH ----------------------------- diff --git a/lite/core/arena/CMakeLists.txt b/lite/core/arena/CMakeLists.txt index 53988f063b..d5adf84753 100644 --- a/lite/core/arena/CMakeLists.txt +++ b/lite/core/arena/CMakeLists.txt @@ -6,5 +6,5 @@ endif() lite_cc_library(arena_framework SRCS framework.cc DEPS program gtest) if((NOT LITE_WITH_OPENCL) AND (LITE_WITH_X86 OR LITE_WITH_ARM)) - lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${rknpu_kernels} ${mlu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${rknpu_kernels} ${mlu_kernels} ${bm_kernels} ${npu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) endif() diff --git a/lite/tests/api/CMakeLists.txt b/lite/tests/api/CMakeLists.txt index 795b195a03..636d9d5574 100644 --- a/lite/tests/api/CMakeLists.txt +++ b/lite/tests/api/CMakeLists.txt @@ -1,52 +1,71 @@ -if(LITE_WITH_ARM) - lite_cc_test(test_transformer_with_mask_fp32_arm SRCS test_transformer_with_mask_fp32_arm.cc +function(lite_cc_test_with_model_and_data TARGET) + if(NOT WITH_TESTING) + return() + endif() + + set(options "") + set(oneValueArgs MODEL DATA CONFIG ARGS) + set(multiValueArgs "") + cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(ARGS "") + if(DEFINED args_MODEL) + set(ARGS "${ARGS} --model_dir=${LITE_MODEL_DIR}/${args_MODEL}") + endif() + if(DEFINED args_DATA) + set(ARGS "${ARGS} --data_dir=${LITE_MODEL_DIR}/${args_DATA}") + endif() + if(DEFINED args_CONFIG) + set(ARGS "${ARGS} --config_dir=${LITE_MODEL_DIR}/${args_CONFIG}") + endif() + if(DEFINED args_ARGS) + set(ARGS "${ARGS} ${args_ARGS}") + endif() + lite_cc_test(${TARGET} SRCS ${TARGET}.cc DEPS ${lite_model_test_DEPS} paddle_api_full ARM_DEPS ${arm_kernels} - ARGS --model_dir=${LITE_MODEL_DIR}/transformer_with_mask_fp32 SERIAL) - if(WITH_TESTING) - add_dependencies(test_transformer_with_mask_fp32_arm extern_lite_download_transformer_with_mask_fp32_tar_gz) + X86_DEPS ${x86_kernels} + NPU_DEPS ${npu_kernels} ${npu_bridges} + HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels} ${huawei_ascend_npu_bridges} + XPU_DEPS ${xpu_kernels} ${xpu_bridges} + APU_DEPS ${apu_kernels} ${apu_bridges} + RKNPU_DEPS ${rknpu_kernels} 
${rknpu_bridges} + BM_DEPS ${bm_kernels} ${bm_bridges} + MLU_DEPS ${mlu_kernels} ${mlu_bridges} + ARGS ${ARGS} SERIAL) + if(DEFINED args_MODEL) + add_dependencies(${TARGET} extern_lite_download_${args_MODEL}_tar_gz) endif() -endif() - -function(xpu_x86_without_xtcl_test TARGET MODEL DATA) - if(${DATA} STREQUAL "") - lite_cc_test(${TARGET} SRCS ${TARGET}.cc - DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils - ${ops} ${host_kernels} ${x86_kernels} ${xpu_kernels} - ARGS --model_dir=${LITE_MODEL_DIR}/${MODEL}) - else() - lite_cc_test(${TARGET} SRCS ${TARGET}.cc - DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils - ${ops} ${host_kernels} ${x86_kernels} ${xpu_kernels} - ARGS --model_dir=${LITE_MODEL_DIR}/${MODEL} --data_dir=${LITE_MODEL_DIR}/${DATA}) + if(DEFINED args_DATA) + add_dependencies(${TARGET} extern_lite_download_${args_DATA}_tar_gz) endif() - - if(WITH_TESTING) - add_dependencies(${TARGET} extern_lite_download_${MODEL}_tar_gz) - if(NOT ${DATA} STREQUAL "") - add_dependencies(${TARGET} extern_lite_download_${DATA}_tar_gz) - endif() + if(DEFINED args_CONFIG) + add_dependencies(${TARGET} extern_lite_download_${args_CONFIG}_tar_gz) endif() endfunction() +if(LITE_WITH_ARM) + lite_cc_test_with_model_and_data(test_transformer_with_mask_fp32_arm MODEL transformer_with_mask_fp32 ARGS) +endif() + +if(LITE_WITH_NPU) + lite_cc_test_with_model_and_data(test_mobilenetv1_fp32_huawei_kirin_npu MODEL mobilenet_v1 DATA ILSVRC2012_small) + lite_cc_test_with_model_and_data(test_mobilenetv2_fp32_huawei_kirin_npu MODEL mobilenet_v2_relu DATA ILSVRC2012_small) + lite_cc_test_with_model_and_data(test_resnet50_fp32_huawei_kirin_npu MODEL resnet50 DATA ILSVRC2012_small) +endif() + if(LITE_WITH_XPU AND NOT LITE_WITH_XTCL) - xpu_x86_without_xtcl_test(test_resnet50_fp32_xpu resnet50 ILSVRC2012_small) - xpu_x86_without_xtcl_test(test_googlenet_fp32_xpu GoogLeNet ILSVRC2012_small) - xpu_x86_without_xtcl_test(test_vgg19_fp32_xpu VGG19 ILSVRC2012_small) - xpu_x86_without_xtcl_test(test_ernie_fp32_xpu ernie bert_data) - xpu_x86_without_xtcl_test(test_bert_fp32_xpu bert bert_data) + lite_cc_test_with_model_and_data(test_resnet50_fp32_xpu MODEL resnet50 DATA ILSVRC2012_small) + lite_cc_test_with_model_and_data(test_googlenet_fp32_xpu MODEL GoogLeNet DATA ILSVRC2012_small) + lite_cc_test_with_model_and_data(test_vgg19_fp32_xpu MODEL VGG19 DATA ILSVRC2012_small) + lite_cc_test_with_model_and_data(test_ernie_fp32_xpu MODEL ernie DATA bert_data) + lite_cc_test_with_model_and_data(test_bert_fp32_xpu MODEL bert DATA bert_data) endif() if(LITE_WITH_RKNPU) - lite_cc_test(test_mobilenetv1_int8_rknpu SRCS test_mobilenetv1_int8_rknpu.cc - DEPS ${lite_model_test_DEPS} paddle_api_full - RKNPU_DEPS ${rknpu_kernels} ${rknpu_bridges} - ARGS --model_dir=${LITE_MODEL_DIR}/MobilenetV1_full_quant SERIAL) + lite_cc_test_with_model_and_data(test_mobilenetv1_int8_rockchip_npu MODEL mobilenet_v1_int8_for_rockchip_npu DATA ILSVRC2012_small) endif() if(LITE_WITH_APU) - lite_cc_test(test_mobilenetv1_int8_apu SRCS test_mobilenetv1_int8_apu.cc - DEPS ${lite_model_test_DEPS} paddle_api_full - APU_DEPS ${apu_kernels} ${apu_bridges} - ARGS --model_dir=${LITE_MODEL_DIR}/MobilenetV1_full_quant SERIAL) + lite_cc_test_with_model_and_data(test_mobilenetv1_int8_mediatek_apu MODEL mobilenet_v1_int8_for_mediatek_apu DATA ILSVRC2012_small) endif() diff --git a/lite/tests/api/test_mobilenetv1_fp32_huawei_kirin_npu.cc b/lite/tests/api/test_mobilenetv1_fp32_huawei_kirin_npu.cc new file mode 100644 
index 0000000000..0b890cdd08
--- /dev/null
+++ b/lite/tests/api/test_mobilenetv1_fp32_huawei_kirin_npu.cc
@@ -0,0 +1,101 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "lite/api/lite_api_test_helper.h"
+#include "lite/api/paddle_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
+#include "lite/tests/api/ILSVRC2012_utility.h"
+#include "lite/utils/cp_logging.h"
+
+DEFINE_string(data_dir, "", "data dir");
+DEFINE_int32(iteration, 100, "iteration times to run");
+DEFINE_int32(batch, 1, "batch of image");
+DEFINE_int32(channel, 3, "image channel");
+
+namespace paddle {
+namespace lite {
+
+TEST(MobileNetV1, test_mobilenetv1_fp32_huawei_kirin_npu) {
+  lite_api::CxxConfig config;
+  config.set_model_dir(FLAGS_model_dir);
+  config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
+                           lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}});
+  auto predictor = lite_api::CreatePaddlePredictor(config);
+
+  std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
+  std::vector<int> input_shape{
+      FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
+  auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
+
+  int input_size = 1;
+  for (auto i : input_shape) {
+    input_size *= i;
+  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    for (int j = 0; j < input_size; j++) {
+      data[j] = 0.f;
+    }
+    predictor->Run();
+  }
+
+  std::vector<std::vector<float>> out_rets;
+  out_rets.resize(FLAGS_iteration);
+  double cost_time = 0;
+  for (size_t i = 0; i < raw_data.size(); ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
+
+    double start = GetCurrentUS();
+    predictor->Run();
+    cost_time += GetCurrentUS() - start;
+
+    auto output_tensor = predictor->GetOutput(0);
+    auto output_shape = output_tensor->shape();
+    auto output_data = output_tensor->data<float>();
+    ASSERT_EQ(output_shape.size(), 2UL);
+    ASSERT_EQ(output_shape[0], 1);
+    ASSERT_EQ(output_shape[1], 1000);
+
+    int output_size = output_shape[0] * output_shape[1];
+    out_rets[i].resize(output_size);
+    memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
+            << ", iteration: " << FLAGS_iteration << ", spend "
+            << cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
+
+  std::string labels_dir = FLAGS_data_dir +
std::string("/labels.txt"); + float out_accuracy = CalOutAccuracy(out_rets, labels_dir); + ASSERT_GE(out_accuracy, 0.57f); +} + +} // namespace lite +} // namespace paddle diff --git a/lite/tests/api/test_mobilenetv1_int8_apu.cc b/lite/tests/api/test_mobilenetv1_int8_apu.cc deleted file mode 100644 index 730ed3e823..0000000000 --- a/lite/tests/api/test_mobilenetv1_int8_apu.cc +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include "lite/api/paddle_api.h" -#include "lite/api/paddle_use_kernels.h" -#include "lite/api/paddle_use_ops.h" -#include "lite/api/paddle_use_passes.h" -using namespace paddle::lite_api; // NOLINT - -inline double GetCurrentUS() { - struct timeval time; - gettimeofday(&time, NULL); - return 1e+6 * time.tv_sec + time.tv_usec; -} - -inline int64_t ShapeProduction(std::vector shape) { - int64_t s = 1; - for (int64_t dim : shape) { - s *= dim; - } - return s; -} - -int main(int argc, char** argv) { - if (argc < 2) { - std::cerr << "[ERROR] usage: ./" << argv[0] - << " model_dir [thread_num] [warmup_times] [repeat_times] " - "[input_data_path] [output_data_path]" - << std::endl; - return -1; - } - std::string model_dir = argv[1]; - int thread_num = 1; - if (argc > 2) { - thread_num = atoi(argv[2]); - } - int warmup_times = 5; - if (argc > 3) { - warmup_times = atoi(argv[3]); - } - int repeat_times = 10; - if (argc > 4) { - repeat_times = atoi(argv[4]); - } - std::string input_data_path; - if (argc > 5) { - input_data_path = argv[5]; - } - std::string output_data_path; - if (argc > 6) { - output_data_path = argv[6]; - } - paddle::lite_api::CxxConfig config; - config.set_model_dir(model_dir); - config.set_threads(thread_num); - config.set_power_mode(paddle::lite_api::LITE_POWER_HIGH); - config.set_valid_places( - {paddle::lite_api::Place{ - TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - paddle::lite_api::Place{ - TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)}, - paddle::lite_api::Place{ - TARGET(kAPU), PRECISION(kInt8), DATALAYOUT(kNCHW)}}); - auto predictor = paddle::lite_api::CreatePaddlePredictor(config); - - std::unique_ptr input_tensor( - std::move(predictor->GetInput(0))); - input_tensor->Resize({1, 3, 224, 224}); - auto input_data = input_tensor->mutable_data(); - auto input_size = ShapeProduction(input_tensor->shape()); - - // test loop - int total_imgs = 500; - float test_num = 0; - float top1_num = 0; - float top5_num = 0; - int output_len = 1000; - std::vector index(1000); - bool debug = true; // false; - int show_step = 500; - for (int i = 0; i < total_imgs; i++) { - // set input - std::string filename = input_data_path + "/" + std::to_string(i); - std::ifstream fs(filename, std::ifstream::binary); - if (!fs.is_open()) { - std::cout << "open input file fail."; - } - auto input_data_tmp = input_data; - for (int i = 0; i < input_size; ++i) { - fs.read(reinterpret_cast(input_data_tmp), 
sizeof(*input_data_tmp)); - input_data_tmp++; - } - int label = 0; - fs.read(reinterpret_cast(&label), sizeof(label)); - fs.close(); - - if (debug && i % show_step == 0) { - std::cout << "input data:" << std::endl; - std::cout << input_data[0] << " " << input_data[10] << " " - << input_data[input_size - 1] << std::endl; - std::cout << "label:" << label << std::endl; - } - - // run - predictor->Run(); - auto output0 = predictor->GetOutput(0); - auto output0_data = output0->data(); - - // get output - std::iota(index.begin(), index.end(), 0); - std::stable_sort( - index.begin(), index.end(), [output0_data](size_t i1, size_t i2) { - return output0_data[i1] > output0_data[i2]; - }); - test_num++; - if (label == index[0]) { - top1_num++; - } - for (int i = 0; i < 5; i++) { - if (label == index[i]) { - top5_num++; - } - } - - if (debug && i % show_step == 0) { - std::cout << index[0] << " " << index[1] << " " << index[2] << " " - << index[3] << " " << index[4] << std::endl; - std::cout << output0_data[index[0]] << " " << output0_data[index[1]] - << " " << output0_data[index[2]] << " " - << output0_data[index[3]] << " " << output0_data[index[4]] - << std::endl; - std::cout << output0_data[630] << std::endl; - } - if (i % show_step == 0) { - std::cout << "step " << i << "; top1 acc:" << top1_num / test_num - << "; top5 acc:" << top5_num / test_num << std::endl; - } - } - std::cout << "final result:" << std::endl; - std::cout << "top1 acc:" << top1_num / test_num << std::endl; - std::cout << "top5 acc:" << top5_num / test_num << std::endl; - return 0; -} diff --git a/lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc b/lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc new file mode 100644 index 0000000000..76b3722d2d --- /dev/null +++ b/lite/tests/api/test_mobilenetv1_int8_mediatek_apu.cc @@ -0,0 +1,102 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
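+
+// NOTE: the device tests added in this patch all share one structure -- build a
+// CxxConfig predictor for the target backend, feed preprocessed ILSVRC2012
+// images from --data_dir/raw_data for --iteration batches, log the average
+// latency, and assert a backend-specific accuracy threshold computed by
+// CalOutAccuracy against --data_dir/labels.txt.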
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "lite/api/lite_api_test_helper.h"
+#include "lite/api/paddle_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
+#include "lite/tests/api/ILSVRC2012_utility.h"
+#include "lite/utils/cp_logging.h"
+
+DEFINE_string(data_dir, "", "data dir");
+DEFINE_int32(iteration, 100, "iteration times to run");
+DEFINE_int32(batch, 1, "batch of image");
+DEFINE_int32(channel, 3, "image channel");
+
+namespace paddle {
+namespace lite {
+
+TEST(MobileNetV1, test_mobilenetv1_int8_mediatek_apu) {
+  lite_api::CxxConfig config;
+  config.set_model_dir(FLAGS_model_dir);
+  config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
+                           lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
+                           lite_api::Place{TARGET(kAPU), PRECISION(kInt8)}});
+  auto predictor = lite_api::CreatePaddlePredictor(config);
+
+  std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
+  std::vector<int> input_shape{
+      FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
+  auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
+
+  int input_size = 1;
+  for (auto i : input_shape) {
+    input_size *= i;
+  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    for (int j = 0; j < input_size; j++) {
+      data[j] = 0.f;
+    }
+    predictor->Run();
+  }
+
+  std::vector<std::vector<float>> out_rets;
+  out_rets.resize(FLAGS_iteration);
+  double cost_time = 0;
+  for (size_t i = 0; i < raw_data.size(); ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
+
+    double start = GetCurrentUS();
+    predictor->Run();
+    cost_time += GetCurrentUS() - start;
+
+    auto output_tensor = predictor->GetOutput(0);
+    auto output_shape = output_tensor->shape();
+    auto output_data = output_tensor->data<float>();
+    ASSERT_EQ(output_shape.size(), 2UL);
+    ASSERT_EQ(output_shape[0], 1);
+    ASSERT_EQ(output_shape[1], 1000);
+
+    int output_size = output_shape[0] * output_shape[1];
+    out_rets[i].resize(output_size);
+    memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
+            << ", iteration: " << FLAGS_iteration << ", spend "
+            << cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
+
+  std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
+  float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
+  ASSERT_GE(out_accuracy, 0.55f);
+}
+
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/tests/api/test_mobilenetv1_int8_rknpu.cc b/lite/tests/api/test_mobilenetv1_int8_rknpu.cc
deleted file mode 100644
index 8c123088b3..0000000000
--- a/lite/tests/api/test_mobilenetv1_int8_rknpu.cc
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include "lite/api/paddle_api.h" -#include "lite/api/paddle_use_kernels.h" -#include "lite/api/paddle_use_ops.h" -#include "lite/api/paddle_use_passes.h" - -inline double GetCurrentUS() { - struct timeval time; - gettimeofday(&time, NULL); - return 1e+6 * time.tv_sec + time.tv_usec; -} - -inline int64_t ShapeProduction(std::vector shape) { - int64_t s = 1; - for (int64_t dim : shape) { - s *= dim; - } - return s; -} - -int main(int argc, char** argv) { - if (argc < 2) { - std::cerr << "[ERROR] usage: ./" << argv[0] - << " model_dir [thread_num] [warmup_times] [repeat_times] " - "[input_data_path] [output_data_path]" - << std::endl; - return -1; - } - std::string model_dir = argv[1]; - int thread_num = 1; - if (argc > 2) { - thread_num = atoi(argv[2]); - } - int warmup_times = 5; - if (argc > 3) { - warmup_times = atoi(argv[3]); - } - int repeat_times = 10; - if (argc > 4) { - repeat_times = atoi(argv[4]); - } - std::string input_data_path; - if (argc > 5) { - input_data_path = argv[5]; - } - std::string output_data_path; - if (argc > 6) { - output_data_path = argv[6]; - } - paddle::lite_api::CxxConfig config; - config.set_model_dir(model_dir); - config.set_threads(thread_num); - config.set_power_mode(paddle::lite_api::LITE_POWER_HIGH); - config.set_valid_places( - {paddle::lite_api::Place{ - TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - paddle::lite_api::Place{ - TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)}, - paddle::lite_api::Place{ - TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)}, - paddle::lite_api::Place{ - TARGET(kRKNPU), PRECISION(kInt8), DATALAYOUT(kNCHW)}}); - auto predictor = paddle::lite_api::CreatePaddlePredictor(config); - - std::unique_ptr input_tensor( - std::move(predictor->GetInput(0))); - input_tensor->Resize({1, 3, 224, 224}); - auto input_data = input_tensor->mutable_data(); - auto input_size = ShapeProduction(input_tensor->shape()); - if (input_data_path.empty()) { - for (int i = 0; i < input_size; i++) { - input_data[i] = 1; - } - } else { - std::fstream fs(input_data_path, std::ios::in); - if (!fs.is_open()) { - std::cerr << "open input data file failed." << std::endl; - return -1; - } - for (int i = 0; i < input_size; i++) { - fs >> input_data[i]; - } - } - - for (int i = 0; i < warmup_times; ++i) { - predictor->Run(); - } - - auto start = GetCurrentUS(); - for (int i = 0; i < repeat_times; ++i) { - predictor->Run(); - } - - std::cout << "Model: " << model_dir << ", threads num " << thread_num - << ", warmup times: " << warmup_times - << ", repeat times: " << repeat_times << ", spend " - << (GetCurrentUS() - start) / repeat_times / 1000.0 - << " ms in average." 
<< std::endl;
-
-  std::unique_ptr<paddle::lite_api::Tensor> output_tensor(
-      std::move(predictor->GetOutput(0)));
-  auto output_data = output_tensor->data<float>();
-  auto output_size = ShapeProduction(output_tensor->shape());
-  std::cout << "output data:";
-  for (int i = 0; i < output_size; i += 100) {
-    std::cout << "[" << i << "] " << output_data[i] << std::endl;
-  }
-  return 0;
-}
diff --git a/lite/tests/api/test_mobilenetv1_int8_rockchip_npu.cc b/lite/tests/api/test_mobilenetv1_int8_rockchip_npu.cc
new file mode 100644
index 0000000000..7b52e4398a
--- /dev/null
+++ b/lite/tests/api/test_mobilenetv1_int8_rockchip_npu.cc
@@ -0,0 +1,102 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "lite/api/lite_api_test_helper.h"
+#include "lite/api/paddle_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
+#include "lite/tests/api/ILSVRC2012_utility.h"
+#include "lite/utils/cp_logging.h"
+
+DEFINE_string(data_dir, "", "data dir");
+DEFINE_int32(iteration, 100, "iteration times to run");
+DEFINE_int32(batch, 1, "batch of image");
+DEFINE_int32(channel, 3, "image channel");
+
+namespace paddle {
+namespace lite {
+
+TEST(MobileNetV1, test_mobilenetv1_int8_rockchip_npu) {
+  lite_api::CxxConfig config;
+  config.set_model_dir(FLAGS_model_dir);
+  config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
+                           lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
+                           lite_api::Place{TARGET(kRKNPU), PRECISION(kInt8)}});
+  auto predictor = lite_api::CreatePaddlePredictor(config);
+
+  std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
+  std::vector<int> input_shape{
+      FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
+  auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
+
+  int input_size = 1;
+  for (auto i : input_shape) {
+    input_size *= i;
+  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    for (int j = 0; j < input_size; j++) {
+      data[j] = 0.f;
+    }
+    predictor->Run();
+  }
+
+  std::vector<std::vector<float>> out_rets;
+  out_rets.resize(FLAGS_iteration);
+  double cost_time = 0;
+  for (size_t i = 0; i < raw_data.size(); ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
+
+    double start = GetCurrentUS();
+    predictor->Run();
+    cost_time += GetCurrentUS() - start;
+
+    auto output_tensor = predictor->GetOutput(0);
+    auto output_shape = output_tensor->shape();
+    auto output_data = output_tensor->data<float>();
+    ASSERT_EQ(output_shape.size(), 2UL);
+    ASSERT_EQ(output_shape[0], 1);
+    ASSERT_EQ(output_shape[1], 1000);
+
+    int output_size = output_shape[0] * output_shape[1];
+    out_rets[i].resize(output_size);
+    memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
+            << ", iteration: " << FLAGS_iteration << ", spend "
+            << cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
+
+  std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
+  float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
+  ASSERT_GE(out_accuracy, 0.52f);
+}
+
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/tests/api/test_mobilenetv2_fp32_huawei_kirin_npu.cc b/lite/tests/api/test_mobilenetv2_fp32_huawei_kirin_npu.cc
new file mode 100644
index 0000000000..11fa9df420
--- /dev/null
+++ b/lite/tests/api/test_mobilenetv2_fp32_huawei_kirin_npu.cc
@@ -0,0 +1,101 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "lite/api/lite_api_test_helper.h"
+#include "lite/api/paddle_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
+#include "lite/tests/api/ILSVRC2012_utility.h"
+#include "lite/utils/cp_logging.h"
+
+DEFINE_string(data_dir, "", "data dir");
+DEFINE_int32(iteration, 100, "iteration times to run");
+DEFINE_int32(batch, 1, "batch of image");
+DEFINE_int32(channel, 3, "image channel");
+
+namespace paddle {
+namespace lite {
+
+TEST(MobileNetV2, test_mobilenetv2_fp32_huawei_kirin_npu) {
+  lite_api::CxxConfig config;
+  config.set_model_dir(FLAGS_model_dir);
+  config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
+                           lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}});
+  auto predictor = lite_api::CreatePaddlePredictor(config);
+
+  std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
+  std::vector<int> input_shape{
+      FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
+  auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
+
+  int input_size = 1;
+  for (auto i : input_shape) {
+    input_size *= i;
+  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    for (int j = 0; j < input_size; j++) {
+      data[j] = 0.f;
+    }
+    predictor->Run();
+  }
+
+  std::vector<std::vector<float>> out_rets;
+  out_rets.resize(FLAGS_iteration);
+  double cost_time = 0;
+  for (size_t i = 0; i < raw_data.size(); ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
+
+    double start = GetCurrentUS();
+    predictor->Run();
+    cost_time += GetCurrentUS() - start;
+
+    auto output_tensor = predictor->GetOutput(0);
+    auto output_shape = output_tensor->shape();
+    auto output_data = output_tensor->data<float>();
+    ASSERT_EQ(output_shape.size(), 2UL);
+    ASSERT_EQ(output_shape[0], 1);
+    ASSERT_EQ(output_shape[1], 1000);
+
+    int output_size = output_shape[0] * output_shape[1];
+    out_rets[i].resize(output_size);
+    memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
+            << ", iteration: " << FLAGS_iteration << ", spend "
+            << cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
+
+  std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
+  float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
+  ASSERT_GE(out_accuracy, 0.57f);
+}
+
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/tests/api/test_resnet50_fp32_huawei_kirin_npu.cc b/lite/tests/api/test_resnet50_fp32_huawei_kirin_npu.cc
new file mode 100644
index 0000000000..af48c5c5db
--- /dev/null
+++ b/lite/tests/api/test_resnet50_fp32_huawei_kirin_npu.cc
@@ -0,0 +1,101 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
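+
+// NOTE: same ILSVRC2012 accuracy-check structure as the MobileNet tests above;
+// only the model directory and the accuracy floor (0.64 here) differ.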
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "lite/api/lite_api_test_helper.h"
+#include "lite/api/paddle_api.h"
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/api/paddle_use_passes.h"
+#include "lite/api/test_helper.h"
+#include "lite/tests/api/ILSVRC2012_utility.h"
+#include "lite/utils/cp_logging.h"
+
+DEFINE_string(data_dir, "", "data dir");
+DEFINE_int32(iteration, 100, "iteration times to run");
+DEFINE_int32(batch, 1, "batch of image");
+DEFINE_int32(channel, 3, "image channel");
+
+namespace paddle {
+namespace lite {
+
+TEST(ResNet50, test_resnet50_fp32_huawei_kirin_npu) {
+  lite_api::CxxConfig config;
+  config.set_model_dir(FLAGS_model_dir);
+  config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
+                           lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}});
+  auto predictor = lite_api::CreatePaddlePredictor(config);
+
+  std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
+  std::vector<int> input_shape{
+      FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
+  auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
+
+  int input_size = 1;
+  for (auto i : input_shape) {
+    input_size *= i;
+  }
+
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    for (int j = 0; j < input_size; j++) {
+      data[j] = 0.f;
+    }
+    predictor->Run();
+  }
+
+  std::vector<std::vector<float>> out_rets;
+  out_rets.resize(FLAGS_iteration);
+  double cost_time = 0;
+  for (size_t i = 0; i < raw_data.size(); ++i) {
+    auto input_tensor = predictor->GetInput(0);
+    input_tensor->Resize(
+        std::vector<int64_t>(input_shape.begin(), input_shape.end()));
+    auto* data = input_tensor->mutable_data<float>();
+    memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
+
+    double start = GetCurrentUS();
+    predictor->Run();
+    cost_time += GetCurrentUS() - start;
+
+    auto output_tensor = predictor->GetOutput(0);
+    auto output_shape = output_tensor->shape();
+    auto output_data = output_tensor->data<float>();
+    ASSERT_EQ(output_shape.size(), 2UL);
+    ASSERT_EQ(output_shape[0], 1);
+    ASSERT_EQ(output_shape[1], 1000);
+
+    int output_size = output_shape[0] * output_shape[1];
+    out_rets[i].resize(output_size);
+    memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
+            << ", iteration: " << FLAGS_iteration << ", spend "
+            << cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
+
+  std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
+  float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
+  ASSERT_GE(out_accuracy, 0.64f);
+}
+
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
index 00fec722eb..f1601b5c45 100644
--- a/lite/tests/kernels/CMakeLists.txt
+++ b/lite/tests/kernels/CMakeLists.txt
@@ -1,99 +1,99 @@
-if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM AND NOT LITE_WITH_MLU AND NOT LITE_WITH_RKNPU) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
-  lite_cc_test(test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-
lite_cc_test(test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_elementwise_compute SRCS elementwise_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_argmax_compute SRCS argmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_instance_norm_compute SRCS instance_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} 
${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_group_norm_compute SRCS group_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) +if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM AND NOT LITE_WITH_MLU) AND (LITE_WITH_X86 OR LITE_WITH_ARM)) + lite_cc_test(test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_elementwise_compute SRCS elementwise_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} 
${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_argmax_compute SRCS argmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_instance_norm_compute SRCS instance_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_group_norm_compute SRCS group_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_logical_compute SRCS logical_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} 
${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_multiclass_nms_compute SRCS multiclass_nms_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_fill_constant_compute SRCS fill_constant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_fill_constant_batch_size_like_compute SRCS fill_constant_batch_size_like_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_logical_compute SRCS logical_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} 
${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_multiclass_nms_compute SRCS multiclass_nms_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_pool_compute SRCS 
pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_fill_constant_compute SRCS fill_constant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_fill_constant_batch_size_like_compute SRCS fill_constant_batch_size_like_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) if(LITE_BUILD_EXTRA) - lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - #lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_sequence_conv_compute SRCS sequence_conv_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_sum_compute SRCS reduce_sum_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_stack_compute SRCS 
stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + #lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sequence_conv_compute SRCS sequence_conv_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_sum_compute SRCS reduce_sum_compute_test.cc DEPS arena_framework ${xpu_kernels} 
${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_ctc_align_compute SRCS ctc_align_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_clip_compute SRCS clip_compute_test.cc DEPS 
arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_pixel_shuffle_compute SRCS pixel_shuffle_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_scatter_compute SRCS scatter_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_sequence_expand_as_compute SRCS sequence_expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_ctc_align_compute SRCS ctc_align_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_clip_compute SRCS clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_pixel_shuffle_compute SRCS pixel_shuffle_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_scatter_compute SRCS scatter_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sequence_expand_as_compute SRCS sequence_expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} 
${arm_kernels} ${lite_ops} ${host_kernels}) # for training kernel if (LITE_WITH_TRAIN) - lite_cc_test(test_kernel_mean_compute SRCS mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_activation_grad_compute SRCS activation_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_elementwise_grad_compute SRCS elementwise_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_mul_grad_compute SRCS mul_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_sgd_compute SRCS sgd_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_sequence_pool_grad_compute SRCS sequence_pool_grad_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_mean_compute SRCS mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_activation_grad_compute SRCS activation_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_elementwise_grad_compute SRCS elementwise_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_mul_grad_compute SRCS mul_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sgd_compute SRCS sgd_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sequence_pool_grad_compute SRCS sequence_pool_grad_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) endif() endif() - lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} 
${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_interp_compute SRCS interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_is_empty_compute SRCS is_empty_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_expand_as_compute SRCS expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_flatten_compute SRCS flatten_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - #lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_prior_box_compute SRCS 
prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_interp_compute SRCS interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_is_empty_compute SRCS is_empty_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_expand_as_compute SRCS expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_flatten_compute SRCS flatten_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + 
#lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_uniform_random_compute SRCS uniform_random_compute_test.cc DEPS arena_framework ${lite_ops} ${host_kernels}) endif() diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh index 166137bf02..d5d8fd6c46 100755 --- a/lite/tools/ci_build.sh +++ b/lite/tools/ci_build.sh @@ -18,6 +18,10 @@ NUM_CORES_FOR_COMPILE=${LITE_BUILD_THREADS:-8} # global variables #whether to use emulator as adb devices,when USE_ADB_EMULATOR=ON we use emulator, else we will use connected mobile phone as adb devices. USE_ADB_EMULATOR=ON +# Use real android devices, set the device names for adb connection, ignored if USE_ADB_EMULATOR=ON +ADB_DEVICE_LIST="" +# The list of tests which are ignored, use commas to separate them, such as "test_cxx_api,test_mobilenetv1_int8" +TEST_SKIP_LIST="" LITE_WITH_COVERAGE=OFF # if operating in mac env, we should expand the maximum file num @@ -392,6 +396,380 @@ function build_test_xpu { test_xpu } +function is_available_adb_device { + local adb_device_name=$1 + if [[ -n "$adb_device_name" ]]; then + for line in `adb devices | grep -v "List" | awk '{print $1}'` + do + online_device_name=`echo $line | awk '{print $1}'` + if [[ "$adb_device_name" == "$online_device_name" ]];then + return 0 + fi + done + fi + return 1 +} + +function pick_an_available_adb_device { + local adb_device_list=$1 + local adb_device_names=(${adb_device_list//,/ }) + for adb_device_name in ${adb_device_names[@]}; do + is_available_adb_device $adb_device_name + if [[ $? -eq 0 ]]; then + echo $adb_device_name + return 0 + fi + done + echo "" + return 1 +} + +function run_test_case_on_adb_device { + local adb_device_name="" + local adb_work_dir="" + local target_name="" + local model_dir="" + local data_dir="" + local config_dir="" + # Extract arguments from command line + for i in "$@"; do + case $i in + --adb_device_name=*) + adb_device_name="${i#*=}" + shift + ;; + --adb_work_dir=*) + adb_work_dir="${i#*=}" + shift + ;; + --target_name=*) + target_name="${i#*=}" + shift + ;; + --model_dir=*) + model_dir="${i#*=}" + shift + ;; + --data_dir=*) + data_dir="${i#*=}" + shift + ;; + --config_dir=*) + config_dir="${i#*=}" + shift + ;; + *) + shift + ;; + esac + done + + # Check device is available + is_available_adb_device $adb_device_name + if [[ $? -ne 0 ]]; then + echo "$adb_device_name not found!" + exit 1 + fi + + # Be careful!!! Don't delete the root or system directories if the device is rooted. + if [[ -z "$adb_work_dir" ]]; then + echo "$adb_work_dir can't be empty!" + exit 1 + fi + if [[ "$adb_work_dir" == "/" ]]; then + echo "$adb_work_dir can't be root dir!" + exit 1 + fi + + # Copy the executable unit test to the remote device + local target_path=$(find ./lite -name $target_name) + if [[ -z "$target_path" ]]; then + echo "$target_name not found!" 
+ exit 1 + fi + adb -s $adb_device_name shell "rm -f $adb_work_dir/$target_name" + adb -s $adb_device_name push $target_path $adb_work_dir + + local command_line="./$target_name" + # Copy the model files to the remote device + if [[ -n "$model_dir" ]]; then + local model_name=$(basename $model_dir) + adb -s $adb_device_name shell "rm -rf $adb_work_dir/$model_name" + adb -s $adb_device_name push $model_dir $adb_work_dir + command_line="$command_line --model_dir ./$model_name" + fi + + # Copy the test data files to the remote device + if [[ -n "$data_dir" ]]; then + local data_name=$(basename $data_dir) + adb -s $adb_device_name shell "rm -rf $adb_work_dir/$data_name" + adb -s $adb_device_name push $data_dir $adb_work_dir + command_line="$command_line --data_dir ./$data_name" + fi + + # Copy the config files to the remote device + if [[ -n "$config_dir" ]]; then + local config_name=$(basename $config_dir) + adb -s $adb_device_name shell "rm -rf $adb_work_dir/$config_name" + adb -s $adb_device_name push $config_dir $adb_work_dir + command_line="$command_line --config_dir ./$config_name" + fi + + # Run the model on the remote device + adb -s $adb_device_name shell "cd $adb_work_dir; export GLOG_v=5; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:. $command_line" +} + +function run_all_tests_on_adb_device { + local adb_device_list=$1 + local test_skip_list=$2 + local adb_work_dir=$3 + local sdk_root_dir=$4 + local test_arch_list=$5 + local test_toolchain_list=$6 + local build_targets_func=$7 + local prepare_devices_func=$8 + + # Pick the first available adb device from list + local adb_device_name=$(pick_an_available_adb_device $adb_device_list) + if [[ -z $adb_device_name ]]; then + echo "No adb device available!" + exit 1 + else + echo "Found a device $adb_device_name." + fi + + # Run all of unittests and model tests + local test_archs=(${test_arch_list//,/ }) + local test_toolchains=(${test_toolchain_list//,/ }) + local test_skip_names=(${test_skip_list//,/ }) + local test_model_params=(${test_model_list//:/ }) + for arch in $test_archs; do + for toolchain in $test_toolchains; do + # Build all tests and prepare device environment for running tests + echo "Build tests for MediaTek APU with $arch+$toolchain" + ${build_targets_func} $arch $toolchain $sdk_root_dir + ${prepare_devices_func} $adb_device_name $adb_work_dir $arch $toolchain $sdk_root_dir + # Run all of unit tests and model tests + for test_name in $(cat $TESTS_FILE); do + local is_skip=0 + for test_skip_name in ${test_skip_names[@]}; do + if [[ "$test_skip_name" == "$test_name" ]]; then + echo "skip " $test_name + is_skip=1 + break + fi + done + if [[ $is_skip -ne 0 ]]; then + continue + fi + # Extract the arguments from ctest command line + test_args=$(echo $(ctest -V -N -R ${test_name}) | sed "/.*${test_name} \"\(.*\)\".*/ s//\1/g") + run_test_case_on_adb_device --adb_device_name=$adb_device_name --adb_work_dir=$adb_work_dir --target_name=$test_name $test_args + done + cd - > /dev/null + done + done +} + +# Huawei Kirin NPU +function huawei_kirin_npu_prepare_device { + local adb_device_name=$1 + local adb_work_dir=$2 + local arch=$3 + local toolchain=$4 + local sdk_root_dir=$5 + + # Check device is available + is_available_adb_device $adb_device_name + if [[ $? -ne 0 ]]; then + echo "$adb_device_name not found!" + exit 1 + fi + + # Only root user can use HiAI runtime libraries in the android shell executables + adb -s $adb_device_name root + if [[ $? -ne 0 ]]; then + echo "$adb_device_name hasn't the root permission!" 
+ exit 1 + fi + + # Copy the runtime libraries of HiAI DDK to the target device + local sdk_lib_dir="" + if [[ $arch == "armv8" ]]; then + sdk_lib_dir="$sdk_root_dir/lib64" + elif [[ $arch == "armv7" ]]; then + sdk_lib_dir="$sdk_root_dir/lib" + else + echo "$arch isn't supported by HiAI DDK!" + exit 1 + fi + adb -s $adb_device_name push $sdk_lib_dir/. $adb_work_dir +} + +function huawei_kirin_npu_build_targets { + local arch=$1 + local toolchain=$2 + local sdk_root_dir=$3 + + # Build all of tests + rm -rf ./build + mkdir -p ./build + cd ./build + prepare_workspace + cmake .. \ + -DWITH_GPU=OFF \ + -DWITH_MKL=OFF \ + -DWITH_LITE=ON \ + -DLITE_WITH_CUDA=OFF \ + -DLITE_WITH_X86=OFF \ + -DLITE_WITH_ARM=ON \ + -DWITH_ARM_DOTPROD=ON \ + -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \ + -DWITH_TESTING=ON \ + -DLITE_BUILD_EXTRA=ON \ + -DLITE_WITH_TRAIN=ON \ + -DANDROID_STL_TYPE="c++_shared" \ + -DLITE_WITH_NPU=ON \ + -DNPU_DDK_ROOT="$sdk_root_dir" \ + -DARM_TARGET_OS="android" -DARM_TARGET_ARCH_ABI=$arch -DARM_TARGET_LANG=$toolchain + make lite_compile_deps -j$NUM_CORES_FOR_COMPILE +} + +function huawei_kirin_npu_build_and_test { + run_all_tests_on_adb_device $1 $2 "/data/local/tmp" "$(readlink -f ./hiai_ddk_lib_330)" "armv7" "gcc,clang" huawei_kirin_npu_build_targets huawei_kirin_npu_prepare_device +} + +# Rockchip NPU +function rockchip_npu_prepare_device { + local adb_device_name=$1 + local adb_work_dir=$2 + local arch=$3 + local toolchain=$4 + local sdk_root_dir=$5 + + # Check device is available + is_available_adb_device $adb_device_name + if [[ $? -ne 0 ]]; then + echo "$adb_device_name not found!" + exit 1 + fi + + # Use high performance mode + adb -s $adb_device_name shell "echo userspace > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor" + adb -s $adb_device_name shell "echo 1608000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed" + adb -s $adb_device_name shell "echo userspace > /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor" + adb -s $adb_device_name shell "echo 1608000 > /sys/devices/system/cpu/cpu1/cpufreq/scaling_setspeed" + + # Copy the runtime libraries of Rockchip NPU to the target device + local sdk_lib_dir="" + if [[ $arch == "armv8" ]]; then + sdk_lib_dir="$sdk_root_dir/lib64" + elif [[ $arch == "armv7" ]]; then + sdk_lib_dir="$sdk_root_dir/lib" + else + echo "$arch isn't supported by Rockchip NPU SDK!" + exit 1 + fi + adb -s $adb_device_name push $sdk_lib_dir/. $adb_work_dir +} + +function rockchip_npu_build_targets { + local arch=$1 + local toolchain=$2 + local sdk_root_dir=$3 + + # Build all of tests + rm -rf ./build + mkdir -p ./build + cd ./build + prepare_workspace + cmake .. 
\ + -DWITH_GPU=OFF \ + -DWITH_MKL=OFF \ + -DWITH_LITE=ON \ + -DLITE_WITH_CUDA=OFF \ + -DLITE_WITH_X86=OFF \ + -DLITE_WITH_ARM=ON \ + -DWITH_ARM_DOTPROD=ON \ + -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \ + -DWITH_TESTING=ON \ + -DLITE_BUILD_EXTRA=ON \ + -DLITE_WITH_TRAIN=ON \ + -DLITE_WITH_RKNPU=ON \ + -DRKNPU_DDK_ROOT="$sdk_root_dir" \ + -DARM_TARGET_OS="armlinux" -DARM_TARGET_ARCH_ABI=$arch -DARM_TARGET_LANG=$toolchain + make lite_compile_deps -j$NUM_CORES_FOR_COMPILE +} + +function rockchip_npu_build_and_test { + run_all_tests_on_adb_device $1 $2 "/userdata/bin" "$(readlink -f ./rknpu_ddk)" "armv8" "gcc" rockchip_npu_build_targets rockchip_npu_prepare_device +} + +# MediaTek APU +function mediatek_apu_prepare_device { + local adb_device_name=$1 + local adb_work_dir=$2 + local arch=$3 + local toolchain=$4 + local sdk_root_dir=$5 + + # Check device is available + is_available_adb_device $adb_device_name + if [[ $? -ne 0 ]]; then + echo "$adb_device_name not found!" + exit 1 + fi + + # Use high performance mode + adb -s $adb_device_name root + if [[ $? -ne 0 ]]; then + echo "$adb_device_name hasn't the root permission!" + exit 1 + fi + adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor" + adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor" + adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu2/cpufreq/scaling_governor" + adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu3/cpufreq/scaling_governor" + adb -s $adb_device_name shell "cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq" + adb -s $adb_device_name shell "echo 800000 > /proc/gpufreq/gpufreq_opp_freq" + adb -s $adb_device_name shell "echo dvfs_debug 0 > /sys/kernel/debug/vpu/power" + adb -s $adb_device_name shell "echo 0 > /sys/devices/platform/soc/10012000.dvfsrc/helio-dvfsrc/dvfsrc_force_vcore_dvfs_opp" + adb -s $adb_device_name shell "echo 0 > /sys/module/mmdvfs_pmqos/parameters/force_step" + adb -s $adb_device_name shell "echo 0 > /proc/sys/kernel/printk" +} + +function mediatek_apu_build_targets { + local arch=$1 + local toolchain=$2 + local sdk_root_dir=$3 + + # Build all of tests + rm -rf ./build + mkdir -p ./build + cd ./build + prepare_workspace + cmake .. 
\ + -DWITH_GPU=OFF \ + -DWITH_MKL=OFF \ + -DWITH_LITE=ON \ + -DLITE_WITH_CUDA=OFF \ + -DLITE_WITH_X86=OFF \ + -DLITE_WITH_ARM=ON \ + -DWITH_ARM_DOTPROD=ON \ + -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \ + -DWITH_TESTING=ON \ + -DLITE_BUILD_EXTRA=ON \ + -DLITE_WITH_TRAIN=ON \ + -DLITE_WITH_APU=ON \ + -DAPU_DDK_ROOT="$sdk_root_dir" \ + -DARM_TARGET_OS="android" -DARM_TARGET_ARCH_ABI=$arch -DARM_TARGET_LANG=$toolchain + make lite_compile_deps -j$NUM_CORES_FOR_COMPILE +} + +function mediatek_apu_build_and_test { + run_all_tests_on_adb_device $1 $2 "/data/local/tmp" "$(readlink -f ./apu_ddk)" "armv7" "gcc" mediatek_apu_build_targets mediatek_apu_prepare_device +} + function cmake_huawei_ascend_npu { export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/third_party/install/mklml/lib" prepare_workspace @@ -478,77 +856,6 @@ function test_arm_android { adb -s ${device} shell "rm -f ${adb_work_dir}/${test_name}" } -# test_npu -function test_npu { - local test_name=$1 - local device=$2 - if [[ "${test_name}x" == "x" ]]; then - echo "test_name can not be empty" - exit 1 - fi - if [[ "${device}x" == "x" ]]; then - echo "Port can not be empty" - exit 1 - fi - - echo "test name: ${test_name}" - adb_work_dir="/data/local/tmp" - - skip_list=("test_model_parser" "test_mobilenetv1" "test_mobilenetv2" "test_resnet50" "test_inceptionv4" "test_light_api" "test_apis" "test_paddle_api" "test_cxx_api" "test_gen_code") - for skip_name in ${skip_list[@]} ; do - [[ $skip_name =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && echo "skip $test_name" && return - done - - local testpath=$(find ./lite -name ${test_name}) - - # note the ai_ddk_lib is under paddle-lite root directory - adb -s ${device} push ../ai_ddk_lib/lib64/* ${adb_work_dir} - adb -s ${device} push ${testpath} ${adb_work_dir} - - if [[ ${test_name} == "test_npu_pass" ]]; then - local model_name=mobilenet_v1 - adb -s ${device} push "./third_party/install/${model_name}" ${adb_work_dir} - adb -s ${device} shell "rm -rf ${adb_work_dir}/${model_name}_opt " - adb -s ${device} shell "cd ${adb_work_dir}; export LD_LIBRARY_PATH=./ ; export GLOG_v=0; ./${test_name} --model_dir=./${model_name} --optimized_model=./${model_name}_opt" - elif [[ ${test_name} == "test_subgraph_pass" ]]; then - local model_name=mobilenet_v1 - adb -s ${device} push "./third_party/install/${model_name}" ${adb_work_dir} - adb -s ${device} shell "cd ${adb_work_dir}; export LD_LIBRARY_PATH=./ ; export GLOG_v=0; ./${test_name} --model_dir=./${model_name}" - else - adb -s ${device} shell "cd ${adb_work_dir}; export LD_LIBRARY_PATH=./ ; ./${test_name}" - fi -} - -function test_npu_model { - local test_name=$1 - local device=$2 - local model_dir=$3 - - if [[ "${test_name}x" == "x" ]]; then - echo "test_name can not be empty" - exit 1 - fi - if [[ "${device}x" == "x" ]]; then - echo "Port can not be empty" - exit 1 - fi - if [[ "${model_dir}x" == "x" ]]; then - echo "Model dir can not be empty" - exit 1 - fi - - echo "test name: ${test_name}" - adb_work_dir="/data/local/tmp" - - testpath=$(find ./lite -name ${test_name}) - adb -s ${device} push ../ai_ddk_lib/lib64/* ${adb_work_dir} - adb -s ${device} push ${model_dir} ${adb_work_dir} - adb -s ${device} push ${testpath} ${adb_work_dir} - adb -s ${device} shell chmod +x "${adb_work_dir}/${test_name}" - local adb_model_path="${adb_work_dir}/`basename ${model_dir}`" - adb -s ${device} shell "export LD_LIBRARY_PATH=${adb_work_dir}; ${adb_work_dir}/${test_name} --model_dir=$adb_model_path" -} - # test the inference high level api function test_arm_api { local 
device=$1 @@ -643,32 +950,6 @@ function _test_paddle_code_generator { $adb shell $remote_test --optimized_model $remote_model --generated_code_file $ADB_WORK_DIR/gen_code.cc } -function cmake_npu { - prepare_workspace - # $1: ARM_TARGET_OS in "android" , "armlinux" - # $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf" - # $3: ARM_TARGET_LANG in "gcc" "clang" - - # NPU libs need API LEVEL 24 above - build_dir=`pwd` - - cmake .. \ - -DWITH_GPU=OFF \ - -DWITH_MKL=OFF \ - -DWITH_LITE=ON \ - -DLITE_WITH_CUDA=OFF \ - -DLITE_WITH_X86=OFF \ - -DLITE_WITH_ARM=ON \ - -DWITH_ARM_DOTPROD=ON \ - -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \ - -DWITH_TESTING=ON \ - -DLITE_WITH_NPU=ON \ - -DANDROID_API_LEVEL=24 \ - -DLITE_BUILD_EXTRA=ON \ - -DNPU_DDK_ROOT="${build_dir}/../ai_ddk_lib/" \ - -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 -DARM_TARGET_LANG=$3 -} - function cmake_arm { prepare_workspace # $1: ARM_TARGET_OS in "android" , "armlinux" @@ -756,31 +1037,6 @@ function build_ios { cd - } -# $1: ARM_TARGET_OS in "android" -# $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" -# $3: ARM_TARGET_LANG in "gcc" "clang" -# $4: test_name -function build_npu { - os=$1 - abi=$2 - lang=$3 - local test_name=$4 - - cur_dir=$(pwd) - - build_dir=$cur_dir/build.lite.npu.${os}.${abi}.${lang} - mkdir -p $build_dir - cd $build_dir - - cmake_npu ${os} ${abi} ${lang} - - if [[ "${test_name}x" != "x" ]]; then - build_single $test_name - else - build $TESTS_FILE - fi -} - # $1: ARM_TARGET_OS in "android" , "armlinux" # $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf" # $3: ARM_TARGET_LANG in "gcc" "clang" @@ -1029,42 +1285,6 @@ function build_test_arm { build_test_arm_subtask_armlinux } -function build_test_npu { - local test_name=$1 - local port_armv8=5554 - local port_armv7=5556 - local os=android - local abi=armv8 - local lang=gcc - - local test_model_name=test_mobilenetv1 - local model_name=mobilenet_v1 - cur_dir=$(pwd) - - build_npu "android" "armv8" "gcc" $test_name - - # just test the model on armv8 - # prepare_emulator $port_armv8 - - prepare_emulator $port_armv8 $port_armv7 - local device_armv8=emulator-$port_armv8 - - if [[ "${test_name}x" != "x" ]]; then - test_npu ${test_name} ${device_armv8} - else - # run_gen_code_test ${port_armv8} - for _test in $(cat $TESTS_FILE | grep npu); do - test_npu $_test $device_armv8 - done - fi - - test_npu_model $test_model_name $device_armv8 "./third_party/install/$model_name" - cd - - # just test the model on armv8 - # adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done - echo "Done" -} - function mobile_publish { # only check os=android abi=armv8 lang=gcc now local os=android @@ -1147,6 +1367,21 @@ function main { USE_ADB_EMULATOR="${i#*=}" shift ;; + --adb_device_list=*) + ADB_DEVICE_LIST="${i#*=}" + if [[ -n $ADB_DEVICE_LIST && $USE_ADB_EMULATOR != "OFF" ]]; then + set +x + echo + echo -e "Need to set USE_ADB_EMULATOR=OFF if '--adb_device_list' is specified." 
+ echo + exit 1 + fi + shift + ;; + --test_skip_list=*) + TEST_SKIP_LIST="${i#*=}" + shift + ;; --lite_with_coverage=*) LITE_WITH_COVERAGE="${i#*=}" shift @@ -1192,10 +1427,6 @@ function main { test_arm $ARM_OS $ARM_ABI $ARM_LANG $ARM_PORT shift ;; - test_npu) - test_npu $TEST_NAME $ARM_PORT - shift - ;; test_arm_android) test_arm_android $TEST_NAME $ARM_PORT shift @@ -1225,6 +1456,18 @@ function main { build_test_xpu ON shift ;; + huawei_kirin_npu_build_and_test) + huawei_kirin_npu_build_and_test $ADB_DEVICE_LIST $TEST_SKIP_LIST + shift + ;; + rockchip_npu_build_and_test) + rockchip_npu_build_and_test $ADB_DEVICE_LIST $TEST_SKIP_LIST + shift + ;; + mediatek_apu_build_and_test) + mediatek_apu_build_and_test $ADB_DEVICE_LIST $TEST_SKIP_LIST + shift + ;; build_test_huawei_ascend_npu) build_test_huawei_ascend_npu shift -- GitLab
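Note (usage sketch, not part of the patch): with this change the per-vendor CI pipelines are driven through the new ci_build.sh entry points huawei_kirin_npu_build_and_test, rockchip_npu_build_and_test and mediatek_apu_build_and_test, combined with the new --adb_device_list and --test_skip_list options. The commands below are a minimal illustration only: the device serial 0123456789ABCDEF is hypothetical, the vendor SDK directories (hiai_ddk_lib_330, rknpu_ddk, apu_ddk) are assumed to be unpacked in the Paddle-Lite source root exactly as the *_build_and_test functions expect, and --use_adb_emulator is assumed to be the pre-existing switch behind USE_ADB_EMULATOR; per the check added in main(), it must be set to OFF before --adb_device_list is parsed.

    # Huawei Kirin NPU: build all tests for armv7 and run them on the attached
    # device, skipping the tests named in --test_skip_list.
    ./lite/tools/ci_build.sh \
        --use_adb_emulator=OFF \
        --adb_device_list=0123456789ABCDEF \
        --test_skip_list=test_cxx_api,test_mobilenetv1_int8 \
        huawei_kirin_npu_build_and_test

    # Rockchip NPU (armv8/gcc) and MediaTek APU (armv7/gcc) follow the same pattern.
    ./lite/tools/ci_build.sh --use_adb_emulator=OFF --adb_device_list=0123456789ABCDEF rockchip_npu_build_and_test
    ./lite/tools/ci_build.sh --use_adb_emulator=OFF --adb_device_list=0123456789ABCDEF mediatek_apu_build_and_test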