未验证 提交 76410577 编写于 作者: H hong19860320 提交者: GitHub

[CI] Enable CI for Huawei kirin NPU, Rockchip NPU and MediaTek APU (#4408)

上级 d5e7e73e
......@@ -38,34 +38,31 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND NOT LITE_ON_TINY_PUBLISH)
endif()
if (WITH_TESTING)
set(LITE_URL_FOR_UNITTESTS "http://paddle-inference-dist.bj.bcebos.com/PaddleLite/models_and_data_for_unittests")
# models
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "inception_v4_simple.tar.gz")
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1_int16.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "resnet50.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "inception_v4_simple.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "MobileNetV1_quant.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "transformer_with_mask_fp32.tar.gz")
endif()
if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "mobilenet_v1_int8_for_mediatek_apu.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "mobilenet_v1_int8_for_rockchip_npu.tar.gz")
else()
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "GoogleNet_inference.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v1.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "inception_v4_simple.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "step_rnn.tar.gz")
set(LITE_URL_FOR_UNITTESTS "http://paddle-inference-dist.bj.bcebos.com/PaddleLite/models_and_data_for_unittests")
# models
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "resnet50.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "bert.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "ernie.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "GoogLeNet.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "VGG19.tar.gz")
# data
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "ILSVRC2012_small.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "bert_data.tar.gz")
endif()
# data
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "ILSVRC2012_small.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL_FOR_UNITTESTS} "bert_data.tar.gz")
endif()
# ----------------------------- PUBLISH -----------------------------
......
......@@ -6,5 +6,5 @@ endif()
lite_cc_library(arena_framework SRCS framework.cc DEPS program gtest)
if((NOT LITE_WITH_OPENCL) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${rknpu_kernels} ${mlu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${rknpu_kernels} ${mlu_kernels} ${bm_kernels} ${npu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
if(LITE_WITH_ARM)
lite_cc_test(test_transformer_with_mask_fp32_arm SRCS test_transformer_with_mask_fp32_arm.cc
function(lite_cc_test_with_model_and_data TARGET)
if(NOT WITH_TESTING)
return()
endif()
set(options "")
set(oneValueArgs MODEL DATA CONFIG ARGS)
set(multiValueArgs "")
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(ARGS "")
if(DEFINED args_MODEL)
set(ARGS "${ARGS} --model_dir=${LITE_MODEL_DIR}/${args_MODEL}")
endif()
if(DEFINED args_DATA)
set(ARGS "${ARGS} --data_dir=${LITE_MODEL_DIR}/${args_DATA}")
endif()
if(DEFINED args_CONFIG)
set(ARGS "${ARGS} --config_dir=${LITE_MODEL_DIR}/${args_CONFIG}")
endif()
if(DEFINED args_ARGS)
set(ARGS "${ARGS} ${args_ARGS}")
endif()
lite_cc_test(${TARGET} SRCS ${TARGET}.cc
DEPS ${lite_model_test_DEPS} paddle_api_full
ARM_DEPS ${arm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/transformer_with_mask_fp32 SERIAL)
if(WITH_TESTING)
add_dependencies(test_transformer_with_mask_fp32_arm extern_lite_download_transformer_with_mask_fp32_tar_gz)
X86_DEPS ${x86_kernels}
NPU_DEPS ${npu_kernels} ${npu_bridges}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels} ${huawei_ascend_npu_bridges}
XPU_DEPS ${xpu_kernels} ${xpu_bridges}
APU_DEPS ${apu_kernels} ${apu_bridges}
RKNPU_DEPS ${rknpu_kernels} ${rknpu_bridges}
BM_DEPS ${bm_kernels} ${bm_bridges}
MLU_DEPS ${mlu_kernels} ${mlu_bridges}
ARGS ${ARGS} SERIAL)
if(DEFINED args_MODEL)
add_dependencies(${TARGET} extern_lite_download_${args_MODEL}_tar_gz)
endif()
endif()
function(xpu_x86_without_xtcl_test TARGET MODEL DATA)
if(${DATA} STREQUAL "")
lite_cc_test(${TARGET} SRCS ${TARGET}.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels} ${xpu_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/${MODEL})
else()
lite_cc_test(${TARGET} SRCS ${TARGET}.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels} ${xpu_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/${MODEL} --data_dir=${LITE_MODEL_DIR}/${DATA})
if(DEFINED args_DATA)
add_dependencies(${TARGET} extern_lite_download_${args_DATA}_tar_gz)
endif()
if(WITH_TESTING)
add_dependencies(${TARGET} extern_lite_download_${MODEL}_tar_gz)
if(NOT ${DATA} STREQUAL "")
add_dependencies(${TARGET} extern_lite_download_${DATA}_tar_gz)
endif()
if(DEFINED args_CONFIG)
add_dependencies(${TARGET} extern_lite_download_${args_CONFIG}_tar_gz)
endif()
endfunction()
if(LITE_WITH_ARM)
lite_cc_test_with_model_and_data(test_transformer_with_mask_fp32_arm MODEL transformer_with_mask_fp32 ARGS)
endif()
if(LITE_WITH_NPU)
lite_cc_test_with_model_and_data(test_mobilenetv1_fp32_huawei_kirin_npu MODEL mobilenet_v1 DATA ILSVRC2012_small)
lite_cc_test_with_model_and_data(test_mobilenetv2_fp32_huawei_kirin_npu MODEL mobilenet_v2_relu DATA ILSVRC2012_small)
lite_cc_test_with_model_and_data(test_resnet50_fp32_huawei_kirin_npu MODEL resnet50 DATA ILSVRC2012_small)
endif()
if(LITE_WITH_XPU AND NOT LITE_WITH_XTCL)
xpu_x86_without_xtcl_test(test_resnet50_fp32_xpu resnet50 ILSVRC2012_small)
xpu_x86_without_xtcl_test(test_googlenet_fp32_xpu GoogLeNet ILSVRC2012_small)
xpu_x86_without_xtcl_test(test_vgg19_fp32_xpu VGG19 ILSVRC2012_small)
xpu_x86_without_xtcl_test(test_ernie_fp32_xpu ernie bert_data)
xpu_x86_without_xtcl_test(test_bert_fp32_xpu bert bert_data)
lite_cc_test_with_model_and_data(test_resnet50_fp32_xpu MODEL resnet50 DATA ILSVRC2012_small)
lite_cc_test_with_model_and_data(test_googlenet_fp32_xpu MODEL GoogLeNet DATA ILSVRC2012_small)
lite_cc_test_with_model_and_data(test_vgg19_fp32_xpu MODEL VGG19 DATA ILSVRC2012_small)
lite_cc_test_with_model_and_data(test_ernie_fp32_xpu MODEL ernie DATA bert_data)
lite_cc_test_with_model_and_data(test_bert_fp32_xpu MODEL bert DATA bert_data)
endif()
if(LITE_WITH_RKNPU)
lite_cc_test(test_mobilenetv1_int8_rknpu SRCS test_mobilenetv1_int8_rknpu.cc
DEPS ${lite_model_test_DEPS} paddle_api_full
RKNPU_DEPS ${rknpu_kernels} ${rknpu_bridges}
ARGS --model_dir=${LITE_MODEL_DIR}/MobilenetV1_full_quant SERIAL)
lite_cc_test_with_model_and_data(test_mobilenetv1_int8_rockchip_npu MODEL mobilenet_v1_int8_for_rockchip_npu DATA ILSVRC2012_small)
endif()
if(LITE_WITH_APU)
lite_cc_test(test_mobilenetv1_int8_apu SRCS test_mobilenetv1_int8_apu.cc
DEPS ${lite_model_test_DEPS} paddle_api_full
APU_DEPS ${apu_kernels} ${apu_bridges}
ARGS --model_dir=${LITE_MODEL_DIR}/MobilenetV1_full_quant SERIAL)
lite_cc_test_with_model_and_data(test_mobilenetv1_int8_mediatek_apu MODEL mobilenet_v1_int8_for_mediatek_apu DATA ILSVRC2012_small)
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/tests/api/ILSVRC2012_utility.h"
#include "lite/utils/cp_logging.h"
DEFINE_string(data_dir, "", "data dir");
DEFINE_int32(iteration, 100, "iteration times to run");
DEFINE_int32(batch, 1, "batch of image");
DEFINE_int32(channel, 3, "image channel");
namespace paddle {
namespace lite {
TEST(MobileNetV1, test_mobilenetv1_fp32_huawei_kirin_npu) {
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
std::vector<int> input_shape{
FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
int input_size = 1;
for (auto i : input_shape) {
input_size *= i;
}
for (int i = 0; i < FLAGS_warmup; ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
for (int j = 0; j < input_size; j++) {
data[j] = 0.f;
}
predictor->Run();
}
std::vector<std::vector<float>> out_rets;
out_rets.resize(FLAGS_iteration);
double cost_time = 0;
for (size_t i = 0; i < raw_data.size(); ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
double start = GetCurrentUS();
predictor->Run();
cost_time += GetCurrentUS() - start;
auto output_tensor = predictor->GetOutput(0);
auto output_shape = output_tensor->shape();
auto output_data = output_tensor->data<float>();
ASSERT_EQ(output_shape.size(), 2UL);
ASSERT_EQ(output_shape[0], 1);
ASSERT_EQ(output_shape[1], 1000);
int output_size = output_shape[0] * output_shape[1];
out_rets[i].resize(output_size);
memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
<< ", iteration: " << FLAGS_iteration << ", spend "
<< cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
ASSERT_GE(out_accuracy, 0.57f);
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
using namespace paddle::lite_api; // NOLINT
inline double GetCurrentUS() {
struct timeval time;
gettimeofday(&time, NULL);
return 1e+6 * time.tv_sec + time.tv_usec;
}
inline int64_t ShapeProduction(std::vector<int64_t> shape) {
int64_t s = 1;
for (int64_t dim : shape) {
s *= dim;
}
return s;
}
int main(int argc, char** argv) {
if (argc < 2) {
std::cerr << "[ERROR] usage: ./" << argv[0]
<< " model_dir [thread_num] [warmup_times] [repeat_times] "
"[input_data_path] [output_data_path]"
<< std::endl;
return -1;
}
std::string model_dir = argv[1];
int thread_num = 1;
if (argc > 2) {
thread_num = atoi(argv[2]);
}
int warmup_times = 5;
if (argc > 3) {
warmup_times = atoi(argv[3]);
}
int repeat_times = 10;
if (argc > 4) {
repeat_times = atoi(argv[4]);
}
std::string input_data_path;
if (argc > 5) {
input_data_path = argv[5];
}
std::string output_data_path;
if (argc > 6) {
output_data_path = argv[6];
}
paddle::lite_api::CxxConfig config;
config.set_model_dir(model_dir);
config.set_threads(thread_num);
config.set_power_mode(paddle::lite_api::LITE_POWER_HIGH);
config.set_valid_places(
{paddle::lite_api::Place{
TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)},
paddle::lite_api::Place{
TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)},
paddle::lite_api::Place{
TARGET(kAPU), PRECISION(kInt8), DATALAYOUT(kNCHW)}});
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
std::unique_ptr<paddle::lite_api::Tensor> input_tensor(
std::move(predictor->GetInput(0)));
input_tensor->Resize({1, 3, 224, 224});
auto input_data = input_tensor->mutable_data<float>();
auto input_size = ShapeProduction(input_tensor->shape());
// test loop
int total_imgs = 500;
float test_num = 0;
float top1_num = 0;
float top5_num = 0;
int output_len = 1000;
std::vector<int> index(1000);
bool debug = true; // false;
int show_step = 500;
for (int i = 0; i < total_imgs; i++) {
// set input
std::string filename = input_data_path + "/" + std::to_string(i);
std::ifstream fs(filename, std::ifstream::binary);
if (!fs.is_open()) {
std::cout << "open input file fail.";
}
auto input_data_tmp = input_data;
for (int i = 0; i < input_size; ++i) {
fs.read(reinterpret_cast<char*>(input_data_tmp), sizeof(*input_data_tmp));
input_data_tmp++;
}
int label = 0;
fs.read(reinterpret_cast<char*>(&label), sizeof(label));
fs.close();
if (debug && i % show_step == 0) {
std::cout << "input data:" << std::endl;
std::cout << input_data[0] << " " << input_data[10] << " "
<< input_data[input_size - 1] << std::endl;
std::cout << "label:" << label << std::endl;
}
// run
predictor->Run();
auto output0 = predictor->GetOutput(0);
auto output0_data = output0->data<float>();
// get output
std::iota(index.begin(), index.end(), 0);
std::stable_sort(
index.begin(), index.end(), [output0_data](size_t i1, size_t i2) {
return output0_data[i1] > output0_data[i2];
});
test_num++;
if (label == index[0]) {
top1_num++;
}
for (int i = 0; i < 5; i++) {
if (label == index[i]) {
top5_num++;
}
}
if (debug && i % show_step == 0) {
std::cout << index[0] << " " << index[1] << " " << index[2] << " "
<< index[3] << " " << index[4] << std::endl;
std::cout << output0_data[index[0]] << " " << output0_data[index[1]]
<< " " << output0_data[index[2]] << " "
<< output0_data[index[3]] << " " << output0_data[index[4]]
<< std::endl;
std::cout << output0_data[630] << std::endl;
}
if (i % show_step == 0) {
std::cout << "step " << i << "; top1 acc:" << top1_num / test_num
<< "; top5 acc:" << top5_num / test_num << std::endl;
}
}
std::cout << "final result:" << std::endl;
std::cout << "top1 acc:" << top1_num / test_num << std::endl;
std::cout << "top5 acc:" << top5_num / test_num << std::endl;
return 0;
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/tests/api/ILSVRC2012_utility.h"
#include "lite/utils/cp_logging.h"
DEFINE_string(data_dir, "", "data dir");
DEFINE_int32(iteration, 100, "iteration times to run");
DEFINE_int32(batch, 1, "batch of image");
DEFINE_int32(channel, 3, "image channel");
namespace paddle {
namespace lite {
TEST(MobileNetV1, test_mobilenetv1_int8_mediatek_apu) {
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
lite_api::Place{TARGET(kAPU), PRECISION(kInt8)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
std::vector<int> input_shape{
FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
int input_size = 1;
for (auto i : input_shape) {
input_size *= i;
}
for (int i = 0; i < FLAGS_warmup; ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
for (int j = 0; j < input_size; j++) {
data[j] = 0.f;
}
predictor->Run();
}
std::vector<std::vector<float>> out_rets;
out_rets.resize(FLAGS_iteration);
double cost_time = 0;
for (size_t i = 0; i < raw_data.size(); ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
double start = GetCurrentUS();
predictor->Run();
cost_time += GetCurrentUS() - start;
auto output_tensor = predictor->GetOutput(0);
auto output_shape = output_tensor->shape();
auto output_data = output_tensor->data<float>();
ASSERT_EQ(output_shape.size(), 2UL);
ASSERT_EQ(output_shape[0], 1);
ASSERT_EQ(output_shape[1], 1000);
int output_size = output_shape[0] * output_shape[1];
out_rets[i].resize(output_size);
memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
<< ", iteration: " << FLAGS_iteration << ", spend "
<< cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
ASSERT_GE(out_accuracy, 0.55f);
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/time.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
inline double GetCurrentUS() {
struct timeval time;
gettimeofday(&time, NULL);
return 1e+6 * time.tv_sec + time.tv_usec;
}
inline int64_t ShapeProduction(std::vector<int64_t> shape) {
int64_t s = 1;
for (int64_t dim : shape) {
s *= dim;
}
return s;
}
int main(int argc, char** argv) {
if (argc < 2) {
std::cerr << "[ERROR] usage: ./" << argv[0]
<< " model_dir [thread_num] [warmup_times] [repeat_times] "
"[input_data_path] [output_data_path]"
<< std::endl;
return -1;
}
std::string model_dir = argv[1];
int thread_num = 1;
if (argc > 2) {
thread_num = atoi(argv[2]);
}
int warmup_times = 5;
if (argc > 3) {
warmup_times = atoi(argv[3]);
}
int repeat_times = 10;
if (argc > 4) {
repeat_times = atoi(argv[4]);
}
std::string input_data_path;
if (argc > 5) {
input_data_path = argv[5];
}
std::string output_data_path;
if (argc > 6) {
output_data_path = argv[6];
}
paddle::lite_api::CxxConfig config;
config.set_model_dir(model_dir);
config.set_threads(thread_num);
config.set_power_mode(paddle::lite_api::LITE_POWER_HIGH);
config.set_valid_places(
{paddle::lite_api::Place{
TARGET(kARM), PRECISION(kFloat), DATALAYOUT(kNCHW)},
paddle::lite_api::Place{
TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)},
paddle::lite_api::Place{
TARGET(kARM), PRECISION(kInt8), DATALAYOUT(kNCHW)},
paddle::lite_api::Place{
TARGET(kRKNPU), PRECISION(kInt8), DATALAYOUT(kNCHW)}});
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
std::unique_ptr<paddle::lite_api::Tensor> input_tensor(
std::move(predictor->GetInput(0)));
input_tensor->Resize({1, 3, 224, 224});
auto input_data = input_tensor->mutable_data<float>();
auto input_size = ShapeProduction(input_tensor->shape());
if (input_data_path.empty()) {
for (int i = 0; i < input_size; i++) {
input_data[i] = 1;
}
} else {
std::fstream fs(input_data_path, std::ios::in);
if (!fs.is_open()) {
std::cerr << "open input data file failed." << std::endl;
return -1;
}
for (int i = 0; i < input_size; i++) {
fs >> input_data[i];
}
}
for (int i = 0; i < warmup_times; ++i) {
predictor->Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < repeat_times; ++i) {
predictor->Run();
}
std::cout << "Model: " << model_dir << ", threads num " << thread_num
<< ", warmup times: " << warmup_times
<< ", repeat times: " << repeat_times << ", spend "
<< (GetCurrentUS() - start) / repeat_times / 1000.0
<< " ms in average." << std::endl;
std::unique_ptr<const paddle::lite_api::Tensor> output_tensor(
std::move(predictor->GetOutput(0)));
auto output_data = output_tensor->data<float>();
auto output_size = ShapeProduction(output_tensor->shape());
std::cout << "output data:";
for (int i = 0; i < output_size; i += 100) {
std::cout << "[" << i << "] " << output_data[i] << std::endl;
}
return 0;
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/tests/api/ILSVRC2012_utility.h"
#include "lite/utils/cp_logging.h"
DEFINE_string(data_dir, "", "data dir");
DEFINE_int32(iteration, 100, "iteration times to run");
DEFINE_int32(batch, 1, "batch of image");
DEFINE_int32(channel, 3, "image channel");
namespace paddle {
namespace lite {
TEST(MobileNetV1, test_mobilenetv1_int8_rockchip_apu) {
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
lite_api::Place{TARGET(kARM), PRECISION(kInt8)},
lite_api::Place{TARGET(kRKNPU), PRECISION(kInt8)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
std::vector<int> input_shape{
FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
int input_size = 1;
for (auto i : input_shape) {
input_size *= i;
}
for (int i = 0; i < FLAGS_warmup; ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
for (int j = 0; j < input_size; j++) {
data[j] = 0.f;
}
predictor->Run();
}
std::vector<std::vector<float>> out_rets;
out_rets.resize(FLAGS_iteration);
double cost_time = 0;
for (size_t i = 0; i < raw_data.size(); ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
double start = GetCurrentUS();
predictor->Run();
cost_time += GetCurrentUS() - start;
auto output_tensor = predictor->GetOutput(0);
auto output_shape = output_tensor->shape();
auto output_data = output_tensor->data<float>();
ASSERT_EQ(output_shape.size(), 2UL);
ASSERT_EQ(output_shape[0], 1);
ASSERT_EQ(output_shape[1], 1000);
int output_size = output_shape[0] * output_shape[1];
out_rets[i].resize(output_size);
memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
<< ", iteration: " << FLAGS_iteration << ", spend "
<< cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
ASSERT_GE(out_accuracy, 0.52f);
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/tests/api/ILSVRC2012_utility.h"
#include "lite/utils/cp_logging.h"
DEFINE_string(data_dir, "", "data dir");
DEFINE_int32(iteration, 100, "iteration times to run");
DEFINE_int32(batch, 1, "batch of image");
DEFINE_int32(channel, 3, "image channel");
namespace paddle {
namespace lite {
TEST(MobileNetV2, test_mobilenetv2_fp32_huawei_kirin_npu) {
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
std::vector<int> input_shape{
FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
int input_size = 1;
for (auto i : input_shape) {
input_size *= i;
}
for (int i = 0; i < FLAGS_warmup; ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
for (int j = 0; j < input_size; j++) {
data[j] = 0.f;
}
predictor->Run();
}
std::vector<std::vector<float>> out_rets;
out_rets.resize(FLAGS_iteration);
double cost_time = 0;
for (size_t i = 0; i < raw_data.size(); ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
double start = GetCurrentUS();
predictor->Run();
cost_time += GetCurrentUS() - start;
auto output_tensor = predictor->GetOutput(0);
auto output_shape = output_tensor->shape();
auto output_data = output_tensor->data<float>();
ASSERT_EQ(output_shape.size(), 2UL);
ASSERT_EQ(output_shape[0], 1);
ASSERT_EQ(output_shape[1], 1000);
int output_size = output_shape[0] * output_shape[1];
out_rets[i].resize(output_size);
memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
<< ", iteration: " << FLAGS_iteration << ", spend "
<< cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
ASSERT_GE(out_accuracy, 0.57f);
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/tests/api/ILSVRC2012_utility.h"
#include "lite/utils/cp_logging.h"
DEFINE_string(data_dir, "", "data dir");
DEFINE_int32(iteration, 100, "iteration times to run");
DEFINE_int32(batch, 1, "batch of image");
DEFINE_int32(channel, 3, "image channel");
namespace paddle {
namespace lite {
TEST(ResNet50, test_resnet50_fp32_huawei_kirin_npu) {
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
lite_api::Place{TARGET(kNPU), PRECISION(kFloat)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
std::string raw_data_dir = FLAGS_data_dir + std::string("/raw_data");
std::vector<int> input_shape{
FLAGS_batch, FLAGS_channel, FLAGS_im_width, FLAGS_im_height};
auto raw_data = ReadRawData(raw_data_dir, input_shape, FLAGS_iteration);
int input_size = 1;
for (auto i : input_shape) {
input_size *= i;
}
for (int i = 0; i < FLAGS_warmup; ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
for (int j = 0; j < input_size; j++) {
data[j] = 0.f;
}
predictor->Run();
}
std::vector<std::vector<float>> out_rets;
out_rets.resize(FLAGS_iteration);
double cost_time = 0;
for (size_t i = 0; i < raw_data.size(); ++i) {
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(
std::vector<int64_t>(input_shape.begin(), input_shape.end()));
auto* data = input_tensor->mutable_data<float>();
memcpy(data, raw_data[i].data(), sizeof(float) * input_size);
double start = GetCurrentUS();
predictor->Run();
cost_time += GetCurrentUS() - start;
auto output_tensor = predictor->GetOutput(0);
auto output_shape = output_tensor->shape();
auto output_data = output_tensor->data<float>();
ASSERT_EQ(output_shape.size(), 2UL);
ASSERT_EQ(output_shape[0], 1);
ASSERT_EQ(output_shape[1], 1000);
int output_size = output_shape[0] * output_shape[1];
out_rets[i].resize(output_size);
memcpy(&(out_rets[i].at(0)), output_data, sizeof(float) * output_size);
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", batch: " << FLAGS_batch
<< ", iteration: " << FLAGS_iteration << ", spend "
<< cost_time / FLAGS_iteration / 1000.0 << " ms in average.";
std::string labels_dir = FLAGS_data_dir + std::string("/labels.txt");
float out_accuracy = CalOutAccuracy(out_rets, labels_dir);
ASSERT_GE(out_accuracy, 0.64f);
}
} // namespace lite
} // namespace paddle
if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM AND NOT LITE_WITH_MLU AND NOT LITE_WITH_RKNPU) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
lite_cc_test(test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_elementwise_compute SRCS elementwise_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_argmax_compute SRCS argmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_instance_norm_compute SRCS instance_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_group_norm_compute SRCS group_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM AND NOT LITE_WITH_MLU) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
lite_cc_test(test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_elementwise_compute SRCS elementwise_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_argmax_compute SRCS argmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_instance_norm_compute SRCS instance_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_group_norm_compute SRCS group_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_logical_compute SRCS logical_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_multiclass_nms_compute SRCS multiclass_nms_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_fill_constant_compute SRCS fill_constant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_fill_constant_batch_size_like_compute SRCS fill_constant_batch_size_like_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_logical_compute SRCS logical_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_multiclass_nms_compute SRCS multiclass_nms_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_fill_constant_compute SRCS fill_constant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_fill_constant_batch_size_like_compute SRCS fill_constant_batch_size_like_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
if(LITE_BUILD_EXTRA)
lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_conv_compute SRCS sequence_conv_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_sum_compute SRCS reduce_sum_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_conv_compute SRCS sequence_conv_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_sum_compute SRCS reduce_sum_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_ctc_align_compute SRCS ctc_align_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_clip_compute SRCS clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_pixel_shuffle_compute SRCS pixel_shuffle_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_scatter_compute SRCS scatter_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_expand_as_compute SRCS sequence_expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lookup_table_dequant_compute SRCS lookup_table_dequant_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_ctc_align_compute SRCS ctc_align_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_clip_compute SRCS clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_pixel_shuffle_compute SRCS pixel_shuffle_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_scatter_compute SRCS scatter_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_expand_as_compute SRCS sequence_expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# for training kernel
if (LITE_WITH_TRAIN)
lite_cc_test(test_kernel_mean_compute SRCS mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_activation_grad_compute SRCS activation_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_elementwise_grad_compute SRCS elementwise_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_mul_grad_compute SRCS mul_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sgd_compute SRCS sgd_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_pool_grad_compute SRCS sequence_pool_grad_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_mean_compute SRCS mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_activation_grad_compute SRCS activation_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_elementwise_grad_compute SRCS elementwise_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_mul_grad_compute SRCS mul_grad_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sgd_compute SRCS sgd_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_pool_grad_compute SRCS sequence_pool_grad_compute_test.cc DEPS ${bm_kernels} arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
endif()
lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_interp_compute SRCS interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_is_empty_compute SRCS is_empty_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_as_compute SRCS expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_flatten_compute SRCS flatten_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_interp_compute SRCS interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_is_empty_compute SRCS is_empty_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_as_compute SRCS expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_flatten_compute SRCS flatten_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_uniform_random_compute SRCS uniform_random_compute_test.cc DEPS arena_framework ${lite_ops} ${host_kernels})
endif()
......@@ -18,6 +18,10 @@ NUM_CORES_FOR_COMPILE=${LITE_BUILD_THREADS:-8}
# global variables
#whether to use emulator as adb devices,when USE_ADB_EMULATOR=ON we use emulator, else we will use connected mobile phone as adb devices.
USE_ADB_EMULATOR=ON
# Use real android devices, set the device names for adb connection, ignored if USE_ADB_EMULATOR=ON
ADB_DEVICE_LIST=""
# The list of tests which are ignored, use commas to separate them, such as "test_cxx_api,test_mobilenetv1_int8"
TEST_SKIP_LIST=""
LITE_WITH_COVERAGE=OFF
# if operating in mac env, we should expand the maximum file num
......@@ -392,6 +396,380 @@ function build_test_xpu {
test_xpu
}
function is_available_adb_device {
local adb_device_name=$1
if [[ -n "$adb_device_name" ]]; then
for line in `adb devices | grep -v "List" | awk '{print $1}'`
do
online_device_name=`echo $line | awk '{print $1}'`
if [[ "$adb_device_name" == "$online_device_name" ]];then
return 0
fi
done
fi
return 1
}
function pick_an_available_adb_device {
local adb_device_list=$1
local adb_device_names=(${adb_device_list//,/ })
for adb_device_name in ${adb_device_names[@]}; do
is_available_adb_device $adb_device_name
if [[ $? -eq 0 ]]; then
echo $adb_device_name
return 0
fi
done
echo ""
return 1
}
function run_test_case_on_adb_device {
local adb_device_name=""
local adb_work_dir=""
local target_name=""
local model_dir=""
local data_dir=""
local config_dir=""
# Extract arguments from command line
for i in "$@"; do
case $i in
--adb_device_name=*)
adb_device_name="${i#*=}"
shift
;;
--adb_work_dir=*)
adb_work_dir="${i#*=}"
shift
;;
--target_name=*)
target_name="${i#*=}"
shift
;;
--model_dir=*)
model_dir="${i#*=}"
shift
;;
--data_dir=*)
data_dir="${i#*=}"
shift
;;
--config_dir=*)
config_dir="${i#*=}"
shift
;;
*)
shift
;;
esac
done
# Check device is available
is_available_adb_device $adb_device_name
if [[ $? -ne 0 ]]; then
echo "$adb_device_name not found!"
exit 1
fi
# Be careful!!! Don't delete the root or system directories if the device is rooted.
if [[ -z "$adb_work_dir" ]]; then
echo "$adb_work_dir can't be empty!"
exit 1
fi
if [[ "$adb_work_dir" == "/" ]]; then
echo "$adb_work_dir can't be root dir!"
exit 1
fi
# Copy the executable unit test to the remote device
local target_path=$(find ./lite -name $target_name)
if [[ -z "$target_path" ]]; then
echo "$target_name not found!"
exit 1
fi
adb -s $adb_device_name shell "rm -f $adb_work_dir/$target_name"
adb -s $adb_device_name push $target_path $adb_work_dir
local command_line="./$target_name"
# Copy the model files to the remote device
if [[ -n "$model_dir" ]]; then
local model_name=$(basename $model_dir)
adb -s $adb_device_name shell "rm -rf $adb_work_dir/$model_name"
adb -s $adb_device_name push $model_dir $adb_work_dir
command_line="$command_line --model_dir ./$model_name"
fi
# Copy the test data files to the remote device
if [[ -n "$data_dir" ]]; then
local data_name=$(basename $data_dir)
adb -s $adb_device_name shell "rm -rf $adb_work_dir/$data_name"
adb -s $adb_device_name push $data_dir $adb_work_dir
command_line="$command_line --data_dir ./$data_name"
fi
# Copy the config files to the remote device
if [[ -n "$config_dir" ]]; then
local config_name=$(basename $config_dir)
adb -s $adb_device_name shell "rm -rf $adb_work_dir/$config_name"
adb -s $adb_device_name push $config_dir $adb_work_dir
command_line="$command_line --config_dir ./$config_name"
fi
# Run the model on the remote device
adb -s $adb_device_name shell "cd $adb_work_dir; export GLOG_v=5; LD_LIBRARY_PATH=$LD_LIBRARY_PATH:. $command_line"
}
function run_all_tests_on_adb_device {
local adb_device_list=$1
local test_skip_list=$2
local adb_work_dir=$3
local sdk_root_dir=$4
local test_arch_list=$5
local test_toolchain_list=$6
local build_targets_func=$7
local prepare_devices_func=$8
# Pick the first available adb device from list
local adb_device_name=$(pick_an_available_adb_device $adb_device_list)
if [[ -z $adb_device_name ]]; then
echo "No adb device available!"
exit 1
else
echo "Found a device $adb_device_name."
fi
# Run all of unittests and model tests
local test_archs=(${test_arch_list//,/ })
local test_toolchains=(${test_toolchain_list//,/ })
local test_skip_names=(${test_skip_list//,/ })
local test_model_params=(${test_model_list//:/ })
for arch in $test_archs; do
for toolchain in $test_toolchains; do
# Build all tests and prepare device environment for running tests
echo "Build tests for MediaTek APU with $arch+$toolchain"
${build_targets_func} $arch $toolchain $sdk_root_dir
${prepare_devices_func} $adb_device_name $adb_work_dir $arch $toolchain $sdk_root_dir
# Run all of unit tests and model tests
for test_name in $(cat $TESTS_FILE); do
local is_skip=0
for test_skip_name in ${test_skip_names[@]}; do
if [[ "$test_skip_name" == "$test_name" ]]; then
echo "skip " $test_name
is_skip=1
break
fi
done
if [[ $is_skip -ne 0 ]]; then
continue
fi
# Extract the arguments from ctest command line
test_args=$(echo $(ctest -V -N -R ${test_name}) | sed "/.*${test_name} \"\(.*\)\".*/ s//\1/g")
run_test_case_on_adb_device --adb_device_name=$adb_device_name --adb_work_dir=$adb_work_dir --target_name=$test_name $test_args
done
cd - > /dev/null
done
done
}
# Huawei Kirin NPU
function huawei_kirin_npu_prepare_device {
local adb_device_name=$1
local adb_work_dir=$2
local arch=$3
local toolchain=$4
local sdk_root_dir=$5
# Check device is available
is_available_adb_device $adb_device_name
if [[ $? -ne 0 ]]; then
echo "$adb_device_name not found!"
exit 1
fi
# Only root user can use HiAI runtime libraries in the android shell executables
adb -s $adb_device_name root
if [[ $? -ne 0 ]]; then
echo "$adb_device_name hasn't the root permission!"
exit 1
fi
# Copy the runtime libraries of HiAI DDK to the target device
local sdk_lib_dir=""
if [[ $arch == "armv8" ]]; then
sdk_lib_dir="$sdk_root_dir/lib64"
elif [[ $arch == "armv7" ]]; then
sdk_lib_dir="$sdk_root_dir/lib"
else
echo "$arch isn't supported by HiAI DDK!"
exit 1
fi
adb -s $adb_device_name push $sdk_lib_dir/. $adb_work_dir
}
function huawei_kirin_npu_build_targets {
local arch=$1
local toolchain=$2
local sdk_root_dir=$3
# Build all of tests
rm -rf ./build
mkdir -p ./build
cd ./build
prepare_workspace
cmake .. \
-DWITH_GPU=OFF \
-DWITH_MKL=OFF \
-DWITH_LITE=ON \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DWITH_ARM_DOTPROD=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DLITE_BUILD_EXTRA=ON \
-DLITE_WITH_TRAIN=ON \
-DANDROID_STL_TYPE="c++_shared" \
-DLITE_WITH_NPU=ON \
-DNPU_DDK_ROOT="$sdk_root_dir" \
-DARM_TARGET_OS="android" -DARM_TARGET_ARCH_ABI=$arch -DARM_TARGET_LANG=$toolchain
make lite_compile_deps -j$NUM_CORES_FOR_COMPILE
}
function huawei_kirin_npu_build_and_test {
run_all_tests_on_adb_device $1 $2 "/data/local/tmp" "$(readlink -f ./hiai_ddk_lib_330)" "armv7" "gcc,clang" huawei_kirin_npu_build_targets huawei_kirin_npu_prepare_device
}
# Rockchip NPU
function rockchip_npu_prepare_device {
local adb_device_name=$1
local adb_work_dir=$2
local arch=$3
local toolchain=$4
local sdk_root_dir=$5
# Check device is available
is_available_adb_device $adb_device_name
if [[ $? -ne 0 ]]; then
echo "$adb_device_name not found!"
exit 1
fi
# Use high performance mode
adb -s $adb_device_name shell "echo userspace > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"
adb -s $adb_device_name shell "echo 1608000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed"
adb -s $adb_device_name shell "echo userspace > /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor"
adb -s $adb_device_name shell "echo 1608000 > /sys/devices/system/cpu/cpu1/cpufreq/scaling_setspeed"
# Copy the runtime libraries of Rockchip NPU to the target device
local sdk_lib_dir=""
if [[ $arch == "armv8" ]]; then
sdk_lib_dir="$sdk_root_dir/lib64"
elif [[ $arch == "armv7" ]]; then
sdk_lib_dir="$sdk_root_dir/lib"
else
echo "$arch isn't supported by Rockchip NPU SDK!"
exit 1
fi
adb -s $adb_device_name push $sdk_lib_dir/. $adb_work_dir
}
function rockchip_npu_build_targets {
local arch=$1
local toolchain=$2
local sdk_root_dir=$3
# Build all of tests
rm -rf ./build
mkdir -p ./build
cd ./build
prepare_workspace
cmake .. \
-DWITH_GPU=OFF \
-DWITH_MKL=OFF \
-DWITH_LITE=ON \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DWITH_ARM_DOTPROD=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DLITE_BUILD_EXTRA=ON \
-DLITE_WITH_TRAIN=ON \
-DLITE_WITH_RKNPU=ON \
-DRKNPU_DDK_ROOT="$sdk_root_dir" \
-DARM_TARGET_OS="armlinux" -DARM_TARGET_ARCH_ABI=$arch -DARM_TARGET_LANG=$toolchain
make lite_compile_deps -j$NUM_CORES_FOR_COMPILE
}
function rockchip_npu_build_and_test {
run_all_tests_on_adb_device $1 $2 "/userdata/bin" "$(readlink -f ./rknpu_ddk)" "armv8" "gcc" rockchip_npu_build_targets rockchip_npu_prepare_device
}
# MediaTek APU
function mediatek_apu_prepare_device {
local adb_device_name=$1
local adb_work_dir=$2
local arch=$3
local toolchain=$4
local sdk_root_dir=$5
# Check device is available
is_available_adb_device $adb_device_name
if [[ $? -ne 0 ]]; then
echo "$adb_device_name not found!"
exit 1
fi
# Use high performance mode
adb -s $adb_device_name root
if [[ $? -ne 0 ]]; then
echo "$adb_device_name hasn't the root permission!"
exit 1
fi
adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"
adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor"
adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu2/cpufreq/scaling_governor"
adb -s $adb_device_name shell "echo performance > /sys/devices/system/cpu/cpu3/cpufreq/scaling_governor"
adb -s $adb_device_name shell "cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq"
adb -s $adb_device_name shell "echo 800000 > /proc/gpufreq/gpufreq_opp_freq"
adb -s $adb_device_name shell "echo dvfs_debug 0 > /sys/kernel/debug/vpu/power"
adb -s $adb_device_name shell "echo 0 > /sys/devices/platform/soc/10012000.dvfsrc/helio-dvfsrc/dvfsrc_force_vcore_dvfs_opp"
adb -s $adb_device_name shell "echo 0 > /sys/module/mmdvfs_pmqos/parameters/force_step"
adb -s $adb_device_name shell "echo 0 > /proc/sys/kernel/printk"
}
function mediatek_apu_build_targets {
local arch=$1
local toolchain=$2
local sdk_root_dir=$3
# Build all of tests
rm -rf ./build
mkdir -p ./build
cd ./build
prepare_workspace
cmake .. \
-DWITH_GPU=OFF \
-DWITH_MKL=OFF \
-DWITH_LITE=ON \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DWITH_ARM_DOTPROD=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DLITE_BUILD_EXTRA=ON \
-DLITE_WITH_TRAIN=ON \
-DLITE_WITH_APU=ON \
-DAPU_DDK_ROOT="$sdk_root_dir" \
-DARM_TARGET_OS="android" -DARM_TARGET_ARCH_ABI=$arch -DARM_TARGET_LANG=$toolchain
make lite_compile_deps -j$NUM_CORES_FOR_COMPILE
}
function mediatek_apu_build_and_test {
run_all_tests_on_adb_device $1 $2 "/data/local/tmp" "$(readlink -f ./apu_ddk)" "armv7" "gcc" mediatek_apu_build_targets mediatek_apu_prepare_device
}
function cmake_huawei_ascend_npu {
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/third_party/install/mklml/lib"
prepare_workspace
......@@ -478,77 +856,6 @@ function test_arm_android {
adb -s ${device} shell "rm -f ${adb_work_dir}/${test_name}"
}
# test_npu <some_test_name> <adb_port_number>
function test_npu {
local test_name=$1
local device=$2
if [[ "${test_name}x" == "x" ]]; then
echo "test_name can not be empty"
exit 1
fi
if [[ "${device}x" == "x" ]]; then
echo "Port can not be empty"
exit 1
fi
echo "test name: ${test_name}"
adb_work_dir="/data/local/tmp"
skip_list=("test_model_parser" "test_mobilenetv1" "test_mobilenetv2" "test_resnet50" "test_inceptionv4" "test_light_api" "test_apis" "test_paddle_api" "test_cxx_api" "test_gen_code")
for skip_name in ${skip_list[@]} ; do
[[ $skip_name =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && echo "skip $test_name" && return
done
local testpath=$(find ./lite -name ${test_name})
# note the ai_ddk_lib is under paddle-lite root directory
adb -s ${device} push ../ai_ddk_lib/lib64/* ${adb_work_dir}
adb -s ${device} push ${testpath} ${adb_work_dir}
if [[ ${test_name} == "test_npu_pass" ]]; then
local model_name=mobilenet_v1
adb -s ${device} push "./third_party/install/${model_name}" ${adb_work_dir}
adb -s ${device} shell "rm -rf ${adb_work_dir}/${model_name}_opt "
adb -s ${device} shell "cd ${adb_work_dir}; export LD_LIBRARY_PATH=./ ; export GLOG_v=0; ./${test_name} --model_dir=./${model_name} --optimized_model=./${model_name}_opt"
elif [[ ${test_name} == "test_subgraph_pass" ]]; then
local model_name=mobilenet_v1
adb -s ${device} push "./third_party/install/${model_name}" ${adb_work_dir}
adb -s ${device} shell "cd ${adb_work_dir}; export LD_LIBRARY_PATH=./ ; export GLOG_v=0; ./${test_name} --model_dir=./${model_name}"
else
adb -s ${device} shell "cd ${adb_work_dir}; export LD_LIBRARY_PATH=./ ; ./${test_name}"
fi
}
function test_npu_model {
local test_name=$1
local device=$2
local model_dir=$3
if [[ "${test_name}x" == "x" ]]; then
echo "test_name can not be empty"
exit 1
fi
if [[ "${device}x" == "x" ]]; then
echo "Port can not be empty"
exit 1
fi
if [[ "${model_dir}x" == "x" ]]; then
echo "Model dir can not be empty"
exit 1
fi
echo "test name: ${test_name}"
adb_work_dir="/data/local/tmp"
testpath=$(find ./lite -name ${test_name})
adb -s ${device} push ../ai_ddk_lib/lib64/* ${adb_work_dir}
adb -s ${device} push ${model_dir} ${adb_work_dir}
adb -s ${device} push ${testpath} ${adb_work_dir}
adb -s ${device} shell chmod +x "${adb_work_dir}/${test_name}"
local adb_model_path="${adb_work_dir}/`basename ${model_dir}`"
adb -s ${device} shell "export LD_LIBRARY_PATH=${adb_work_dir}; ${adb_work_dir}/${test_name} --model_dir=$adb_model_path"
}
# test the inference high level api
function test_arm_api {
local device=$1
......@@ -643,32 +950,6 @@ function _test_paddle_code_generator {
$adb shell $remote_test --optimized_model $remote_model --generated_code_file $ADB_WORK_DIR/gen_code.cc
}
function cmake_npu {
prepare_workspace
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
# $3: ARM_TARGET_LANG in "gcc" "clang"
# NPU libs need API LEVEL 24 above
build_dir=`pwd`
cmake .. \
-DWITH_GPU=OFF \
-DWITH_MKL=OFF \
-DWITH_LITE=ON \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DWITH_ARM_DOTPROD=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DLITE_WITH_NPU=ON \
-DANDROID_API_LEVEL=24 \
-DLITE_BUILD_EXTRA=ON \
-DNPU_DDK_ROOT="${build_dir}/../ai_ddk_lib/" \
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 -DARM_TARGET_LANG=$3
}
function cmake_arm {
prepare_workspace
# $1: ARM_TARGET_OS in "android" , "armlinux"
......@@ -756,31 +1037,6 @@ function build_ios {
cd -
}
# $1: ARM_TARGET_OS in "android"
# $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7"
# $3: ARM_TARGET_LANG in "gcc" "clang"
# $4: test_name
function build_npu {
os=$1
abi=$2
lang=$3
local test_name=$4
cur_dir=$(pwd)
build_dir=$cur_dir/build.lite.npu.${os}.${abi}.${lang}
mkdir -p $build_dir
cd $build_dir
cmake_npu ${os} ${abi} ${lang}
if [[ "${test_name}x" != "x" ]]; then
build_single $test_name
else
build $TESTS_FILE
fi
}
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
# $3: ARM_TARGET_LANG in "gcc" "clang"
......@@ -1029,42 +1285,6 @@ function build_test_arm {
build_test_arm_subtask_armlinux
}
function build_test_npu {
local test_name=$1
local port_armv8=5554
local port_armv7=5556
local os=android
local abi=armv8
local lang=gcc
local test_model_name=test_mobilenetv1
local model_name=mobilenet_v1
cur_dir=$(pwd)
build_npu "android" "armv8" "gcc" $test_name
# just test the model on armv8
# prepare_emulator $port_armv8
prepare_emulator $port_armv8 $port_armv7
local device_armv8=emulator-$port_armv8
if [[ "${test_name}x" != "x" ]]; then
test_npu ${test_name} ${device_armv8}
else
# run_gen_code_test ${port_armv8}
for _test in $(cat $TESTS_FILE | grep npu); do
test_npu $_test $device_armv8
done
fi
test_npu_model $test_model_name $device_armv8 "./third_party/install/$model_name"
cd -
# just test the model on armv8
# adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
echo "Done"
}
function mobile_publish {
# only check os=android abi=armv8 lang=gcc now
local os=android
......@@ -1147,6 +1367,21 @@ function main {
USE_ADB_EMULATOR="${i#*=}"
shift
;;
--adb_device_list=*)
ADB_DEVICE_LIST="${i#*=}"
if [[ -n $ADB_DEVICE_LIST && $USE_ADB_EMULATOR != "OFF" ]]; then
set +x
echo
echo -e "Need to set USE_ADB_EMULATOR=OFF if '--adb_device_list' is specified."
echo
exit 1
fi
shift
;;
--test_skip_list=*)
TEST_SKIP_LIST="${i#*=}"
shift
;;
--lite_with_coverage=*)
LITE_WITH_COVERAGE="${i#*=}"
shift
......@@ -1192,10 +1427,6 @@ function main {
test_arm $ARM_OS $ARM_ABI $ARM_LANG $ARM_PORT
shift
;;
test_npu)
test_npu $TEST_NAME $ARM_PORT
shift
;;
test_arm_android)
test_arm_android $TEST_NAME $ARM_PORT
shift
......@@ -1225,6 +1456,18 @@ function main {
build_test_xpu ON
shift
;;
huawei_kirin_npu_build_and_test)
huawei_kirin_npu_build_and_test $ADB_DEVICE_LIST $TEST_SKIP_LIST
shift
;;
rockchip_npu_build_and_test)
rockchip_npu_build_and_test $ADB_DEVICE_LIST $TEST_SKIP_LIST
shift
;;
mediatek_apu_build_and_test)
mediatek_apu_build_and_test $ADB_DEVICE_LIST $TEST_SKIP_LIST
shift
;;
build_test_huawei_ascend_npu)
build_test_huawei_ascend_npu
shift
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册