From 38b7e29fdba88a52823cef28dedc406722acb801 Mon Sep 17 00:00:00 2001
From: "cen.li" <cen.li@bitmain.com>
Date: Thu, 28 Nov 2019 11:11:19 +0800
Subject: [PATCH] * add bm kernel

* fix code_style test=develop
---
 cmake/lite.cmake                  | 32 ++++++++++---
 lite/api/CMakeLists.txt           | 12 ++++-
 lite/api/test_resnet50_lite_bm.cc |  3 +-
 lite/core/mir/pass_utils.cc       |  2 -
 lite/gen_code/CMakeLists.txt      |  2 +
 lite/kernels/bm/CMakeLists.txt    |  6 ++-
 lite/kernels/bm/calib_compute.cc  | 77 +++++++++++++++++++++++++++++++
 lite/kernels/bm/calib_compute.h   | 51 ++++++++++++++++++++
 lite/kernels/bm/conv_compute.cc   | 71 ++++++++++++++++++++++++++++
 lite/kernels/bm/conv_compute.h    | 46 ++++++++++++++++++
 10 files changed, 289 insertions(+), 13 deletions(-)
 create mode 100644 lite/kernels/bm/calib_compute.cc
 create mode 100644 lite/kernels/bm/calib_compute.h
 create mode 100644 lite/kernels/bm/conv_compute.cc
 create mode 100644 lite/kernels/bm/conv_compute.h

diff --git a/cmake/lite.cmake b/cmake/lite.cmake
index 98dbc9ab7e..4423e27e1a 100644
--- a/cmake/lite.cmake
+++ b/cmake/lite.cmake
@@ -22,7 +22,7 @@ endfunction()
 function (lite_deps TARGET)
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS ARGS)
+  set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS ARGS)
   cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
 
   set(deps ${lite_deps_DEPS})
@@ -94,6 +94,12 @@ function (lite_deps TARGET)
     endforeach(var)
   endif()
 
+  if (LITE_WITH_BM)
+    foreach(var ${lite_deps_BM_DEPS})
+      set(deps ${deps} ${var})
+    endforeach(var)
+  endif()
+
   set(${TARGET} ${deps} PARENT_SCOPE)
 endfunction()
 
@@ -118,7 +124,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
 function(lite_cc_library TARGET)
   set(options SHARED shared STATIC static MODULE module)
   set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS XPU_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS LIGHT_DEPS
+  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS XPU_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS LIGHT_DEPS
     HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
 
@@ -132,6 +138,7 @@ function(lite_cc_library TARGET)
           XPU_DEPS ${args_XPU_DEPS}
           ARM_DEPS ${args_ARM_DEPS}
           FPGA_DEPS ${args_FPGA_DEPS}
+          BM_DEPS ${args_BM_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
@@ -159,7 +166,7 @@ endfunction()
 function(lite_cc_binary TARGET)
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
+  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
     LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
 
@@ -171,6 +178,7 @@ function(lite_cc_binary TARGET)
           CL_DEPS ${args_CL_DEPS}
           ARM_DEPS ${args_ARM_DEPS}
           FPGA_DEPS ${args_FPGA_DEPS}
+          BM_DEPS ${args_BM_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
@@ -203,7 +211,7 @@ function(lite_cc_test TARGET)
   endif()
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
+  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
     LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS
     COMPILE_LEVEL # (basic|extra)
   )
@@ -223,6 +231,7 @@ function(lite_cc_test TARGET)
           CL_DEPS ${args_CL_DEPS}
           ARM_DEPS ${args_ARM_DEPS}
           FPGA_DEPS ${args_FPGA_DEPS}
+          BM_DEPS ${args_BM_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
@@ -249,6 +258,7 @@ set(x86_kernels CACHE INTERNAL "x86 kernels")
 set(fpga_kernels CACHE INTERNAL "fpga kernels")
 set(npu_kernels CACHE INTERNAL "npu kernels")
 set(xpu_kernels CACHE INTERNAL "xpu kernels")
+set(bm_kernels CACHE INTERNAL "bm kernels")
 set(opencl_kernels CACHE INTERNAL "opencl kernels")
 set(host_kernels CACHE INTERNAL "host kernels")
 
@@ -259,12 +269,12 @@ if(LITE_BUILD_TAILOR)
   file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
 endif()
 
 # add a kernel for some specific device
-# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
+# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA, BM)
 # level: one of (basic, extra)
 function(add_kernel TARGET device level)
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
+  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
     LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -330,6 +340,12 @@ function(add_kernel TARGET device level)
     endif()
     set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "")
   endif()
+  if ("${device}" STREQUAL "BM")
+    if (NOT LITE_WITH_BM)
+      return()
+    endif()
+    set(bm_kernels "${bm_kernels};${TARGET}" CACHE INTERNAL "")
+  endif()
   if ("${device}" STREQUAL "OPENCL")
     if (NOT LITE_WITH_OPENCL)
       return()
@@ -362,6 +378,7 @@ function(add_kernel TARGET device level)
           CL_DEPS ${args_CL_DEPS}
           ARM_DEPS ${args_ARM_DEPS}
           FPGA_DEPS ${args_FPGA_DEPS}
+          BM_DEPS ${args_BM_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
@@ -380,7 +397,7 @@ endif()
 function(add_operator TARGET level)
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
+  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS BM_DEPS FPGA_DEPS PROFILE_DEPS
     LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -411,6 +428,7 @@ function(add_operator TARGET level)
           CL_DEPS ${args_CL_DEPS}
           ARM_DEPS ${args_ARM_DEPS}
           FPGA_DEPS ${args_FPGA_DEPS}
+          BM_DEPS ${args_BM_DEPS}
           PROFILE_DEPS ${args_PROFILE_DEPS}
           LIGHT_DEPS ${args_LIGHT_DEPS}
           HVY_DEPS ${args_HVY_DEPS}
diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index a98cd6881a..aef0fc396e 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -58,6 +58,11 @@ if(LITE_WITH_FPGA)
     set(cxx_api_deps ${cxx_api_deps} ${fpga_deps})
 endif()
 
+if(LITE_WITH_BM)
+    set(light_api_deps ${light_api_deps} ${bm_deps})
+    set(cxx_api_deps ${cxx_api_deps} ${bm_deps})
+endif()
+
 message(STATUS "get ops ${ops}")
 message(STATUS "get X86 kernels ${x86_kernels}")
 message(STATUS "get Host kernels ${host_kernels}")
@@ -79,7 +84,8 @@ if (NOT LITE_ON_TINY_PUBLISH)
         NPU_DEPS ${npu_kernels} ${npu_bridges} npu_pass
         XPU_DEPS ${xpu_kernels} ${xpu_bridges} xpu_pass
         CL_DEPS ${opencl_kenrels}
-        FPGA_DEPS ${fpga_kenrels})
+        FPGA_DEPS ${fpga_kenrels}
+        BM_DEPS ${bm_kernels})
 endif()
 
 # for light api
@@ -245,6 +251,7 @@ lite_cc_test(test_light_api SRCS light_api_test.cc
     DEPS light_api program mir_passes paddle_api_light
     CL_DEPS ${opencl_kernels}
     FPGA_DEPS ${fpga_kernels}
+    BM_DEPS ${bm_kernels}
     ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
 
 lite_cc_test(test_apis SRCS apis_test.cc
@@ -253,6 +260,7 @@ lite_cc_test(test_apis SRCS apis_test.cc
     X86_DEPS ${x86_kernels}
     XPU_DEPS ${xpu_kernels}
     FPGA_DEPS ${fpga_kernels}
+    BM_DEPS ${bm_kernels}
     ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
     --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
 
@@ -298,6 +306,7 @@ if(NOT IOS)
       XPU_DEPS ${xpu_kernels}
       CL_DEPS ${opencl_kernels}
      FPGA_DEPS ${fpga_kernels}
+      BM_DEPS ${bm_kernels}
       X86_DEPS ${x86_kernels})
   lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils
     ${ops} ${host_kernels}
@@ -305,6 +314,7 @@ if(NOT IOS)
       NPU_DEPS ${npu_kernels}
       XPU_DEPS ${xpu_kernels}
       CL_DEPS ${opencl_kernels}
+      BM_DEPS ${bm_kernels}
       FPGA_DEPS ${fpga_kernels}
       X86_DEPS ${x86_kernels})
 endif()
diff --git a/lite/api/test_resnet50_lite_bm.cc b/lite/api/test_resnet50_lite_bm.cc
index f637a55aae..4084386a1f 100644
--- a/lite/api/test_resnet50_lite_bm.cc
+++ b/lite/api/test_resnet50_lite_bm.cc
@@ -107,8 +107,7 @@ void TestModel(const std::vector<Place>& valid_places) {
 
 TEST(ResNet50, test_bm) {
   std::vector<Place> valid_places({
-      Place{TARGET(kBM), PRECISION(kInt8)},
-      Place{TARGET(kBM), PRECISION(kFloat)},
+      Place{TARGET(kBM), PRECISION(kInt8)}
   });
 
   TestModel(valid_places);
diff --git a/lite/core/mir/pass_utils.cc b/lite/core/mir/pass_utils.cc
index 775e52aae0..4f6be2c186 100644
--- a/lite/core/mir/pass_utils.cc
+++ b/lite/core/mir/pass_utils.cc
@@ -54,7 +54,6 @@ bool PassMatchesTarget(const mir::Pass& pass, TargetType target) {
 }
 
 bool PassMatchesKernels(const mir::Pass& pass) {
-#if 0
   const auto& kernels = pass.GetBoundKernels();
   for (const auto& kernel : kernels) {
     for (const auto& place : kernel.second) {
@@ -63,7 +62,6 @@ bool PassMatchesKernels(const mir::Pass& pass) {
       }
     }
   }
-#endif
   return true;
 }
 
diff --git a/lite/gen_code/CMakeLists.txt b/lite/gen_code/CMakeLists.txt
index 40c9541554..56c70cf1e1 100644
--- a/lite/gen_code/CMakeLists.txt
+++ b/lite/gen_code/CMakeLists.txt
@@ -18,6 +18,7 @@ lite_cc_test(test_gen_code SRCS gen_code_test.cc
     XPU_DEPS ${xpu_kernels}
     CL_DEPS ${opencl_kernels}
     FPGA_DEPS ${fpga_kernels}
+    BM_DEPS ${bm_kernels}
     EXCLUDE_COMPILE_DEPS "ON"
     ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
 
@@ -46,6 +47,7 @@ lite_cc_test(test_generated_code SRCS generated_code_test.cc DEPS __generated_code__
     XPU_DEPS ${xpu_kernels}
     CL_DEPS ${opencl_kernels}
     FPGA_DEPS ${fpga_kernels}
+    BM_DEPS ${bm_kernels}
     EXCLUDE_COMPILE_DEPS "ON"
 )
diff --git a/lite/kernels/bm/CMakeLists.txt b/lite/kernels/bm/CMakeLists.txt
index baa798d741..3fcf383979 100644
--- a/lite/kernels/bm/CMakeLists.txt
+++ b/lite/kernels/bm/CMakeLists.txt
@@ -1,5 +1,9 @@
-
 if(NOT LITE_WITH_BM)
     return ()
 endif()
+add_kernel(conv_2d_bm BM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(calib_compute_bm BM basic SRCS calib_compute.cc DEPS ${lite_kernel_deps})
+
+message(STATUS "compile with lite BM kernels")
+
 
diff --git a/lite/kernels/bm/calib_compute.cc b/lite/kernels/bm/calib_compute.cc
new file mode 100644
index 0000000000..c54e04e5ef
--- /dev/null
+++ b/lite/kernels/bm/calib_compute.cc
@@ -0,0 +1,77 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/calib_compute.h"
+#include <vector>
+#include "lite/core/op_registry.h"
+#include "lite/core/type_system.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+void CalibComputeFp32ToInt8::Run() {
+  return;
+}
+
+void CalibComputeInt8ToFp32::Run() {
+  return;
+}
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_KERNEL(calib,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
+                     fp32_to_int8)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(calib,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
+                     int8_to_fp32)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
+    .Finalize();
+REGISTER_LITE_KERNEL(calib_once,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
+                     fp32_to_int8)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(calib_once,
+                     kBM,
+                     kInt8,
+                     kNCHW,
+                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
+                     int8_to_fp32)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
+    .Finalize();
diff --git a/lite/kernels/bm/calib_compute.h b/lite/kernels/bm/calib_compute.h
new file mode 100644
index 0000000000..8a5bf057c0
--- /dev/null
+++ b/lite/kernels/bm/calib_compute.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "lite/core/kernel.h"
+#include "lite/operators/calib_op.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+class CalibComputeFp32ToInt8
+    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::CalibParam;
+
+  void Run() override;
+
+  ~CalibComputeFp32ToInt8() override{};
+
+ private:
+};
+
+class CalibComputeInt8ToFp32
+    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::CalibParam;
+
+  void Run() override;
+
+  ~CalibComputeInt8ToFp32() override{};
+
+ private:
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/kernels/bm/conv_compute.cc b/lite/kernels/bm/conv_compute.cc
new file mode 100644
index 0000000000..644dabe201
--- /dev/null
+++ b/lite/kernels/bm/conv_compute.cc
@@ -0,0 +1,71 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/bm/conv_compute.h"
+#include <vector>
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+template class ConvComputeInt8<PRECISION(kInt8)>;
+template class ConvComputeInt8<PRECISION(kFloat)>;
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_LITE_KERNEL(
+    conv2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ConvCompute, def)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kBM),
+                                      PRECISION(kFloat),
+                                      DATALAYOUT(kNCHW))})
+    .BindInput("Bias",
+               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
+    .BindInput("Filter",
+               {LiteType::GetTensorTy(TARGET(kBM),
+                                      PRECISION(kFloat),
+                                      DATALAYOUT(kNCHW))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kBM),
+                                       PRECISION(kFloat),
+                                       DATALAYOUT(kNCHW))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(
+    conv2d,
+    kBM,
+    kInt8,
+    kNCHW,
+    paddle::lite::kernels::bm::ConvComputeInt8<PRECISION(kInt8)>,
+    int8_out)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kBM),
+                                      PRECISION(kInt8),
+                                      DATALAYOUT(kNCHW))})
+    .BindInput("Bias",
+               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
+    .BindInput("Filter",
+               {LiteType::GetTensorTy(TARGET(kBM),
+                                      PRECISION(kInt8),
+                                      DATALAYOUT(kNCHW))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kBM),
+                                       PRECISION(kInt8),
+                                       DATALAYOUT(kNCHW))})
+    .Finalize();
diff --git a/lite/kernels/bm/conv_compute.h b/lite/kernels/bm/conv_compute.h
new file mode 100644
index 0000000000..bf5f7d7aa6
--- /dev/null
+++ b/lite/kernels/bm/conv_compute.h
@@ -0,0 +1,46 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "lite/core/kernel.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace bm {
+
+class ConvCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ConvParam;
+
+  void PrepareForRun() {};
+  void Run() {};
+  virtual ~ConvCompute() = default;
+};
+
+template <PrecisionType Ptype_out>
+class ConvComputeInt8
+    : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::ConvParam;
+
+  void PrepareForRun() {};
+  void Run() {};
+  virtual ~ConvComputeInt8() = default;
+};
+
+}  // namespace bm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
-- 
GitLab