Commit 38b7e29f authored by cen.li

* add bm kernel
* fix code_style test=develop

Parent 14e6c781
......@@ -22,7 +22,7 @@ endfunction()
function (lite_deps TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS ARGS)
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS ARGS)
cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps ${lite_deps_DEPS})
......@@ -94,6 +94,12 @@ function (lite_deps TARGET)
endforeach(var)
endif()
if (LITE_WITH_BM)
foreach(var ${lite_deps_BM_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()
set(${TARGET} ${deps} PARENT_SCOPE)
endfunction()
......@@ -118,7 +124,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
function(lite_cc_library TARGET)
set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS XPU_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS LIGHT_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS XPU_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -132,6 +138,7 @@ function(lite_cc_library TARGET)
XPU_DEPS ${args_XPU_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......@@ -159,7 +166,7 @@ endfunction()
function(lite_cc_binary TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -171,6 +178,7 @@ function(lite_cc_binary TARGET)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......@@ -203,7 +211,7 @@ function(lite_cc_test TARGET)
endif()
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS
COMPILE_LEVEL # (basic|extra)
......@@ -223,6 +231,7 @@ function(lite_cc_test TARGET)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......@@ -249,6 +258,7 @@ set(x86_kernels CACHE INTERNAL "x86 kernels")
set(fpga_kernels CACHE INTERNAL "fpga kernels")
set(npu_kernels CACHE INTERNAL "npu kernels")
set(xpu_kernels CACHE INTERNAL "xpu kernels")
set(bm_kernels CACHE INTERNAL "bm kernels")
set(opencl_kernels CACHE INTERNAL "opencl kernels")
set(host_kernels CACHE INTERNAL "host kernels")
......@@ -259,12 +269,12 @@ if(LITE_BUILD_TAILOR)
file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
endif()
# add a kernel for some specific device
# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA, BM)
# level: one of (basic, extra)
function(add_kernel TARGET device level)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -330,6 +340,12 @@ function(add_kernel TARGET device level)
endif()
set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "BM")
if (NOT LITE_WITH_BM)
return()
endif()
set(bm_kernels "${bm_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "OPENCL")
if (NOT LITE_WITH_OPENCL)
return()
......@@ -362,6 +378,7 @@ function(add_kernel TARGET device level)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......@@ -380,7 +397,7 @@ endif()
function(add_operator TARGET level)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS BM_DEPS FPGA_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -411,6 +428,7 @@ function(add_operator TARGET level)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......
......@@ -58,6 +58,11 @@ if(LITE_WITH_FPGA)
set(cxx_api_deps ${cxx_api_deps} ${fpga_deps})
endif()
if(LITE_WITH_BM)
set(light_api_deps ${light_api_deps} ${bm_deps})
set(cxx_api_deps ${cxx_api_deps} ${bm_deps})
endif()
message(STATUS "get ops ${ops}")
message(STATUS "get X86 kernels ${x86_kernels}")
message(STATUS "get Host kernels ${host_kernels}")
......@@ -79,7 +84,8 @@ if (NOT LITE_ON_TINY_PUBLISH)
NPU_DEPS ${npu_kernels} ${npu_bridges} npu_pass
XPU_DEPS ${xpu_kernels} ${xpu_bridges} xpu_pass
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels})
endif()
# for light api
......@@ -245,6 +251,7 @@ lite_cc_test(test_light_api SRCS light_api_test.cc
DEPS light_api program mir_passes paddle_api_light
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_test(test_apis SRCS apis_test.cc
......@@ -253,6 +260,7 @@ lite_cc_test(test_apis SRCS apis_test.cc
X86_DEPS ${x86_kernels}
XPU_DEPS ${xpu_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
......@@ -298,6 +306,7 @@ if(NOT IOS)
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
X86_DEPS ${x86_kernels})
lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels}
......@@ -305,6 +314,7 @@ if(NOT IOS)
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
BM_DEPS ${bm_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels})
endif()
......
......@@ -107,8 +107,7 @@ void TestModel(const std::vector<Place>& valid_places) {
TEST(ResNet50, test_bm) {
std::vector<Place> valid_places({
Place{TARGET(kBM), PRECISION(kInt8)},
Place{TARGET(kBM), PRECISION(kFloat)},
Place{TARGET(kBM), PRECISION(kInt8)}
});
TestModel(valid_places);
......
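For context, an application picks up these BM kernels by listing the kBM target among its valid places, much as the test above does internally. Below is a minimal sketch using Paddle-Lite's public C++ API (CxxConfig, set_valid_places, CreatePaddlePredictor from paddle_api.h); the model path, input shape, and the kX86 fallback place are placeholders, not taken from this commit:

#include <memory>
#include <vector>
#include "paddle_api.h"  // Paddle-Lite public C++ API

using namespace paddle::lite_api;  // Place, CxxConfig, PaddlePredictor

int main() {
  CxxConfig config;
  config.set_model_dir("/path/to/model");  // placeholder model directory
  // Prefer BM kernels; fall back to another target for unsupported ops.
  config.set_valid_places({Place{TARGET(kBM), PRECISION(kFloat)},
                           Place{TARGET(kX86), PRECISION(kFloat)}});
  auto predictor = CreatePaddlePredictor<CxxConfig>(config);

  auto input = predictor->GetInput(0);
  input->Resize({1, 3, 224, 224});  // placeholder input shape
  float* data = input->mutable_data<float>();
  // ... fill `data` with preprocessed image values ...
  predictor->Run();
  return 0;
}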
......@@ -54,7 +54,6 @@ bool PassMatchesTarget(const mir::Pass& pass, TargetType target) {
}
bool PassMatchesKernels(const mir::Pass& pass) {
#if 0
const auto& kernels = pass.GetBoundKernels();
for (const auto& kernel : kernels) {
for (const auto& place : kernel.second) {
......@@ -63,7 +62,6 @@ bool PassMatchesKernels(const mir::Pass& pass) {
}
}
}
#endif
return true;
}
......
......@@ -18,6 +18,7 @@ lite_cc_test(test_gen_code SRCS gen_code_test.cc
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
EXCLUDE_COMPILE_DEPS "ON"
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
......@@ -46,6 +47,7 @@ lite_cc_test(test_generated_code SRCS generated_code_test.cc DEPS __generated_co
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
EXCLUDE_COMPILE_DEPS "ON"
)
if(NOT LITE_WITH_BM)
return ()
endif()
add_kernel(conv_2d_bm BM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps})
add_kernel(calib_compute_bm BM basic SRCS calib_compute.cc DEPS ${lite_kernel_deps})
message(STATUS "compile with lite BM kernels")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/calib_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
void CalibComputeFp32ToInt8::Run() {
return;
}
void CalibComputeInt8ToFp32::Run() {
return;
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(calib,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
fp32_to_int8)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.Finalize();
REGISTER_LITE_KERNEL(calib,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
int8_to_fp32)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.Finalize();
REGISTER_LITE_KERNEL(calib_once,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
fp32_to_int8)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.Finalize();
REGISTER_LITE_KERNEL(calib_once,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
int8_to_fp32)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.Finalize();
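Both Run() bodies above are still empty stubs. For reference, here is a self-contained sketch of the scale-based fp32/int8 conversion a calib step typically performs; the symmetric quantization with a per-tensor scale and the [-127, 127] clamp mirror other Lite backends and are assumptions here, not code from this commit:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Quantize fp32 values with a per-tensor scale: q = round(x / scale),
// clamped to the int8 range (the Fp32ToInt8 direction).
std::vector<int8_t> QuantizeFp32ToInt8(const std::vector<float>& x, float scale) {
  std::vector<int8_t> q(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    float v = std::round(x[i] / scale);
    q[i] = static_cast<int8_t>(std::max(-127.0f, std::min(127.0f, v)));
  }
  return q;
}

// Recover fp32 values: x = q * scale (the Int8ToFp32 direction).
std::vector<float> DequantizeInt8ToFp32(const std::vector<int8_t>& q, float scale) {
  std::vector<float> x(q.size());
  for (size_t i = 0; i < q.size(); ++i) x[i] = q[i] * scale;
  return x;
}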
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
#include "lite/operators/calib_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
class CalibComputeFp32ToInt8
: public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
public:
using param_t = operators::CalibParam;
void Run() override;
~CalibComputeFp32ToInt8() override = default;
private:
};
class CalibComputeInt8ToFp32
: public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
public:
using param_t = operators::CalibParam;
void Run() override;
~CalibComputeInt8ToFp32() override = default;
private:
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/conv_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
template class ConvComputeInt8<PRECISION(kInt8)>;
template class ConvComputeInt8<PRECISION(kFloat)>;
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
conv2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ConvCompute, def)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.BindInput("Bias",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindInput("Filter",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.BindOutput("Output",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW))})
.Finalize();
REGISTER_LITE_KERNEL(
conv2d,
kBM,
kInt8,
kNCHW,
paddle::lite::kernels::bm::ConvComputeInt8<PRECISION(kInt8)>,
int8_out)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW))})
.BindInput("Bias",
{LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
.BindInput("Filter",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW))})
.BindOutput("Output",
{LiteType::GetTensorTy(TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
class ConvCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
public:
using param_t = operators::ConvParam;
void PrepareForRun() override {}
void Run() override {}
virtual ~ConvCompute() = default;
};
template <PrecisionType Ptype_out>
class ConvComputeInt8
: public KernelLite<TARGET(kBM), PRECISION(kInt8), DATALAYOUT(kNCHW)> {
public:
using param_t = operators::ConvParam;
void PrepareForRun() override {}
void Run() override {}
virtual ~ConvComputeInt8() = default;
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
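Like the calib kernels, ConvCompute and ConvComputeInt8 only declare empty PrepareForRun/Run bodies in this commit. As a reminder of the computation they will eventually offload to the BM runtime, here is a self-contained naive NCHW conv2d reference (unit stride, no padding, no groups, no bias; the function name and layout assumptions are illustrative, not part of the Lite API):

#include <vector>

// Naive conv2d in NCHW layout with OIHW weights:
//   out[n][oc][oh][ow] = sum_{ic,kh,kw} in[n][ic][oh+kh][ow+kw] * w[oc][ic][kh][kw]
std::vector<float> NaiveConv2dNCHW(const std::vector<float>& in, int N, int C,
                                   int H, int W, const std::vector<float>& w,
                                   int OC, int KH, int KW) {
  const int OH = H - KH + 1;
  const int OW = W - KW + 1;
  std::vector<float> out(static_cast<size_t>(N) * OC * OH * OW, 0.0f);
  for (int n = 0; n < N; ++n)
    for (int oc = 0; oc < OC; ++oc)
      for (int oh = 0; oh < OH; ++oh)
        for (int ow = 0; ow < OW; ++ow) {
          float acc = 0.0f;
          for (int ic = 0; ic < C; ++ic)
            for (int kh = 0; kh < KH; ++kh)
              for (int kw = 0; kw < KW; ++kw)
                acc += in[((n * C + ic) * H + oh + kh) * W + ow + kw] *
                       w[((oc * C + ic) * KH + kh) * KW + kw];
          out[((n * OC + oc) * OH + oh) * OW + ow] = acc;
        }
  return out;
}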