提交 6e7550f8 编写于 作者: W Wang Zhen

Merge branch 'opencl_ci' into 'incubate/lite'

fix opencl compile bugs.

See merge request inference/paddlelite!73
......@@ -87,6 +87,25 @@ build:mobile_android:
dependencies:
- build:server
# CI job that builds the Lite Android OpenCL targets by invoking the
# build_test_arm_opencl entry point of paddle/fluid/lite/tools/build.sh.
build:mobile_android_cl:
tags:
- lite
stage: build_mobile
image: $MOBILE_LITE_DOCKER_IMAGE
cache:
key: mobile_thirdparty
paths:
# third_party trees of the two build directories the script creates
# (build.lite.<os>.<abi>.<lang>.opencl for android armv8/armv7 with gcc).
- build.lite.android.armv8.gcc.opencl/third_party
- build.lite.android.armv7.gcc.opencl/third_party
# NOTE(review): ~/.ccache lies outside $CI_PROJECT_DIR -- confirm the runner actually caches it.
- ~/.ccache
- $CI_PROJECT_DIR/build_mobile_ccache_cl
script:
# Point ccache at the project-local directory listed in the cache paths above.
- export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_ccache_cl
- ./paddle/fluid/lite/tools/build.sh build_test_arm_opencl
dependencies:
- build:server
build:mobile_armlinux:
tags:
- lite
......
......@@ -112,7 +112,7 @@ file(WRITE ${__lite_cc_files} "") # clean
function(lite_cc_library TARGET)
set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -121,6 +121,7 @@ function(lite_cc_library TARGET)
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
......@@ -135,6 +136,7 @@ function(lite_cc_library TARGET)
else()
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
endif()
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
foreach(cc_file ${args_SRCS})
file(APPEND ${__lite_cc_files} "${cc_file}\n")
......@@ -152,7 +154,7 @@ endfunction()
function(lite_cc_binary TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -161,12 +163,14 @@ function(lite_cc_binary TARGET)
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
# collect targets need to compile for lite
add_dependencies(lite_compile_deps ${TARGET})
endfunction()
......@@ -178,7 +182,7 @@ file(WRITE ${offline_test_registry_file} "") # clean
function(lite_cc_test TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -188,12 +192,14 @@ function(lite_cc_test TARGET)
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
file(APPEND ${offline_test_registry_file} "${TARGET}\n")
# collect targets need to compile for lite
......
......@@ -2,7 +2,7 @@ set(cxx_api_lite_deps
scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite)
if(LITE_WITH_CUDA)
set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda)
cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda)
lite_cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda)
nv_test(test_cxx_api_lite_cuda SRCS cxx_api_test.cc DEPS cxx_api_lite_cuda)
endif()
......@@ -12,8 +12,7 @@ lite_cc_library(lite_api_test_helper SRCS lite_api_test_helper.cc
DEPS scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite
${ops_lite} ${host_kernels}
CUDA_DEPS kernels_cuda
X86_DEPS ${x86_kernels}
)
X86_DEPS ${x86_kernels})
set(light_api_deps
scope_lite target_wrapper_host model_parser_lite program_lite)
......@@ -31,8 +30,7 @@ lite_cc_library(cxx_api_lite
DEPS ${cxx_api_lite_deps} ${ops_lite} ${host_kernels} program_lite
X86_DEPS ${x86_kernels} operator
ARM_DEPS ${arm_kernels}
CL_DEPS ${opencl_kenrels}
)
CL_DEPS ${opencl_kenrels})
lite_cc_library(light_api_lite SRCS light_api.cc
DEPS scope_lite target_wrapper_host model_parser_lite
......@@ -40,8 +38,7 @@ lite_cc_library(light_api_lite SRCS light_api.cc
CUDA_DEPS target_wrapper_cuda
X86_DEPS ${x86_kernels} operator
ARM_DEPS ${arm_kernels}
CL_DEPS ${opencl_kenrels}
)
CL_DEPS ${opencl_kenrels})
include(ExternalProject)
set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
......@@ -61,29 +58,35 @@ if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
add_dependencies(test_googlenet_lite extern_lite_download_GoogleNet_inference_tar_gz)
endif()
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
set(lite_model_test_DEPS cxx_api_lite mir_passes ${ops_lite} ${host_kernels} ${arm_kernels})
lite_cc_test(test_mobilenetv1_lite SRCS mobilenetv1_test.cc
DEPS ${lite_model_test_DEPS}
CL_DEPS ${opencl_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/mobilenet_v1 SERIAL)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/paddle/fluid/lite/opencl
--model_dir=${LITE_MODEL_DIR}/mobilenet_v1 SERIAL)
add_dependencies(test_mobilenetv1_lite extern_lite_download_mobilenet_v1_tar_gz)
lite_cc_test(test_mobilenetv2_lite SRCS mobilenetv2_test.cc
DEPS ${lite_model_test_DEPS}
ARGS --model_dir=${LITE_MODEL_DIR}/mobilenet_v2 SERIAL)
CL_DEPS ${opencl_kernels}
ARGS --cl_path=${CMAKE_SOURCE_DIR}/paddle/fluid/lite/opencl
--model_dir=${LITE_MODEL_DIR}/mobilenet_v2 SERIAL)
add_dependencies(test_mobilenetv2_lite extern_lite_download_mobilenet_v2_relu_tar_gz)
lite_cc_test(test_resnet50_lite SRCS resnet50_test.cc
DEPS ${lite_model_test_DEPS}
ARGS --model_dir=${LITE_MODEL_DIR}/resnet50 SERIAL)
CL_DEPS ${opencl_kernels}
ARGS --cl_path=${CMAKE_SOURCE_DIR}/paddle/fluid/lite/opencl
--model_dir=${LITE_MODEL_DIR}/resnet50 SERIAL)
add_dependencies(test_resnet50_lite extern_lite_download_resnet50_tar_gz)
lite_cc_test(test_inceptionv4_lite SRCS inceptionv4_test.cc
DEPS ${lite_model_test_DEPS}
ARGS --model_dir=${LITE_MODEL_DIR}/inception_v4 SERIAL)
CL_DEPS ${opencl_kernels}
ARGS --cl_path=${CMAKE_SOURCE_DIR}/paddle/fluid/lite/opencl
--model_dir=${LITE_MODEL_DIR}/inception_v4 SERIAL)
add_dependencies(test_inceptionv4_lite extern_lite_download_inception_v4_simple_tar_gz)
endif()
......@@ -91,14 +94,15 @@ endif()
# TODO(Superjomn) support later.
lite_cc_test(test_light_api_lite SRCS light_api_test.cc
DEPS light_api_lite program_lite mir_passes
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt
SERIAL)
CL_DEPS ${opencl_kernels}
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_test(test_apis_lite SRCS apis_test.cc
DEPS cxx_api_lite light_api_lite ${ops_lite}
CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels} operator
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_library(paddle_api_lite SRCS paddle_api.cc DEPS op_params_lite)
......@@ -107,7 +111,7 @@ lite_cc_library(paddle_api_lite SRCS paddle_api.cc DEPS op_params_lite)
lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api_lite paddle_api_lite light_api_lite
${ops_lite}
ARM_DEPS ${arm_kernels}
)
CL_DEPS ${opencl_kernels})
# The final inference library for just MobileConfig.
lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api_lite paddle_api_lite)
......@@ -119,12 +123,14 @@ bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api
lite_cc_test(test_paddle_api_lite SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
${ops_lite}
ARM_DEPS ${arm_kernels}
CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
lite_cc_test(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light
${ops_lite}
ARM_DEPS ${arm_kernels}
CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels})
if (WITH_TESTING)
......@@ -147,5 +153,8 @@ lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin_int8.cc
target_wrapper_host
mir_passes
${ops_lite} ${host_kernels}
ARM_DEPS ${arm_kernels})
lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc DEPS paddle_api_full)
ARM_DEPS ${arm_kernels}
CL_DEPS ${opencl_kernels})
lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc
DEPS paddle_api_full
CL_DEPS ${opencl_kernels})
......@@ -7,10 +7,10 @@ if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))
endif()
# TODO(xxx): separate them
cc_library(math_arm SRCS
funcs.cc
packed_sgemm.cc
softmax.cc
lite_cc_library(math_arm SRCS
funcs.cc
packed_sgemm.cc
softmax.cc
scale.cc
pooling.cc
elementwise.cc
......
if (WITH_TESTING)
cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest gflags)
lite_cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest gflags)
endif()
lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc
DEPS target_wrapper_host place_lite
......@@ -19,27 +19,27 @@ endif()
proto_library(framework_proto_lite SRCS framework.proto)
cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite})
if (LITE_WITH_X86)
cc_library(variable_lite SRCS variable.cc DEPS framework_proto)
cc_library(types_lite SRCS types.cc DEPS framework_proto)
lite_cc_library(variable_lite SRCS variable.cc DEPS framework_proto)
lite_cc_library(types_lite SRCS types.cc DEPS framework_proto)
else()
cc_library(variable_lite SRCS variable.cc)
cc_library(types_lite SRCS types.cc)
lite_cc_library(variable_lite SRCS variable.cc)
lite_cc_library(types_lite SRCS types.cc)
endif()
cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
cc_library(cpu_info_lite SRCS cpu_info.cc)
lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3)
cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite
lite_cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
lite_cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
lite_cc_library(cpu_info_lite SRCS cpu_info.cc)
lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3 CL_DEPS cl_helper)
lite_cc_library(kernel_lite SRCS kernel.cc DEPS context_lite type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite ${tensor_lite})
lite_cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite
cpp_op_desc_lite ${tensor_lite})
cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
lite_cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
lite_cc_library(program_lite SRCS program.cc
DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite ${ops_lite}
HVY_DEPS framework_proto
PROFILE_DEPS basic_profiler_lite)
cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite)
lite_cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite)
add_subdirectory(mir)
add_subdirectory(profile)
......@@ -49,7 +49,7 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
return()
endif()
cc_library(program_fake_utils SRCS program_fake_utils.cc DEPS mir_ssa_graph
lite_cc_library(program_fake_utils SRCS program_fake_utils.cc DEPS mir_ssa_graph
scope_lite op_registry_lite proto_desc op_lite
${ops_lite}
${host_kernels}
......
cc_library(mir_node SRCS node.cc DEPS framework_proto_lite)
cc_library(mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node program_lite)
cc_library(mir_pass SRCS pass.cc DEPS mir_ssa_graph)
cc_library(mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes)
cc_library(mir_pass_registry SRCS pass_registry.cc DEPS mir_pass_manager)
lite_cc_library(mir_node SRCS node.cc DEPS framework_proto_lite)
lite_cc_library(mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node program_lite)
lite_cc_library(mir_pass SRCS pass.cc DEPS mir_ssa_graph)
lite_cc_library(mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes)
lite_cc_library(mir_pass_registry SRCS pass_registry.cc DEPS mir_pass_manager)
add_subdirectory(fusion)
add_subdirectory(elimination)
cc_library(mir_passes
lite_cc_library(mir_passes
SRCS
fusion/fc_fuse_pass.cc
fusion/conv_elementwise_add_activation_fuse_pass.cc
......@@ -28,7 +28,7 @@ cc_library(mir_passes
runtime_context_assign_pass.cc
DEPS mir_pass types_lite context_lite ${mir_fusers})
#cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS
# lite_cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS
#mir_ssa_graph scope_lite op_lite
#fc_op_lite
#${host_kernels}
......@@ -68,7 +68,7 @@ lite_cc_library(pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS p
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
return()
endif()
cc_test(test_mir_pass_manager SRCS pass_manager_test.cc DEPS mir_pass_manager mir_passes)
lite_cc_test(test_mir_pass_manager SRCS pass_manager_test.cc DEPS mir_pass_manager mir_passes)
# TODO(wz) replace framework/proto with lite proto.
......
cc_library(fuse_fc
lite_cc_library(fuse_fc
SRCS fc_fuser.cc
DEPS pattern_matcher_high_api)
cc_library(fuse_conv_elementwise_add_activation
lite_cc_library(fuse_conv_elementwise_add_activation
SRCS conv_elementwise_add_activation_fuser.cc
DEPS pattern_matcher_high_api)
cc_library(fuse_conv_bn
lite_cc_library(fuse_conv_bn
SRCS conv_bn_fuser.cc
DEPS pattern_matcher_high_api)
cc_library(fuse_elementwise_add_activation
lite_cc_library(fuse_elementwise_add_activation
SRCS elementwise_add_activation_fuser.cc
DEPS pattern_matcher_high_api)
cc_library(fuse_quant_dequant
lite_cc_library(fuse_quant_dequant
SRCS quant_dequant_op_fuser.cc
DEPS pattern_matcher_high_api)
......
cc_library(target_wrapper_host SRCS target_wrapper.cc)
lite_cc_library(target_wrapper_host SRCS target_wrapper.cc)
......@@ -4,20 +4,20 @@ endif()
message(STATUS "compile with lite ARM kernels")
cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(activation_compute_arm SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(conv_compute_arm SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(elementwise_compute_arm SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(calib_compute_arm SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm)
cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(activation_compute_arm SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(conv_compute_arm SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(elementwise_compute_arm SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(calib_compute_arm SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm)
lite_cc_test(test_activation_compute_arm SRCS activation_compute_test.cc DEPS activation_compute_arm)
......
......@@ -5,7 +5,7 @@ endif()
message(STATUS "compile with lite CUDA kernels")
nv_library(mul_compute_cuda SRCS mul_compute.cc DEPS ${tensor_lite})
cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite})
lite_cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite})
nv_library(kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite)
......
message(STATUS "compile with lite host kernels")
cc_library(feed_compute_host SRCS feed_compute.cc DEPS ${lite_kernel_deps})
cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
cc_library(reshape_compute_host SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op_lite)
lite_cc_library(feed_compute_host SRCS feed_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(reshape_compute_host SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op_lite)
lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host)
......
......@@ -4,17 +4,19 @@ endif()
set(cl_kernel_deps op_params_lite cl_caller cl_engine cl_context cl_wrapper)
cc_library(elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps})
cc_library(pool_opencl SRCS pool_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(pool_opencl SRCS pool_compute.cc DEPS ${cl_kernel_deps})
lite_cc_test(test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc DEPS elementwise_add_opencl
op_registry_lite program_lite
context_lite
ARGS --cl_path=${CMAKE_SOURCE_DIR}/paddle/fluid/lite/opencl
)
lite_cc_test(test_pool_opencl SRCS pool_compute_test.cc DEPS pool_opencl
op_registry_lite program_lite
context_lite
ARGS --cl_path=${CMAKE_SOURCE_DIR}/paddle/fluid/lite/opencl
)
set(opencl_kernels
......
......@@ -30,14 +30,14 @@ class PoolCompute
using param_t = operators::PoolParam;
void Run() override {
auto& param = *param_.get_mutable<param_t>();
auto& in_dims = param.x->dims();
auto& out_dims = param.output->dims();
const auto& param = *param_.get_mutable<param_t>();
const auto& in_dims = param.x->dims();
const auto& out_dims = param.output->dims();
const std::string pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling;
std::vector<int>& paddings = param.paddings;
std::vector<int>& strides = param.strides;
std::vector<int>& ksize = param.ksize;
const bool global_pooling = param.global_pooling;
std::vector<int> paddings = param.paddings;
std::vector<int> strides = param.strides;
std::vector<int> ksize = param.ksize;
if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
......
......@@ -2,23 +2,23 @@ if(NOT LITE_WITH_X86)
return()
endif()
cc_library(activation_compute_x86 SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps})
cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
cc_library(sgd_compute_x86 SRCS sgd_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(activation_compute_x86 SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
lite_cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(sgd_compute_x86 SRCS sgd_compute.cc DEPS ${lite_kernel_deps})
cc_library(fc_compute_x86 SRCS fc_compute.cc DEPS ${lite_kernel_deps})
cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps})
cc_library(relu_compute_x86 SRCS relu_compute.cc DEPS ${lite_kernel_deps})
cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps})
cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op elementwise_add_op)
cc_library(softmax_compute_x86 SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax)
cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps} )
cc_library(concat_compute_x86 SRCS concat_compute.cc DEPS ${lite_kernel_deps} )
cc_library(conv_compute_x86 SRCS conv_compute.cc DEPS ${lite_kernel_deps} blas im2col vol2col)
cc_library(pool_compute_x86 SRCS pool_compute.cc DEPS ${lite_kernel_deps} pooling)
cc_library(batch_norm_compute_x86 SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
cc_library(uniform_random_compute_x86 SRCS uniform_random_compute.cc DEPS ${lite_kernel_deps} )
lite_cc_library(fc_compute_x86 SRCS fc_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(relu_compute_x86 SRCS relu_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op elementwise_add_op)
lite_cc_library(softmax_compute_x86 SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax)
lite_cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps} )
lite_cc_library(concat_compute_x86 SRCS concat_compute.cc DEPS ${lite_kernel_deps} )
lite_cc_library(conv_compute_x86 SRCS conv_compute.cc DEPS ${lite_kernel_deps} blas im2col vol2col)
lite_cc_library(pool_compute_x86 SRCS pool_compute.cc DEPS ${lite_kernel_deps} pooling)
lite_cc_library(batch_norm_compute_x86 SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(uniform_random_compute_x86 SRCS uniform_random_compute.cc DEPS ${lite_kernel_deps} )
lite_cc_test(test_fc_compute_x86 SRCS fc_compute_test.cc DEPS fc_compute_x86)
lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86)
......
#cc_library(runtime_lite SRCS runtime.cc)
#lite_cc_library(runtime_lite SRCS runtime.cc)
#TODO(Superjomn) enable it again.
if(NOT LITE_ON_MOBILE)
......@@ -10,8 +10,7 @@ if(NOT LITE_ON_MOBILE)
endif(WITH_TESTING)
endif()
cc_library(compatible_pb_lite SRCS compatible_pb.cc
lite_cc_library(compatible_pb_lite SRCS compatible_pb.cc
DEPS op_desc_lite framework_proto_lite var_desc_lite cpp_op_desc_lite)
lite_cc_library(model_parser_lite SRCS model_parser.cc DEPS
......@@ -23,6 +22,5 @@ lite_cc_library(model_parser_lite SRCS model_parser.cc DEPS
lite_cc_test(test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_desc_lite compatible_pb_lite)
add_subdirectory(pb)
add_subdirectory(cpp)
cc_library(cpp_op_desc_lite SRCS op_desc.cc DEPS any_lite)
lite_cc_library(cpp_op_desc_lite SRCS op_desc.cc DEPS any_lite)
cc_library(var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite)
cc_library(op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite)
lite_cc_library(var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite)
lite_cc_library(op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite)
......@@ -2,14 +2,15 @@ if (NOT LITE_WITH_OPENCL)
return()
endif()
cc_library(cl_wrapper SRCS cl_wrapper.cxx)
cc_library(cl_tool SRCS cl_tool.cc DEPS cl_wrapper)
target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS lite_tensor)
cc_library(cl_image SRCS cl_image.cc DEPS lite_tensor cl_image_converter cl_engine)
cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper)
lite_cc_library(cl_wrapper SRCS cl_wrapper.cxx)
lite_cc_library(cl_tool SRCS cl_tool.cc DEPS cl_wrapper)
lite_cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
lite_cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
lite_cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
lite_cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS lite_tensor)
lite_cc_library(cl_image SRCS cl_image.cc DEPS lite_tensor cl_image_converter cl_engine)
lite_cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper
ARGS --cl_path=${CMAKE_SOURCE_DIR}/paddle/fluid/lite/opencl)
add_dependencies(cl_tool opencl_clhpp)
......@@ -17,7 +17,7 @@ limitations under the License. */
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_include.h"
namespace paddle {
namespace lite {
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_include.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
......
......@@ -18,9 +18,9 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_include.h"
namespace paddle {
namespace lite {
......
......@@ -18,8 +18,8 @@ limitations under the License. */
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
#include "paddle/fluid/lite/opencl/cl_include.h"
namespace paddle {
namespace lite {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_include.h"
namespace paddle {
namespace lite {
......
......@@ -18,7 +18,7 @@
#include <glog/logging.h>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_include.h"
/**
* Wrapper of OpenCL 2.0, based on file opencl20/CL/cl.h
......
set(op_DEPS ${tensor_lite} op_lite op_params_lite)
cc_library(conv_op_lite SRCS conv_op.cc DEPS ${op_DEPS})
cc_library(pool_op_lite SRCS pool_op.cc DEPS ${op_DEPS})
cc_library(fc_op_lite SRCS fc_op.cc DEPS ${op_DEPS})
cc_library(relu_op_lite SRCS relu_op.cc DEPS ${op_DEPS})
cc_library(mul_op_lite SRCS mul_op.cc DEPS ${op_DEPS})
cc_library(scale_op_lite SRCS scale_op.cc DEPS ${op_DEPS})
cc_library(softmax_op_lite SRCS softmax_op.cc DEPS ${op_DEPS})
cc_library(reshape_op_lite SRCS reshape_op.cc DEPS ${op_DEPS} )
cc_library(batch_norm_op_lite SRCS batch_norm_op.cc DEPS ${op_DEPS})
cc_library(feed_op_lite SRCS feed_op.cc DEPS ${op_DEPS})
cc_library(fetch_op_lite SRCS fetch_op.cc DEPS ${op_DEPS})
cc_library(io_copy_op_lite SRCS io_copy_op.cc DEPS ${op_DEPS})
cc_library(activation_ops_lite SRCS activation_ops.cc DEPS ${op_DEPS})
cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS})
cc_library(fusion_elementwise_activation_ops_lite SRCS fusion_elementwise_activation_ops.cc DEPS elementwise_ops_lite ${op_DEPS})
cc_library(mean_op_lite SRCS mean_op.cc DEPS ${op_DEPS})
cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS})
cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
cc_library(uniform_random_op_lite SRCS uniform_random_op.cc DEPS ${op_DEPS})
lite_cc_library(conv_op_lite SRCS conv_op.cc DEPS ${op_DEPS})
lite_cc_library(pool_op_lite SRCS pool_op.cc DEPS ${op_DEPS})
lite_cc_library(fc_op_lite SRCS fc_op.cc DEPS ${op_DEPS})
lite_cc_library(relu_op_lite SRCS relu_op.cc DEPS ${op_DEPS})
lite_cc_library(mul_op_lite SRCS mul_op.cc DEPS ${op_DEPS})
lite_cc_library(scale_op_lite SRCS scale_op.cc DEPS ${op_DEPS})
lite_cc_library(softmax_op_lite SRCS softmax_op.cc DEPS ${op_DEPS})
lite_cc_library(reshape_op_lite SRCS reshape_op.cc DEPS ${op_DEPS} )
lite_cc_library(batch_norm_op_lite SRCS batch_norm_op.cc DEPS ${op_DEPS})
lite_cc_library(feed_op_lite SRCS feed_op.cc DEPS ${op_DEPS})
lite_cc_library(fetch_op_lite SRCS fetch_op.cc DEPS ${op_DEPS})
lite_cc_library(io_copy_op_lite SRCS io_copy_op.cc DEPS ${op_DEPS})
lite_cc_library(activation_ops_lite SRCS activation_ops.cc DEPS ${op_DEPS})
lite_cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS})
lite_cc_library(fusion_elementwise_activation_ops_lite SRCS fusion_elementwise_activation_ops.cc DEPS elementwise_ops_lite ${op_DEPS})
lite_cc_library(mean_op_lite SRCS mean_op.cc DEPS ${op_DEPS})
lite_cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS})
lite_cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
lite_cc_library(uniform_random_op_lite SRCS uniform_random_op.cc DEPS ${op_DEPS})
cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite)
cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS})
cc_library(concat_op_lite SRCS concat_op.cc DEPS ${op_DEPS})
cc_library(calib_op_lite SRCS calib_op.cc DEPS ${op_DEPS})
cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS})
cc_library(transpose_op_lite SRCS transpose_op.cc DEPS ${op_DEPS})
cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS})
cc_library(fake_dequant SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS})
lite_cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite)
lite_cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS})
lite_cc_library(concat_op_lite SRCS concat_op.cc DEPS ${op_DEPS})
lite_cc_library(calib_op_lite SRCS calib_op.cc DEPS ${op_DEPS})
lite_cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS})
lite_cc_library(transpose_op_lite SRCS transpose_op.cc DEPS ${op_DEPS})
lite_cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS})
lite_cc_library(fake_dequant SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS})
set(ops_lite
conv_op_lite
......
......@@ -34,8 +34,10 @@ function cmake_x86 {
}
function cmake_opencl {
prepare_workspace
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf"
# $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
# $3: ARM_TARGET_LANG in "gcc" "clang"
cmake .. \
-DLITE_WITH_OPENCL=ON \
-DWITH_GPU=OFF \
......@@ -46,9 +48,49 @@ function cmake_opencl {
-DLITE_WITH_ARM=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 -DARM_TARGET_LANG=$3
}
# Configure and build the Lite OpenCL targets for one (os, abi, lang) combination,
# then build the publish_inference_lite target.
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
# $3: ARM_TARGET_LANG in "gcc" "clang"
# Returns 0 without building for combinations that are deliberately skipped,
# and non-zero if the build directory cannot be created or entered.
# On success the working directory is left inside the build directory.
function build_opencl {
    os=$1
    abi=$2
    lang=$3

    cur_dir=$(pwd)

    if [[ ${os} == "armlinux" ]]; then
        # TODO(hongming): enable compile armv7 and armv7hf on armlinux, and clang compile
        if [[ ${lang} == "clang" ]]; then
            echo "clang is not enabled on armlinux yet"
            return 0
        fi
        if [[ ${abi} == "armv7hf" ]]; then
            echo "armv7hf is not supported on armlinux yet"
            return 0
        fi
        if [[ ${abi} == "armv7" ]]; then
            echo "armv7 is not supported on armlinux yet"
            return 0
        fi
    fi

    if [[ ${os} == "android" && ${abi} == "armv7hf" ]]; then
        echo "android do not need armv7hf"
        return 0
    fi

    # One build directory per configuration, created under the caller's cwd.
    build_dir=$cur_dir/build.lite.${os}.${abi}.${lang}.opencl
    # Quote the path so a workspace containing whitespace still works, and
    # stop early instead of configuring/building in the wrong directory.
    mkdir -p "$build_dir" || return 1
    cd "$build_dir" || return 1

    cmake_opencl "${os}" "${abi}" "${lang}"
    # TESTS_FILE is intentionally left unquoted, exactly as before.
    build $TESTS_FILE

    # test publish inference lib
    make publish_inference_lite
}
# This method is only called in CI.
function cmake_x86_for_CI {
......@@ -351,6 +393,20 @@ function arm_push_necessary_file {
adb -s emulator-${port} push ${testpath} ${adb_work_dir}
}
# CI entry point: build the OpenCL configuration for Android armv8 and armv7
# with gcc, returning to the starting directory after each job.
# Takes no arguments.
function build_test_arm_opencl {
    ########################################################################
    # Remember where we started; build_opencl changes the working directory.
    cur=$PWD

    # job 1
    build_opencl "android" "armv8" "gcc"
    # Quoted so a workspace path containing whitespace is handled correctly.
    cd "$cur" || return 1

    # job 2
    build_opencl "android" "armv7" "gcc"
    cd "$cur" || return 1

    echo "Done"
}
# We split the arm unittest into several sub-tasks to run them in parallel and reduce the overall CI time.
# sub-task1
......@@ -398,17 +454,17 @@ function build_test_arm_subtask_armlinux {
cur=$PWD
# job 5
build_arm "armlinux" "armv8" "gcc" $port_armv8
build_arm "armlinux" "armv8" "gcc"
test_arm "armlinux" "armv8" "gcc" $port_armv8
cd $cur
# job 6
build_arm "armlinux" "armv7" "gcc" $port_armv8
build_arm "armlinux" "armv7" "gcc"
test_arm "armlinux" "armv7" "gcc" $port_armv8
cd $cur
# job 7
build_arm "armlinux" "armv7hf" "gcc" $port_armv8
build_arm "armlinux" "armv7hf" "gcc"
test_arm "armlinux" "armv7hf" "gcc" $port_armv8
cd $cur
......@@ -532,7 +588,7 @@ function main {
shift
;;
cmake_opencl)
cmake_opencl $ARM_OS $ARM_ABI
cmake_opencl $ARM_OS $ARM_ABI $ARM_LANG
shift
;;
cmake_cuda)
......@@ -543,6 +599,10 @@ function main {
cmake_arm $ARM_OS $ARM_ABI $ARM_LANG
shift
;;
build_opencl)
build_opencl $ARM_OS $ARM_ABI $ARM_LANG
shift
;;
build_arm)
build_arm $ARM_OS $ARM_ABI $ARM_LANG
shift
......@@ -552,7 +612,7 @@ function main {
shift
;;
test_arm)
build_arm $ARM_OS $ARM_ABI $ARM_LANG $ARM_PORT
test_arm $ARM_OS $ARM_ABI $ARM_LANG $ARM_PORT
shift
;;
test_arm_android)
......@@ -571,6 +631,10 @@ function main {
build_test_arm
shift
;;
build_test_arm_opencl)
build_test_arm_opencl
shift
;;
build_test_arm_subtask_android)
build_test_arm_subtask_android
shift
......
cc_library(debug_utils_lite SRCS debug_utils.cc DEPS op_params_lite)
lite_cc_library(debug_utils_lite SRCS debug_utils.cc DEPS op_params_lite)
lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc
DEPS
......@@ -9,4 +9,5 @@ lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc
mir_passes
${ops_lite} ${host_kernels}
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels})
ARM_DEPS ${arm_kernels}
CL_DEPS ${opencl_kernels})
......@@ -7,6 +7,6 @@
set(utils_DEPS glog)
lite_cc_test(test_varient SRCS varient_test.cc DEPS utils_lite)
cc_library(any_lite SRCS any.cc)
cc_library(utils_lite SRCS cp_logging.cc string.cc DEPS ${utils_DEPS} any_lite)
lite_cc_library(any_lite SRCS any.cc)
lite_cc_library(utils_lite SRCS cp_logging.cc string.cc DEPS ${utils_DEPS} any_lite)
......@@ -2,4 +2,4 @@ if (NOT LITE_WITH_X86)
return()
endif()
cc_library(target_wrapper_x86 SRCS target_wrapper.cc)
lite_cc_library(target_wrapper_x86 SRCS target_wrapper.cc)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册