diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt index 36ef74c44d4f19c646e97aae81d950aed27c1e47..5b676c9937baa7026d44ea5d9a03b6e8344af058 100644 --- a/lite/CMakeLists.txt +++ b/lite/CMakeLists.txt @@ -282,6 +282,10 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/opencl" COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/backends/opencl/cl_kernel" "${INFER_LITE_PUBLISH_ROOT}/opencl" ) + if (NOT LITE_ON_TINY_PUBLISH) add_dependencies(publish_inference_cxx_lib publish_inference_opencl) + else() + add_dependencies(tiny_publish_cxx_lib publish_inference_opencl) + endif() endif() endif() diff --git a/lite/backends/opencl/CMakeLists.txt b/lite/backends/opencl/CMakeLists.txt index 833757bb8c5edc57bde4131c69d2ede812d4156a..3b504fbed6a5cef6ab3cff46c0e9b7009459ac80 100644 --- a/lite/backends/opencl/CMakeLists.txt +++ b/lite/backends/opencl/CMakeLists.txt @@ -2,18 +2,17 @@ if (NOT LITE_WITH_OPENCL) return() endif() +lite_cc_library(opencl_kernels_source_cc SRCS opencl_kernels_source.cc) lite_cc_library(cl_wrapper SRCS cl_wrapper.cc) lite_cc_library(cl_utility SRCS cl_utility.cc DEPS cl_wrapper) -lite_cc_library(cl_runtime SRCS cl_runtime.cc DEPS cl_utility) +lite_cc_library(cl_runtime SRCS cl_runtime.cc DEPS cl_utility opencl_kernels_source_cc) lite_cc_library(cl_context SRCS cl_context.cc DEPS cl_runtime) lite_cc_library(cl_half SRCS cl_half.cc) lite_cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS tensor cl_half) lite_cc_library(cl_image SRCS cl_image.cc DEPS tensor cl_image_converter cl_runtime) lite_cc_library(cl_caller SRCS cl_caller.cc DEPS cl_context cl_image) lite_cc_library(cl_target_wrapper SRCS target_wrapper.cc DEPS cl_runtime) -lite_cc_test(test_cl_functions SRCS cl_functions_test.cc DEPS cl_context cl_image cl_caller cl_wrapper cl_target_wrapper - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) -lite_cc_test(test_cl_im2col SRCS cl_im2col_test.cc DEPS tensor cl_context cl_wrapper 
cl_target_wrapper - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +lite_cc_test(test_cl_functions SRCS cl_functions_test.cc DEPS cl_context cl_image cl_caller cl_wrapper cl_target_wrapper) +lite_cc_test(test_cl_im2col SRCS cl_im2col_test.cc DEPS tensor cl_context cl_wrapper cl_target_wrapper) add_dependencies(cl_wrapper opencl_clhpp) diff --git a/lite/backends/opencl/cl_caller.cc b/lite/backends/opencl/cl_caller.cc index fc3317d0f929ef3c20716102a2815394b6847db6..8421c784d5da224eacaaa9461b737eed1b4bdd4e 100644 --- a/lite/backends/opencl/cl_caller.cc +++ b/lite/backends/opencl/cl_caller.cc @@ -46,9 +46,8 @@ static void CopyImageData(CLContext* context, delete[] image_data; } -bool InitOpenCLRuntime(std::string cl_path) { +bool InitOpenCLRuntime() { auto* runtime = CLRuntime::Global(); - runtime->set_cl_path(cl_path); return runtime->IsInitSuccess(); } diff --git a/lite/backends/opencl/cl_caller.h b/lite/backends/opencl/cl_caller.h index 1817db9f6bd6d9ecf21978b8293bd9534328de0f..d1f1429e44f8872852797dadcbf2f82c1c9c0269 100644 --- a/lite/backends/opencl/cl_caller.h +++ b/lite/backends/opencl/cl_caller.h @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace lite { -bool InitOpenCLRuntime(std::string cl_path); +bool InitOpenCLRuntime(); } // namespace lite } // namespace paddle diff --git a/lite/backends/opencl/cl_context.cc b/lite/backends/opencl/cl_context.cc index 0fcb99486eac57e36ee548b809f8f141e0807db8..77bd8cd404f8dce558e0cf1dcbe9c42106077ad2 100644 --- a/lite/backends/opencl/cl_context.cc +++ b/lite/backends/opencl/cl_context.cc @@ -41,8 +41,7 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, return *(it->second); } - auto program = CLRuntime::Global()->CreateProgram( - GetContext(), CLRuntime::Global()->cl_path() + "/cl_kernel/" + file_name); + auto program = CLRuntime::Global()->CreateProgram(GetContext(), file_name); VLOG(3) << " --- begin build program -> " << program_key << " --- "; CLRuntime::Global()->BuildProgram(program.get(), options); diff --git a/lite/backends/opencl/cl_functions_test.cc b/lite/backends/opencl/cl_functions_test.cc index 70f47b47946641edf4d023437b48d46cae93ca6e..ba32d8c803bfd832289a936fe9150ba8d14cd984 100644 --- a/lite/backends/opencl/cl_functions_test.cc +++ b/lite/backends/opencl/cl_functions_test.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include #include #include #include @@ -26,22 +25,18 @@ limitations under the License. 
*/ #include "lite/core/tensor.h" #include "lite/utils/cp_logging.h" -DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path."); - namespace paddle { namespace lite { TEST(cl_test, runtime_test) { auto *runtime = CLRuntime::Global(); CHECK(runtime->IsInitSuccess()); - runtime->set_cl_path(FLAGS_cl_path); runtime->platform(); runtime->device(); runtime->command_queue(); auto &context = runtime->context(); - auto program = runtime->CreateProgram( - context, - runtime->cl_path() + "/cl_kernel/" + "buffer/elementwise_add_kernel.cl"); + auto program = + runtime->CreateProgram(context, "buffer/elementwise_add_kernel.cl"); auto event = runtime->CreateEvent(context); const std::string build_option("-DCL_DTYPE_float"); CHECK(runtime->BuildProgram(program.get(), build_option)); @@ -50,7 +45,6 @@ TEST(cl_test, runtime_test) { TEST(cl_test, context_test) { auto *runtime = CLRuntime::Global(); CHECK(runtime->IsInitSuccess()); - runtime->set_cl_path(FLAGS_cl_path); CLContext context; context.AddKernel("pool_max", "image/pool_kernel.cl", "-DCL_DTYPE_float"); context.AddKernel( @@ -62,7 +56,6 @@ TEST(cl_test, context_test) { TEST(cl_test, kernel_test) { auto *runtime = CLRuntime::Global(); CHECK(runtime->IsInitSuccess()); - runtime->set_cl_path(FLAGS_cl_path); std::unique_ptr context(new CLContext); context->AddKernel( "elementwise_add", "image/elementwise_add_kernel.cl", "-DCL_DTYPE_float"); @@ -121,7 +114,7 @@ TEST(cl_test, kernel_test) { } TEST(cl_test, target_wrapper_buffer_test) { - bool inited = InitOpenCLRuntime(FLAGS_cl_path); + bool inited = InitOpenCLRuntime(); CHECK(inited) << "Fail to initialize OpenCL runtime."; std::unique_ptr context(new CLContext); std::string kernel_name = "elementwise_add"; diff --git a/lite/backends/opencl/cl_image.cc b/lite/backends/opencl/cl_image.cc index b3d3eb3ce3a0b1bd66b30705cd180641667f9f6f..1e21b3d03a4a231f4bb171e83f4038e7922fe19a 100644 --- a/lite/backends/opencl/cl_image.cc +++ b/lite/backends/opencl/cl_image.cc @@ 
-13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "lite/backends/opencl/cl_image.h" +#include #include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_runtime.h" #include "lite/backends/opencl/cl_utility.h" @@ -42,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) { int stride = cl_image.numel() / 20; stride = stride > 0 ? stride : 1; - os << " dims: " << cl_image.tensor_dims_ << "\n"; + os << " dims: "; // << cl_image.tensor_dims_ << "\n"; for (int i = 0; i < cl_image.numel(); i += stride) { os << tensor_data[i] << " "; } diff --git a/lite/backends/opencl/cl_kernel/buffer/im2col_kernel.cl b/lite/backends/opencl/cl_kernel/buffer/im2col_kernel.cl index fe71f4c6ff8856ca679f2e6b29fc20a0d64da9ac..8d3456fa66973b04eaf24a04a42615790a133ddb 100644 --- a/lite/backends/opencl/cl_kernel/buffer/im2col_kernel.cl +++ b/lite/backends/opencl/cl_kernel/buffer/im2col_kernel.cl @@ -15,6 +15,8 @@ limitations under the License. 
*/ #pragma OPENCL EXTENSION cl_khr_fp16 : enable #define CL_DTYPE float +#include + __kernel void im2col(__global const CL_DTYPE* data_im, const int img_offset, const int col_chw, diff --git a/lite/backends/opencl/cl_runtime.cc b/lite/backends/opencl/cl_runtime.cc index 0c7b2f8575a88082f6d79a5392c4468715a701b9..aba6b0f16e057ff396a21c98b84d4b26b1eb9dc8 100644 --- a/lite/backends/opencl/cl_runtime.cc +++ b/lite/backends/opencl/cl_runtime.cc @@ -75,13 +75,8 @@ cl::CommandQueue& CLRuntime::command_queue() { std::unique_ptr CLRuntime::CreateProgram( const cl::Context& context, std::string file_name) { - std::ifstream file{file_name, std::ios::binary | std::ios::ate}; - CHECK(file.is_open()) << "Can't open file from " << file_name; - auto size = file.tellg(); - CHECK(size > 0) << "size is too small."; - std::string content(size, '\0'); - file.seekg(0); - file.read(&content[0], size); + auto cl_file = opencl_kernels_files.find(file_name); + std::string content(cl_file->second.begin(), cl_file->second.end()); cl::Program::Sources sources; sources.push_back(content); auto prog = diff --git a/lite/backends/opencl/cl_runtime.h b/lite/backends/opencl/cl_runtime.h index 0859780c69cc8647c1fd54bf1ab12be29217c9e1..6683a5d92df02ae3a95f2e1b01feb2f303da8558 100644 --- a/lite/backends/opencl/cl_runtime.h +++ b/lite/backends/opencl/cl_runtime.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include #include #include @@ -24,6 +25,9 @@ limitations under the License. 
*/ namespace paddle { namespace lite { +extern const std::map> + opencl_kernels_files; + class CLRuntime { public: static CLRuntime* Global(); diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt index 1d0558451fce67433d966d1f4bff82af26459e33..fd595bca5143c373d120cbe476c7948bcc069008 100644 --- a/lite/core/CMakeLists.txt +++ b/lite/core/CMakeLists.txt @@ -34,9 +34,9 @@ lite_cc_library(scope SRCS scope.cc DEPS tensor) lite_cc_library(device_info SRCS device_info.cc DEPS tensor) if (LITE_WITH_ARM) -lite_cc_library(context SRCS context.cc DEPS tensor any device_info CL_DEPS cl_context gflags) +lite_cc_library(context SRCS context.cc DEPS tensor any device_info CL_DEPS cl_context) else() -lite_cc_library(context SRCS context.cc DEPS tensor any device_info eigen3 CL_DEPS cl_context gflags) +lite_cc_library(context SRCS context.cc DEPS tensor any device_info eigen3 CL_DEPS cl_context) endif() #-------------------------------------------- GET CODE META INFO ------------------------------------------ @@ -67,6 +67,13 @@ message(STATUS "commit: ${PADDLE_LITE_COMMIT}") configure_file(version.h.in version.h) #----------------------------------------------- NOT CHANGE ----------------------------------------------- +# A trick to generate the opencl_kernels_source.cc +#add_custom_command( +# COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/gen_opencl_code.py +# ${CMAKE_SOURCE_DIR}/lite/backends/opencl/cl_kernel +# ${CMAKE_BINARY_DIR}/lite/backends/opencl/opencl_kernels_source.cc +# OUTPUT opencl_kernels_source.cc # not a real path to the output to force it execute every time. 
+# ) # A trick to generate the paddle_use_kernels.h add_custom_command( COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py @@ -96,6 +103,8 @@ add_custom_command( add_custom_target(op_list_h DEPENDS ops.h) add_custom_target(kernel_list_h DEPENDS kernels.h) add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc) +#add_custom_target(opencl_kernels_source_cc DEPENDS opencl_kernels_source.cc) + # create headfile to restore ops info sorted by suppported platforms add_custom_command( COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py diff --git a/lite/core/context.cc b/lite/core/context.cc index 948aac0c794969304b585520bfb7229410555578..be886168e02e21d192305d701110ce5075ffba63 100644 --- a/lite/core/context.cc +++ b/lite/core/context.cc @@ -14,10 +14,6 @@ #include "lite/core/context.h" -#ifdef LITE_WITH_OPENCL -DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path."); -#endif - namespace paddle { namespace lite {} // namespace lite } // namespace paddle diff --git a/lite/core/context.h b/lite/core/context.h index 653329e4f24b1f391ea41ed39819b60c8a598a3b..fd0715d6988f74bfa59f79e2f702bbb1e5accfb5 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -20,7 +20,6 @@ #include "lite/backends/cuda/cuda_utils.h" #endif #ifdef LITE_WITH_OPENCL -#include #include #include "lite/backends/opencl/cl_context.h" #include "lite/backends/opencl/cl_runtime.h" @@ -36,10 +35,7 @@ #include "lite/core/target_wrapper.h" #include "lite/core/tensor.h" #include "lite/utils/all.h" - -#ifdef LITE_WITH_OPENCL -DECLARE_string(cl_path); -#endif +#include "lite/utils/env.h" namespace paddle { namespace lite { @@ -304,7 +300,6 @@ class Context { void InitOnce() { // Init cl runtime. 
CHECK(CLRuntime::Global()->IsInitSuccess()) << "OpenCL runtime init failed"; - CLRuntime::Global()->set_cl_path(FLAGS_cl_path); cl_context_ = std::make_shared(); cl_wait_list_ = std::make_shared(); diff --git a/lite/kernels/opencl/CMakeLists.txt b/lite/kernels/opencl/CMakeLists.txt index 716ab35050b1551e6b8ac3cb74c960a6f478ad57..05bea8d31f252ab40cae8523727a81a1432401c0 100644 --- a/lite/kernels/opencl/CMakeLists.txt +++ b/lite/kernels/opencl/CMakeLists.txt @@ -34,48 +34,37 @@ add_kernel(grid_sampler_opencl OPENCL basic SRCS grid_sampler_image_compute.cc D # image kernel test # ###################### lite_cc_test(test_activation_image_opencl SRCS activation_image_compute_test.cc - DEPS activation_opencl layout_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS activation_opencl layout_opencl op_registry program context) lite_cc_test(test_conv_image_opencl SRCS conv_image_compute_test.cc - DEPS conv_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS conv_opencl op_registry program context) lite_cc_test(test_depthwise_conv2d_image_opencl SRCS depthwise_conv2d_image_compute_test.cc - DEPS conv_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS conv_opencl op_registry program context) lite_cc_test(test_nearest_interp_image_opencl SRCS nearest_interp_image_compute_test.cc - DEPS nearest_interp_opencl layout_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS nearest_interp_opencl layout_opencl op_registry program context) lite_cc_test(test_pool_image_opencl SRCS pool_image_compute_test.cc - DEPS pool_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS pool_opencl op_registry program context) lite_cc_test(test_scale_image_opencl SRCS scale_image_compute_test.cc - DEPS scale_opencl op_registry program context - ARGS 
--cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS scale_opencl op_registry program context) lite_cc_test(test_reshape_image_opencl SRCS reshape_image_compute_test.cc - DEPS reshape_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS reshape_opencl op_registry program context) lite_cc_test(test_concat_image_opencl SRCS concat_image_compute_test.cc - DEPS concat_opencl layout_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS concat_opencl layout_opencl op_registry program context) lite_cc_test(test_elementwise_mul_image_opencl SRCS elementwise_mul_image_compute_test.cc - DEPS elementwise_mul_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS elementwise_mul_opencl op_registry program context) lite_cc_test(test_layout_image_opencl SRCS layout_image_compute_test.cc - DEPS layout_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS layout_opencl op_registry program context) lite_cc_test(test_elementwise_add_image_opencl SRCS elementwise_add_image_compute_test.cc - DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl op_registry program context) lite_cc_test(test_grid_sampler_image_opencl SRCS grid_sampler_image_compute_test.cc DEPS grid_sampler_opencl op_registry program context @@ -107,37 +96,28 @@ add_kernel(io_copy_opencl OPENCL basic SRCS io_copy_buffer_compute.cc DEPS ${ten # buffer kernel test # ###################### #lite_cc_test(test_activation_buffer_opencl SRCS activation_buffer_compute_test.cc -# DEPS activation_opencl op_registry program context -# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +# DEPS activation_opencl op_registry program context) 
#lite_cc_test(test_conv_buffer_opencl SRCS conv_buffer_compute_test.cc -# DEPS conv_opencl op_registry program context -# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +# DEPS conv_opencl op_registry program context) #lite_cc_test(test_depthwise_conv2d_buffer_opencl SRCS depthwise_conv2d_buffer_compute_test.cc -# DEPS depthwise_conv2d_opencl op_registry program context -# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +# DEPS depthwise_conv2d_opencl op_registry program context) #lite_cc_test(test_pool_buffer_opencl SRCS pool_buffer_compute_test.cc -# DEPS pool_opencl op_registry program context -# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +# DEPS pool_opencl op_registry program context) #lite_cc_test(test_concat_buffer_opencl SRCS concat_buffer_compute_test.cc -# DEPS concat_opencl op_registry program context -# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +# DEPS concat_opencl op_registry program context) lite_cc_test(test_fc_buffer_opencl SRCS fc_buffer_compute_test.cc - DEPS fc_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS fc_opencl op_registry program context) lite_cc_test(test_mul_buffer_opencl SRCS mul_buffer_compute_test.cc - DEPS mul_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS mul_opencl op_registry program context) #lite_cc_test(test_elementwise_add_buffer_opencl SRCS elementwise_add__buffer_compute_test.cc -# DEPS elementwise_add_opencl op_registry program context -# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) +# DEPS elementwise_add_opencl op_registry program context) lite_cc_test(test_io_copy_buffer_opencl SRCS io_copy_buffer_compute_test.cc - DEPS io_copy_opencl op_registry program context - ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) + DEPS io_copy_opencl op_registry program context) diff --git a/lite/tools/build.sh b/lite/tools/build.sh index 
a90d67ebbc2724ac15147fc4de2989bf517ae218..6999b860f2d6853c1b5c2ff44df7faf83d2594b8 100755
--- a/lite/tools/build.sh
+++ b/lite/tools/build.sh
@@ -42,13 +42,26 @@ function prepare_workspace {
     GEN_CODE_PATH_PREFIX=$build_dir/lite/gen_code
     mkdir -p ${GEN_CODE_PATH_PREFIX}
     touch ${GEN_CODE_PATH_PREFIX}/__generated_code__.cc
-
     # 2.Prepare debug tool
     DEBUG_TOOL_PATH_PREFIX=$build_dir/lite/tools/debug
     mkdir -p ${DEBUG_TOOL_PATH_PREFIX}
     cp $root_dir/lite/tools/debug/analysis_tool.py ${DEBUG_TOOL_PATH_PREFIX}/
 }
 
+
+function prepare_opencl_source_code {
+    local root_dir=$1
+    local build_dir=$2
+    # Regenerate opencl_kernels_source.cc inside the source tree (under
+    # $root_dir; $build_dir is accepted but not used) from the cl_kernel files.
+    GEN_CODE_PATH_OPENCL=$root_dir/lite/backends/opencl
+    rm -f $GEN_CODE_PATH_OPENCL/opencl_kernels_source.cc
+    OPENCL_KERNELS_PATH=$root_dir/lite/backends/opencl/cl_kernel
+    mkdir -p ${GEN_CODE_PATH_OPENCL}
+    touch $GEN_CODE_PATH_OPENCL/opencl_kernels_source.cc
+    python $root_dir/lite/tools/cmake_tools/gen_opencl_code.py $OPENCL_KERNELS_PATH $GEN_CODE_PATH_OPENCL/opencl_kernels_source.cc
+}
+
 function prepare_thirdparty {
     if [ ! -d $workspace/third-party -o -f $workspace/third-party-05b862.tar.gz ]; then
         rm -rf $workspace/third-party
@@ -113,6 +126,45 @@ function make_tiny_publish_so {
     cd - > /dev/null
 }
 
+function make_opencl {
+    local os=$1
+    local abi=$2
+    local lang=$3
+    # Configure and build the OpenCL publish target in a fresh build directory.
+    prepare_thirdparty
+
+    root_dir=$(pwd)
+    build_dir=$root_dir/build.lite.${os}.${abi}.${lang}.opencl
+    if [ -d "$build_dir" ]
+    then
+        rm -rf "$build_dir"
+    fi
+    mkdir -p $build_dir
+    cd $build_dir
+    prepare_workspace $root_dir $build_dir
+    prepare_opencl_source_code $root_dir $build_dir
+    # os:   ARM_TARGET_OS in "android" , "armlinux"
+    # abi:  ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
+    # lang: ARM_TARGET_LANG in "gcc" "clang"
+    cmake .. \
+        -DLITE_WITH_OPENCL=ON \
+        -DWITH_GPU=OFF \
+        -DWITH_MKL=OFF \
+        -DWITH_LITE=ON \
+        -DLITE_WITH_CUDA=OFF \
+        -DLITE_WITH_X86=OFF \
+        -DLITE_WITH_ARM=ON \
+        -DWITH_ARM_DOTPROD=ON \
+        -DLITE_ON_TINY_PUBLISH=ON \
+        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
+        -DWITH_TESTING=OFF \
+        -DLITE_BUILD_EXTRA=ON \
+        -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
+
+    make opencl_clhpp -j4
+    make publish_inference -j4
+}
+
 function make_full_publish_so {
     local os=$1
     local abi=$2
@@ -398,6 +450,10 @@ function main {
                 build_opt
                 shift
                 ;;
+            opencl)
+                make_opencl $ARM_OS $ARM_ABI $ARM_LANG
+                shift
+                ;;
             cuda)
                 make_cuda
                 shift
diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh
index 1960dc1e1506f9742cdd9be41d5448c646c026af..5fedb86d9f797d5280f158339ad4fa9c77b0879a 100755
--- a/lite/tools/ci_build.sh
+++ b/lite/tools/ci_build.sh
@@ -37,6 +37,19 @@ function prepare_thirdparty {
     fi
 }
 
+function prepare_opencl_source_code {
+    local root_dir=$1
+    local build_dir=$2
+    # Regenerate opencl_kernels_source.cc inside the source tree (under
+    # $root_dir; $build_dir is accepted but not used) from the cl_kernel files.
+    GEN_CODE_PATH_OPENCL=$root_dir/lite/backends/opencl
+    rm -f $GEN_CODE_PATH_OPENCL/opencl_kernels_source.cc
+    OPENCL_KERNELS_PATH=$root_dir/lite/backends/opencl/cl_kernel
+    mkdir -p ${GEN_CODE_PATH_OPENCL}
+    touch $GEN_CODE_PATH_OPENCL/opencl_kernels_source.cc
+    python $root_dir/lite/tools/cmake_tools/gen_opencl_code.py $OPENCL_KERNELS_PATH $GEN_CODE_PATH_OPENCL/opencl_kernels_source.cc
+}
+
 # prepare adb devices
 # if USE_ADB_EMULATOR=ON , we create adb emulator port_armv8 and port_armv7 for usage, else we will use actual mobilephone according to adbindex.
function prepare_adb_devices { @@ -173,6 +186,8 @@ function build_opencl { mkdir -p $build_dir cd $build_dir + prepare_opencl_source_code $cur_dir $build_dir + cmake_opencl ${os} ${abi} ${lang} make opencl_clhpp build $TESTS_FILE diff --git a/lite/tools/cmake_tools/gen_opencl_code.py b/lite/tools/cmake_tools/gen_opencl_code.py new file mode 100644 index 0000000000000000000000000000000000000000..4348f6d65b12f642db35a01df10f9c24a7d7ff04 --- /dev/null +++ b/lite/tools/cmake_tools/gen_opencl_code.py @@ -0,0 +1,157 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import io
import logging
import os
import re
import sys

# Filled in from the command line in the __main__ block at the bottom of this
# file; gen_opencl_kernels() falls back to them when called without arguments.
opencl_kernel_path = ""
opencl_dest_path = ""

# Text of the generated C++ file; %s receives one initializer per kernel file.
# NOTE(review): the include names and the map's template arguments were
# reconstructed (byte table keyed by file name) -- confirm they match the
# `opencl_kernels_files` declaration in cl_runtime.h.
# NOTE(review): the bare `#pragma` line is kept as found; it names no directive.
_SOURCE_TEMPLATE = """
#pragma
#ifdef LITE_WITH_OPENCL
#include <map>
#include <string>
#include <vector>
namespace paddle {
namespace lite {
    // file name => source
    extern const std::map<std::string, std::vector<unsigned char>> opencl_kernels_files = {
    %s
    };
} // namespace lite
} // namespace paddle
#endif
    """

# Non-greedy match up to the first "*/"; linear-time replacement for the
# previous alternation-based pattern, which could backtrack exponentially.
_BLOCK_COMMENT_RE = re.compile(r"/\*.*?\*/", flags=re.DOTALL)
_LINE_COMMENT_RE = re.compile(r"//.*$")

# Spellings of the common-header include that get replaced by its content.
_COMMON_INCLUDES = ('#include "cl_common.h"', "#include <cl_common.h>")

# Sub-directories of the kernel directory that are embedded, in output order.
_KERNEL_SUBDIRS = ("buffer", "image")


def _read_text(path):
    """Return the content of `path` decoded as UTF-8."""
    with io.open(path, "r", encoding="utf-8") as infile:
        return infile.read()


def _clean_source(content):
    """Drop /* */ and // comments, trim every line and remove empty lines."""
    without_blocks = _BLOCK_COMMENT_RE.sub("", content)
    kept = []
    for line in without_blocks.split("\n"):
        line = _LINE_COMMENT_RE.sub("", line).strip()
        if line:
            kept.append(line)
    return "\n".join(kept)


def _get_header_raw(content):
    """Return the lines of `content` that precede the first '__kernel void'."""
    header_lines = []
    for line in content.split("\n"):
        if "__kernel void" in line:
            break
        header_lines.append(line)
    return "\n".join(header_lines)


def _inline_common_header(content, common_header):
    """Replace every cl_common.h include line of `content` by `common_header`."""
    lines = content.split("\n")
    for index, line in enumerate(lines):
        if any(include in line for include in _COMMON_INCLUDES):
            lines[index] = common_header
    return "\n".join(lines)


def _encode_entry(file_name, content):
    """Format one `{"name", {0x.., ...}}` initializer for the generated map."""
    if content == "":
        # Keep the placeholder the original emitted for empty kernels.
        content = " "
    # Encode to bytes first so every element fits in one unsigned char, even
    # when the kernel text contains non-ASCII characters.
    hexes = [hex(byte) for byte in bytearray(content.encode("utf-8"))]
    return '    {"%s", {%s}}' % (file_name, ", ".join(hexes))


def gen_opencl_kernels(kernel_path=None, dest_path=None):
    """Embed all kernel sources into a generated C++ source file.

    Reads `cl_common.h` and every regular file under the `buffer` and `image`
    sub-directories of the kernel directory, strips comments and blank lines,
    replaces cl_common.h include lines by the part of cl_common.h that
    precedes its first `__kernel void`, and writes a table mapping
    "<subdir>/<file name>" to the bytes of the resulting text.

    Args:
        kernel_path: kernel directory; defaults to the module-level
            `opencl_kernel_path`.
        dest_path: file to write; defaults to the module-level
            `opencl_dest_path`.

    Raises:
        IOError/OSError: if a directory or file cannot be read or written.
    """
    if kernel_path is None:
        kernel_path = opencl_kernel_path
    if dest_path is None:
        dest_path = opencl_dest_path

    common_source = _clean_source(
        _read_text(os.path.join(kernel_path, "cl_common.h")))
    common_header = _get_header_raw(common_source)

    entries = []
    for sub_dir in _KERNEL_SUBDIRS:
        dir_path = os.path.join(kernel_path, sub_dir)
        # Sorted so the generated file does not depend on directory order.
        for filename in sorted(os.listdir(dir_path)):
            file_path = os.path.join(dir_path, filename)
            if not os.path.isfile(file_path):
                continue
            content = _clean_source(_read_text(file_path))
            content = _inline_common_header(content, common_header)
            entries.append(_encode_entry(sub_dir + "/" + filename, content))

    source = _SOURCE_TEMPLATE % ",\n".join(entries)
    with open(dest_path, "w") as outfile:
        logging.info("write opencl kernels source files to %s" % dest_path)
        outfile.write(source)


def gen_empty_opencl_kernels():
    """Return the text of a source file that declares an empty kernel table.

    The previous version built this text and discarded it; it is returned now
    so the function has an observable result. Nothing in this file calls it.
    """
    # NOTE(review): include names and template arguments were reconstructed.
    source = """
    #pragma once
    #ifdef PADDLE_MOBILE_CL
    #include <map>
    #include <string>
    #include <vector>
    namespace paddle_mobile {
        // func name => source
        extern const std::map<std::string, std::vector<unsigned char>> opencl_kernels = {
        };
    }
    #endif
    """
    return source


if __name__ == "__main__":
    if len(sys.argv) != 3:
        sys.stderr.write(
            "usage: gen_opencl_code.py <cl_kernel dir> <output .cc file>\n")
        sys.exit(1)
    opencl_kernel_path = sys.argv[1]
    opencl_dest_path = sys.argv[2]
    gen_opencl_kernels()