未验证 提交 51e14609 编写于 作者: H huzhiqiang 提交者: GitHub

[opencl compile] add into build.sh (#3031)

* test=devellop

* add cl file into resulted lib test=develop

* test=develop

* test=develop
上级 18d974c0
...@@ -282,6 +282,10 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) ...@@ -282,6 +282,10 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/opencl" COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/opencl"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/backends/opencl/cl_kernel" "${INFER_LITE_PUBLISH_ROOT}/opencl" COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/backends/opencl/cl_kernel" "${INFER_LITE_PUBLISH_ROOT}/opencl"
) )
if (NOT LITE_ON_TINY_PUBLISH)
add_dependencies(publish_inference_cxx_lib publish_inference_opencl) add_dependencies(publish_inference_cxx_lib publish_inference_opencl)
else()
add_dependencies(tiny_publish_cxx_lib publish_inference_opencl)
endif()
endif() endif()
endif() endif()
...@@ -2,18 +2,17 @@ if (NOT LITE_WITH_OPENCL) ...@@ -2,18 +2,17 @@ if (NOT LITE_WITH_OPENCL)
return() return()
endif() endif()
lite_cc_library(opencl_kernels_source_cc SRCS opencl_kernels_source.cc)
lite_cc_library(cl_wrapper SRCS cl_wrapper.cc) lite_cc_library(cl_wrapper SRCS cl_wrapper.cc)
lite_cc_library(cl_utility SRCS cl_utility.cc DEPS cl_wrapper) lite_cc_library(cl_utility SRCS cl_utility.cc DEPS cl_wrapper)
lite_cc_library(cl_runtime SRCS cl_runtime.cc DEPS cl_utility) lite_cc_library(cl_runtime SRCS cl_runtime.cc DEPS cl_utility opencl_kernels_source_cc)
lite_cc_library(cl_context SRCS cl_context.cc DEPS cl_runtime) lite_cc_library(cl_context SRCS cl_context.cc DEPS cl_runtime)
lite_cc_library(cl_half SRCS cl_half.cc) lite_cc_library(cl_half SRCS cl_half.cc)
lite_cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS tensor cl_half) lite_cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS tensor cl_half)
lite_cc_library(cl_image SRCS cl_image.cc DEPS tensor cl_image_converter cl_runtime) lite_cc_library(cl_image SRCS cl_image.cc DEPS tensor cl_image_converter cl_runtime)
lite_cc_library(cl_caller SRCS cl_caller.cc DEPS cl_context cl_image) lite_cc_library(cl_caller SRCS cl_caller.cc DEPS cl_context cl_image)
lite_cc_library(cl_target_wrapper SRCS target_wrapper.cc DEPS cl_runtime) lite_cc_library(cl_target_wrapper SRCS target_wrapper.cc DEPS cl_runtime)
lite_cc_test(test_cl_functions SRCS cl_functions_test.cc DEPS cl_context cl_image cl_caller cl_wrapper cl_target_wrapper lite_cc_test(test_cl_functions SRCS cl_functions_test.cc DEPS cl_context cl_image cl_caller cl_wrapper cl_target_wrapper)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl) lite_cc_test(test_cl_im2col SRCS cl_im2col_test.cc DEPS tensor cl_context cl_wrapper cl_target_wrapper)
lite_cc_test(test_cl_im2col SRCS cl_im2col_test.cc DEPS tensor cl_context cl_wrapper cl_target_wrapper
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
add_dependencies(cl_wrapper opencl_clhpp) add_dependencies(cl_wrapper opencl_clhpp)
...@@ -46,9 +46,8 @@ static void CopyImageData(CLContext* context, ...@@ -46,9 +46,8 @@ static void CopyImageData(CLContext* context,
delete[] image_data; delete[] image_data;
} }
bool InitOpenCLRuntime(std::string cl_path) { bool InitOpenCLRuntime() {
auto* runtime = CLRuntime::Global(); auto* runtime = CLRuntime::Global();
runtime->set_cl_path(cl_path);
return runtime->IsInitSuccess(); return runtime->IsInitSuccess();
} }
......
...@@ -21,7 +21,7 @@ limitations under the License. */ ...@@ -21,7 +21,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace lite { namespace lite {
bool InitOpenCLRuntime(std::string cl_path); bool InitOpenCLRuntime();
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
...@@ -41,8 +41,7 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, ...@@ -41,8 +41,7 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
return *(it->second); return *(it->second);
} }
auto program = CLRuntime::Global()->CreateProgram( auto program = CLRuntime::Global()->CreateProgram(GetContext(), file_name);
GetContext(), CLRuntime::Global()->cl_path() + "/cl_kernel/" + file_name);
VLOG(3) << " --- begin build program -> " << program_key << " --- "; VLOG(3) << " --- begin build program -> " << program_key << " --- ";
CLRuntime::Global()->BuildProgram(program.get(), options); CLRuntime::Global()->BuildProgram(program.get(), options);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gflags/gflags.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
...@@ -26,22 +25,18 @@ limitations under the License. */ ...@@ -26,22 +25,18 @@ limitations under the License. */
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
namespace paddle { namespace paddle {
namespace lite { namespace lite {
TEST(cl_test, runtime_test) { TEST(cl_test, runtime_test) {
auto *runtime = CLRuntime::Global(); auto *runtime = CLRuntime::Global();
CHECK(runtime->IsInitSuccess()); CHECK(runtime->IsInitSuccess());
runtime->set_cl_path(FLAGS_cl_path);
runtime->platform(); runtime->platform();
runtime->device(); runtime->device();
runtime->command_queue(); runtime->command_queue();
auto &context = runtime->context(); auto &context = runtime->context();
auto program = runtime->CreateProgram( auto program =
context, runtime->CreateProgram(context, "buffer/elementwise_add_kernel.cl");
runtime->cl_path() + "/cl_kernel/" + "buffer/elementwise_add_kernel.cl");
auto event = runtime->CreateEvent(context); auto event = runtime->CreateEvent(context);
const std::string build_option("-DCL_DTYPE_float"); const std::string build_option("-DCL_DTYPE_float");
CHECK(runtime->BuildProgram(program.get(), build_option)); CHECK(runtime->BuildProgram(program.get(), build_option));
...@@ -50,7 +45,6 @@ TEST(cl_test, runtime_test) { ...@@ -50,7 +45,6 @@ TEST(cl_test, runtime_test) {
TEST(cl_test, context_test) { TEST(cl_test, context_test) {
auto *runtime = CLRuntime::Global(); auto *runtime = CLRuntime::Global();
CHECK(runtime->IsInitSuccess()); CHECK(runtime->IsInitSuccess());
runtime->set_cl_path(FLAGS_cl_path);
CLContext context; CLContext context;
context.AddKernel("pool_max", "image/pool_kernel.cl", "-DCL_DTYPE_float"); context.AddKernel("pool_max", "image/pool_kernel.cl", "-DCL_DTYPE_float");
context.AddKernel( context.AddKernel(
...@@ -62,7 +56,6 @@ TEST(cl_test, context_test) { ...@@ -62,7 +56,6 @@ TEST(cl_test, context_test) {
TEST(cl_test, kernel_test) { TEST(cl_test, kernel_test) {
auto *runtime = CLRuntime::Global(); auto *runtime = CLRuntime::Global();
CHECK(runtime->IsInitSuccess()); CHECK(runtime->IsInitSuccess());
runtime->set_cl_path(FLAGS_cl_path);
std::unique_ptr<CLContext> context(new CLContext); std::unique_ptr<CLContext> context(new CLContext);
context->AddKernel( context->AddKernel(
"elementwise_add", "image/elementwise_add_kernel.cl", "-DCL_DTYPE_float"); "elementwise_add", "image/elementwise_add_kernel.cl", "-DCL_DTYPE_float");
...@@ -121,7 +114,7 @@ TEST(cl_test, kernel_test) { ...@@ -121,7 +114,7 @@ TEST(cl_test, kernel_test) {
} }
TEST(cl_test, target_wrapper_buffer_test) { TEST(cl_test, target_wrapper_buffer_test) {
bool inited = InitOpenCLRuntime(FLAGS_cl_path); bool inited = InitOpenCLRuntime();
CHECK(inited) << "Fail to initialize OpenCL runtime."; CHECK(inited) << "Fail to initialize OpenCL runtime.";
std::unique_ptr<CLContext> context(new CLContext); std::unique_ptr<CLContext> context(new CLContext);
std::string kernel_name = "elementwise_add"; std::string kernel_name = "elementwise_add";
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "lite/backends/opencl/cl_image.h" #include "lite/backends/opencl/cl_image.h"
#include <iostream>
#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_runtime.h" #include "lite/backends/opencl/cl_runtime.h"
#include "lite/backends/opencl/cl_utility.h" #include "lite/backends/opencl/cl_utility.h"
...@@ -42,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) { ...@@ -42,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
int stride = cl_image.numel() / 20; int stride = cl_image.numel() / 20;
stride = stride > 0 ? stride : 1; stride = stride > 0 ? stride : 1;
os << " dims: " << cl_image.tensor_dims_ << "\n"; os << " dims: "; // << cl_image.tensor_dims_ << "\n";
for (int i = 0; i < cl_image.numel(); i += stride) { for (int i = 0; i < cl_image.numel(); i += stride) {
os << tensor_data[i] << " "; os << tensor_data[i] << " ";
} }
......
...@@ -15,6 +15,8 @@ limitations under the License. */ ...@@ -15,6 +15,8 @@ limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable #pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define CL_DTYPE float #define CL_DTYPE float
#include <cl_common.h>
__kernel __kernel
void im2col(__global const CL_DTYPE* data_im, const int img_offset, void im2col(__global const CL_DTYPE* data_im, const int img_offset,
const int col_chw, const int col_chw,
......
...@@ -75,13 +75,8 @@ cl::CommandQueue& CLRuntime::command_queue() { ...@@ -75,13 +75,8 @@ cl::CommandQueue& CLRuntime::command_queue() {
std::unique_ptr<cl::Program> CLRuntime::CreateProgram( std::unique_ptr<cl::Program> CLRuntime::CreateProgram(
const cl::Context& context, std::string file_name) { const cl::Context& context, std::string file_name) {
std::ifstream file{file_name, std::ios::binary | std::ios::ate}; auto cl_file = opencl_kernels_files.find(file_name);
CHECK(file.is_open()) << "Can't open file from " << file_name; std::string content(cl_file->second.begin(), cl_file->second.end());
auto size = file.tellg();
CHECK(size > 0) << "size is too small.";
std::string content(size, '\0');
file.seekg(0);
file.read(&content[0], size);
cl::Program::Sources sources; cl::Program::Sources sources;
sources.push_back(content); sources.push_back(content);
auto prog = auto prog =
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <fstream> #include <fstream>
#include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -24,6 +25,9 @@ limitations under the License. */ ...@@ -24,6 +25,9 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace lite { namespace lite {
extern const std::map<std::string, std::vector<unsigned char>>
opencl_kernels_files;
class CLRuntime { class CLRuntime {
public: public:
static CLRuntime* Global(); static CLRuntime* Global();
......
...@@ -34,9 +34,9 @@ lite_cc_library(scope SRCS scope.cc DEPS tensor) ...@@ -34,9 +34,9 @@ lite_cc_library(scope SRCS scope.cc DEPS tensor)
lite_cc_library(device_info SRCS device_info.cc DEPS tensor) lite_cc_library(device_info SRCS device_info.cc DEPS tensor)
if (LITE_WITH_ARM) if (LITE_WITH_ARM)
lite_cc_library(context SRCS context.cc DEPS tensor any device_info CL_DEPS cl_context gflags) lite_cc_library(context SRCS context.cc DEPS tensor any device_info CL_DEPS cl_context)
else() else()
lite_cc_library(context SRCS context.cc DEPS tensor any device_info eigen3 CL_DEPS cl_context gflags) lite_cc_library(context SRCS context.cc DEPS tensor any device_info eigen3 CL_DEPS cl_context)
endif() endif()
#-------------------------------------------- GET CODE META INFO ------------------------------------------ #-------------------------------------------- GET CODE META INFO ------------------------------------------
...@@ -67,6 +67,13 @@ message(STATUS "commit: ${PADDLE_LITE_COMMIT}") ...@@ -67,6 +67,13 @@ message(STATUS "commit: ${PADDLE_LITE_COMMIT}")
configure_file(version.h.in version.h) configure_file(version.h.in version.h)
#----------------------------------------------- NOT CHANGE ----------------------------------------------- #----------------------------------------------- NOT CHANGE -----------------------------------------------
# A trick to generate the opencl_kernels_source.cc
#add_custom_command(
# COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/gen_opencl_code.py
# ${CMAKE_SOURCE_DIR}/lite/backends/opencl/cl_kernel
# ${CMAKE_BINARY_DIR}/lite/backends/opencl/opencl_kernels_source.cc
# OUTPUT opencl_kernels_source.cc # not a real path to the output to force it execute every time.
# )
# A trick to generate the paddle_use_kernels.h # A trick to generate the paddle_use_kernels.h
add_custom_command( add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
...@@ -96,6 +103,8 @@ add_custom_command( ...@@ -96,6 +103,8 @@ add_custom_command(
add_custom_target(op_list_h DEPENDS ops.h) add_custom_target(op_list_h DEPENDS ops.h)
add_custom_target(kernel_list_h DEPENDS kernels.h) add_custom_target(kernel_list_h DEPENDS kernels.h)
add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc) add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
#add_custom_target(opencl_kernels_source_cc DEPENDS opencl_kernels_source.cc)
# create headfile to restore ops info sorted by suppported platforms # create headfile to restore ops info sorted by suppported platforms
add_custom_command( add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py
......
...@@ -14,10 +14,6 @@ ...@@ -14,10 +14,6 @@
#include "lite/core/context.h" #include "lite/core/context.h"
#ifdef LITE_WITH_OPENCL
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
#endif
namespace paddle { namespace paddle {
namespace lite {} // namespace lite namespace lite {} // namespace lite
} // namespace paddle } // namespace paddle
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
#include "lite/backends/cuda/cuda_utils.h" #include "lite/backends/cuda/cuda_utils.h"
#endif #endif
#ifdef LITE_WITH_OPENCL #ifdef LITE_WITH_OPENCL
#include <gflags/gflags.h>
#include <unordered_map> #include <unordered_map>
#include "lite/backends/opencl/cl_context.h" #include "lite/backends/opencl/cl_context.h"
#include "lite/backends/opencl/cl_runtime.h" #include "lite/backends/opencl/cl_runtime.h"
...@@ -36,10 +35,7 @@ ...@@ -36,10 +35,7 @@
#include "lite/core/target_wrapper.h" #include "lite/core/target_wrapper.h"
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/utils/all.h" #include "lite/utils/all.h"
#include "lite/utils/env.h"
#ifdef LITE_WITH_OPENCL
DECLARE_string(cl_path);
#endif
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -304,7 +300,6 @@ class Context<TargetType::kOpenCL> { ...@@ -304,7 +300,6 @@ class Context<TargetType::kOpenCL> {
void InitOnce() { void InitOnce() {
// Init cl runtime. // Init cl runtime.
CHECK(CLRuntime::Global()->IsInitSuccess()) << "OpenCL runtime init failed"; CHECK(CLRuntime::Global()->IsInitSuccess()) << "OpenCL runtime init failed";
CLRuntime::Global()->set_cl_path(FLAGS_cl_path);
cl_context_ = std::make_shared<CLContext>(); cl_context_ = std::make_shared<CLContext>();
cl_wait_list_ = std::make_shared<WaitListType>(); cl_wait_list_ = std::make_shared<WaitListType>();
......
...@@ -34,48 +34,37 @@ add_kernel(grid_sampler_opencl OPENCL basic SRCS grid_sampler_image_compute.cc D ...@@ -34,48 +34,37 @@ add_kernel(grid_sampler_opencl OPENCL basic SRCS grid_sampler_image_compute.cc D
# image kernel test # # image kernel test #
###################### ######################
lite_cc_test(test_activation_image_opencl SRCS activation_image_compute_test.cc lite_cc_test(test_activation_image_opencl SRCS activation_image_compute_test.cc
DEPS activation_opencl layout_opencl op_registry program context DEPS activation_opencl layout_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_conv_image_opencl SRCS conv_image_compute_test.cc lite_cc_test(test_conv_image_opencl SRCS conv_image_compute_test.cc
DEPS conv_opencl op_registry program context DEPS conv_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_depthwise_conv2d_image_opencl SRCS depthwise_conv2d_image_compute_test.cc lite_cc_test(test_depthwise_conv2d_image_opencl SRCS depthwise_conv2d_image_compute_test.cc
DEPS conv_opencl op_registry program context DEPS conv_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_nearest_interp_image_opencl SRCS nearest_interp_image_compute_test.cc lite_cc_test(test_nearest_interp_image_opencl SRCS nearest_interp_image_compute_test.cc
DEPS nearest_interp_opencl layout_opencl op_registry program context DEPS nearest_interp_opencl layout_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_pool_image_opencl SRCS pool_image_compute_test.cc lite_cc_test(test_pool_image_opencl SRCS pool_image_compute_test.cc
DEPS pool_opencl op_registry program context DEPS pool_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_scale_image_opencl SRCS scale_image_compute_test.cc lite_cc_test(test_scale_image_opencl SRCS scale_image_compute_test.cc
DEPS scale_opencl op_registry program context DEPS scale_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_reshape_image_opencl SRCS reshape_image_compute_test.cc lite_cc_test(test_reshape_image_opencl SRCS reshape_image_compute_test.cc
DEPS reshape_opencl op_registry program context DEPS reshape_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_concat_image_opencl SRCS concat_image_compute_test.cc lite_cc_test(test_concat_image_opencl SRCS concat_image_compute_test.cc
DEPS concat_opencl layout_opencl op_registry program context DEPS concat_opencl layout_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_elementwise_mul_image_opencl SRCS elementwise_mul_image_compute_test.cc lite_cc_test(test_elementwise_mul_image_opencl SRCS elementwise_mul_image_compute_test.cc
DEPS elementwise_mul_opencl op_registry program context DEPS elementwise_mul_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_layout_image_opencl SRCS layout_image_compute_test.cc lite_cc_test(test_layout_image_opencl SRCS layout_image_compute_test.cc
DEPS layout_opencl op_registry program context DEPS layout_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_elementwise_add_image_opencl SRCS elementwise_add_image_compute_test.cc lite_cc_test(test_elementwise_add_image_opencl SRCS elementwise_add_image_compute_test.cc
DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl op_registry program context DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_grid_sampler_image_opencl SRCS grid_sampler_image_compute_test.cc lite_cc_test(test_grid_sampler_image_opencl SRCS grid_sampler_image_compute_test.cc
DEPS grid_sampler_opencl op_registry program context DEPS grid_sampler_opencl op_registry program context
...@@ -107,37 +96,28 @@ add_kernel(io_copy_opencl OPENCL basic SRCS io_copy_buffer_compute.cc DEPS ${ten ...@@ -107,37 +96,28 @@ add_kernel(io_copy_opencl OPENCL basic SRCS io_copy_buffer_compute.cc DEPS ${ten
# buffer kernel test # # buffer kernel test #
###################### ######################
#lite_cc_test(test_activation_buffer_opencl SRCS activation_buffer_compute_test.cc #lite_cc_test(test_activation_buffer_opencl SRCS activation_buffer_compute_test.cc
# DEPS activation_opencl op_registry program context # DEPS activation_opencl op_registry program context)
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
#lite_cc_test(test_conv_buffer_opencl SRCS conv_buffer_compute_test.cc #lite_cc_test(test_conv_buffer_opencl SRCS conv_buffer_compute_test.cc
# DEPS conv_opencl op_registry program context # DEPS conv_opencl op_registry program context)
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
#lite_cc_test(test_depthwise_conv2d_buffer_opencl SRCS depthwise_conv2d_buffer_compute_test.cc #lite_cc_test(test_depthwise_conv2d_buffer_opencl SRCS depthwise_conv2d_buffer_compute_test.cc
# DEPS depthwise_conv2d_opencl op_registry program context # DEPS depthwise_conv2d_opencl op_registry program context)
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
#lite_cc_test(test_pool_buffer_opencl SRCS pool_buffer_compute_test.cc #lite_cc_test(test_pool_buffer_opencl SRCS pool_buffer_compute_test.cc
# DEPS pool_opencl op_registry program context # DEPS pool_opencl op_registry program context)
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
#lite_cc_test(test_concat_buffer_opencl SRCS concat_buffer_compute_test.cc #lite_cc_test(test_concat_buffer_opencl SRCS concat_buffer_compute_test.cc
# DEPS concat_opencl op_registry program context # DEPS concat_opencl op_registry program context)
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_fc_buffer_opencl SRCS fc_buffer_compute_test.cc lite_cc_test(test_fc_buffer_opencl SRCS fc_buffer_compute_test.cc
DEPS fc_opencl op_registry program context DEPS fc_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_mul_buffer_opencl SRCS mul_buffer_compute_test.cc lite_cc_test(test_mul_buffer_opencl SRCS mul_buffer_compute_test.cc
DEPS mul_opencl op_registry program context DEPS mul_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
#lite_cc_test(test_elementwise_add_buffer_opencl SRCS elementwise_add__buffer_compute_test.cc #lite_cc_test(test_elementwise_add_buffer_opencl SRCS elementwise_add__buffer_compute_test.cc
# DEPS elementwise_add_opencl op_registry program context # DEPS elementwise_add_opencl op_registry program context)
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_io_copy_buffer_opencl SRCS io_copy_buffer_compute_test.cc lite_cc_test(test_io_copy_buffer_opencl SRCS io_copy_buffer_compute_test.cc
DEPS io_copy_opencl op_registry program context DEPS io_copy_opencl op_registry program context)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
...@@ -42,13 +42,26 @@ function prepare_workspace { ...@@ -42,13 +42,26 @@ function prepare_workspace {
GEN_CODE_PATH_PREFIX=$build_dir/lite/gen_code GEN_CODE_PATH_PREFIX=$build_dir/lite/gen_code
mkdir -p ${GEN_CODE_PATH_PREFIX} mkdir -p ${GEN_CODE_PATH_PREFIX}
touch ${GEN_CODE_PATH_PREFIX}/__generated_code__.cc touch ${GEN_CODE_PATH_PREFIX}/__generated_code__.cc
# 2.Prepare debug tool # 2.Prepare debug tool
DEBUG_TOOL_PATH_PREFIX=$build_dir/lite/tools/debug DEBUG_TOOL_PATH_PREFIX=$build_dir/lite/tools/debug
mkdir -p ${DEBUG_TOOL_PATH_PREFIX} mkdir -p ${DEBUG_TOOL_PATH_PREFIX}
cp $root_dir/lite/tools/debug/analysis_tool.py ${DEBUG_TOOL_PATH_PREFIX}/ cp $root_dir/lite/tools/debug/analysis_tool.py ${DEBUG_TOOL_PATH_PREFIX}/
} }
function prepare_opencl_source_code {
    # Regenerate lite/backends/opencl/opencl_kernels_source.cc from the .cl
    # kernel files so the kernels are compiled into the resulting library.
    #   $1: repository root directory
    #   $2: build directory (accepted from the caller; not used in this function)
    local root_dir=$1
    local build_dir=$2
    # in build directory
    # Prepare opencl_kernels_source.cc file
    GEN_CODE_PATH_OPENCL=$root_dir/lite/backends/opencl
    OPENCL_KERNELS_PATH=$root_dir/lite/backends/opencl/cl_kernel
    # Remove the previously generated file. The variable must be expanded here:
    # without the leading '$' the command targets a literal relative path and
    # the stale file is never removed.
    rm -f "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    mkdir -p "${GEN_CODE_PATH_OPENCL}"
    touch "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    python "$root_dir/lite/tools/cmake_tools/gen_opencl_code.py" "$OPENCL_KERNELS_PATH" "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
}
function prepare_thirdparty { function prepare_thirdparty {
if [ ! -d $workspace/third-party -o -f $workspace/third-party-05b862.tar.gz ]; then if [ ! -d $workspace/third-party -o -f $workspace/third-party-05b862.tar.gz ]; then
rm -rf $workspace/third-party rm -rf $workspace/third-party
...@@ -113,6 +126,45 @@ function make_tiny_publish_so { ...@@ -113,6 +126,45 @@ function make_tiny_publish_so {
cd - > /dev/null cd - > /dev/null
} }
function make_opencl {
    # Configure and build the OpenCL-enabled tiny-publish inference library.
    #   $1: ARM_TARGET_OS in "android" , "armlinux"
    #   $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
    #   $3: ARM_TARGET_LANG in "gcc" "clang"
    local os=$1
    local abi=$2
    local lang=$3
    #git submodule update --init --recursive
    prepare_thirdparty

    root_dir=$(pwd)
    build_dir=$root_dir/build.lite.${os}.${abi}.${lang}.opencl
    # Start from a clean build directory. The check must use $build_dir: the
    # previously used $build_directory is never defined, so the old directory
    # was silently left in place.
    if [ -d "$build_dir" ]
    then
        rm -rf "$build_dir"
    fi
    mkdir -p "$build_dir"
    cd "$build_dir"
    prepare_workspace $root_dir $build_dir
    prepare_opencl_source_code $root_dir $build_dir

    cmake .. \
        -DLITE_WITH_OPENCL=ON \
        -DWITH_GPU=OFF \
        -DWITH_MKL=OFF \
        -DWITH_LITE=ON \
        -DLITE_WITH_CUDA=OFF \
        -DLITE_WITH_X86=OFF \
        -DLITE_WITH_ARM=ON \
        -DWITH_ARM_DOTPROD=ON \
        -DLITE_ON_TINY_PUBLISH=ON \
        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
        -DWITH_TESTING=OFF \
        -DLITE_BUILD_EXTRA=ON \
        -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
    # The OpenCL C++ headers target is built before the publish target.
    make opencl_clhpp -j4
    make publish_inference -j4
}
function make_full_publish_so { function make_full_publish_so {
local os=$1 local os=$1
local abi=$2 local abi=$2
...@@ -398,6 +450,10 @@ function main { ...@@ -398,6 +450,10 @@ function main {
build_opt build_opt
shift shift
;; ;;
opencl)
make_opencl $ARM_OS $ARM_ABI $ARM_LANG
shift
;;
cuda) cuda)
make_cuda make_cuda
shift shift
......
...@@ -37,6 +37,19 @@ function prepare_thirdparty { ...@@ -37,6 +37,19 @@ function prepare_thirdparty {
fi fi
} }
function prepare_opencl_source_code {
    # Regenerate lite/backends/opencl/opencl_kernels_source.cc from the .cl
    # kernel files before the OpenCL build is configured.
    #   $1: repository root directory
    #   $2: build directory (accepted from the caller; not used in this function)
    local root_dir=$1
    local build_dir=$2
    # in build directory
    # Prepare opencl_kernels_source.cc file
    GEN_CODE_PATH_OPENCL=$root_dir/lite/backends/opencl
    OPENCL_KERNELS_PATH=$root_dir/lite/backends/opencl/cl_kernel
    # Remove the previously generated file. The variable must be expanded here:
    # without the leading '$' the command targets a literal relative path and
    # the stale file is never removed.
    rm -f "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    mkdir -p "${GEN_CODE_PATH_OPENCL}"
    touch "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    python "$root_dir/lite/tools/cmake_tools/gen_opencl_code.py" "$OPENCL_KERNELS_PATH" "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
}
# prepare adb devices # prepare adb devices
# if USE_ADB_EMULATOR=ON , we create adb emulator port_armv8 and port_armv7 for usage, else we will use actual mobilephone according to adbindex. # if USE_ADB_EMULATOR=ON , we create adb emulator port_armv8 and port_armv7 for usage, else we will use actual mobilephone according to adbindex.
function prepare_adb_devices { function prepare_adb_devices {
...@@ -173,6 +186,8 @@ function build_opencl { ...@@ -173,6 +186,8 @@ function build_opencl {
mkdir -p $build_dir mkdir -p $build_dir
cd $build_dir cd $build_dir
prepare_opencl_source_code $cur_dir $build_dir
cmake_opencl ${os} ${abi} ${lang} cmake_opencl ${os} ${abi} ${lang}
make opencl_clhpp make opencl_clhpp
build $TESTS_FILE build $TESTS_FILE
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import os
import sys
import logging
opencl_kernel_path=""
opencl_dest_path=""
def gen_opencl_kernels(kernel_path=None, dest_path=None):
    """Embed the OpenCL kernel sources into a generated C++ source file.

    Reads ``cl_common.h`` plus every regular file under the ``buffer`` and
    ``image`` sub-directories of the kernel directory, strips comments and
    blank lines, replaces each ``#include`` of ``cl_common.h`` with the
    cleaned text of that header (up to, but excluding, its first line that
    contains ``__kernel void``), and writes a C++ file defining
    ``paddle::lite::opencl_kernels_files``: a map from ``"buffer/<name>"`` or
    ``"image/<name>"`` to the bytes of the processed kernel source.

    Args:
        kernel_path: directory holding ``cl_common.h``, ``buffer/`` and
            ``image/``. Defaults to the module-level ``opencl_kernel_path``.
        dest_path: path of the C++ file to write. Defaults to the
            module-level ``opencl_dest_path``.
    """
    # Local import: io.open accepts `encoding` on both Python 2 and Python 3.
    import io

    if kernel_path is None:
        kernel_path = opencl_kernel_path
    if dest_path is None:
        dest_path = opencl_dest_path

    source = """
#pragma
#ifdef LITE_WITH_OPENCL
#include <map>
#include <string>
#include <vector>
namespace paddle {
namespace lite {
// file name => source
extern const std::map<std::string, std::vector<unsigned char>> opencl_kernels_files = {
%s
};
} // namespace lite
} // namespace paddle
#endif
"""

    def read_text(path):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the build machine; the handle is always closed.
        with io.open(path, "r", encoding="utf-8") as infile:
            return infile.read()

    def clean_source(content):
        # Drop /* ... */ comments, then // comments, then surrounding
        # whitespace and empty lines.
        content = re.sub(r"/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/", "", content, flags=re.DOTALL)
        kept = []
        for line in content.split("\n"):
            line = re.sub(r"//.*$", "", line).strip()
            if line:
                kept.append(line)
        return "\n".join(kept)

    def lines_before(content, marker):
        # Everything that precedes the first line containing `marker`.
        kept = []
        for line in content.split("\n"):
            if marker in line:
                break
            kept.append(line)
        return "\n".join(kept)

    common_header = lines_before(
        clean_source(read_text(os.path.join(kernel_path, "cl_common.h"))),
        "__kernel void")

    def inline_common_header(content):
        # Substitute the shared header text for every include of cl_common.h.
        lines = content.split("\n")
        for index, line in enumerate(lines):
            if "#include \"cl_common.h\"" in line or "#include <cl_common.h>" in line:
                lines[index] = common_header
        return "\n".join(lines)

    # (map key, processed source) pairs; "buffer" entries precede "image" ones.
    kernels = []
    for subdir in ("buffer", "image"):
        directory = os.path.join(kernel_path, subdir)
        # os.listdir returns entries in arbitrary order; sort them so the
        # generated file is identical from one run to the next.
        for filename in sorted(os.listdir(directory)):
            path = os.path.join(directory, filename)
            if not os.path.isfile(path):
                continue
            kernels.append((subdir + "/" + filename,
                            inline_common_header(clean_source(read_text(path)))))

    entries = []
    for file_name, content in kernels:
        if content == "":
            # Keep the C++ initializer list non-empty for empty kernels.
            content = " "
        # Emit UTF-8 bytes so every value fits in an unsigned char.
        hexes = ", ".join(hex(byte) for byte in bytearray(content.encode("utf-8")))
        entries.append(" {\"%s\", {%s}}" % (file_name, hexes))

    source = source % (",\n".join(entries))
    with open(dest_path, 'w') as f:
        logging.info("write opencl kernels source files to %s" % dest_path)
        f.write(source)
def gen_empty_opencl_kernels():
    """Build the text of an empty ``opencl_kernels`` map definition.

    The text is only bound to a local name; nothing is written or returned.
    """
    empty_source = """
#pragma once
#ifdef PADDLE_MOBILE_CL
#include <map>
#include <string>
#include <vector>
namespace paddle_mobile {
// func name => source
extern const std::map<std::string, std::vector<unsigned char>> opencl_kernels = {
};
}
#endif
"""
if __name__ == "__main__":
    # Usage: gen_opencl_code.py <opencl_kernel_dir> <dest_source_file>
    # Fail with a readable message instead of a bare IndexError when the
    # two required paths are not supplied.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <opencl_kernel_dir> <dest_source_file>" % sys.argv[0])
    # gen_opencl_kernels reads these module-level names.
    opencl_kernel_path = sys.argv[1]
    opencl_dest_path = sys.argv[2]
    gen_opencl_kernels()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册