提交 dfcbfbdc 编写于 作者: H huzhiqiang 提交者: GitHub

[opencl compile] add into build.sh (#3031)

* test=devellop

* add cl file into resulted lib test=develop

* test=develop

* test=develop
上级 50b9e85f
......@@ -282,6 +282,10 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/opencl"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/backends/opencl/cl_kernel" "${INFER_LITE_PUBLISH_ROOT}/opencl"
)
# Hook the OpenCL publish step onto whichever C++ publish target this build
# configuration uses, so the cl_kernel copy runs as part of publishing.
if (LITE_ON_TINY_PUBLISH)
  set(opencl_publish_parent_target tiny_publish_cxx_lib)
else()
  set(opencl_publish_parent_target publish_inference_cxx_lib)
endif()
add_dependencies(${opencl_publish_parent_target} publish_inference_opencl)
endif()
endif()
......@@ -2,18 +2,17 @@ if (NOT LITE_WITH_OPENCL)
return()
endif()
lite_cc_library(opencl_kernels_source_cc SRCS opencl_kernels_source.cc)
lite_cc_library(cl_wrapper SRCS cl_wrapper.cc)
lite_cc_library(cl_utility SRCS cl_utility.cc DEPS cl_wrapper)
lite_cc_library(cl_runtime SRCS cl_runtime.cc DEPS cl_utility)
lite_cc_library(cl_runtime SRCS cl_runtime.cc DEPS cl_utility opencl_kernels_source_cc)
lite_cc_library(cl_context SRCS cl_context.cc DEPS cl_runtime)
lite_cc_library(cl_half SRCS cl_half.cc)
lite_cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS tensor cl_half)
lite_cc_library(cl_image SRCS cl_image.cc DEPS tensor cl_image_converter cl_runtime)
lite_cc_library(cl_caller SRCS cl_caller.cc DEPS cl_context cl_image)
lite_cc_library(cl_target_wrapper SRCS target_wrapper.cc DEPS cl_runtime)
lite_cc_test(test_cl_functions SRCS cl_functions_test.cc DEPS cl_context cl_image cl_caller cl_wrapper cl_target_wrapper
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_cl_im2col SRCS cl_im2col_test.cc DEPS tensor cl_context cl_wrapper cl_target_wrapper
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_cl_functions SRCS cl_functions_test.cc DEPS cl_context cl_image cl_caller cl_wrapper cl_target_wrapper)
lite_cc_test(test_cl_im2col SRCS cl_im2col_test.cc DEPS tensor cl_context cl_wrapper cl_target_wrapper)
add_dependencies(cl_wrapper opencl_clhpp)
......@@ -46,9 +46,8 @@ static void CopyImageData(CLContext* context,
delete[] image_data;
}
bool InitOpenCLRuntime(std::string cl_path) {
bool InitOpenCLRuntime() {
auto* runtime = CLRuntime::Global();
runtime->set_cl_path(cl_path);
return runtime->IsInitSuccess();
}
......
......@@ -21,7 +21,7 @@ limitations under the License. */
namespace paddle {
namespace lite {
bool InitOpenCLRuntime(std::string cl_path);
bool InitOpenCLRuntime();
} // namespace lite
} // namespace paddle
......@@ -41,8 +41,7 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
return *(it->second);
}
auto program = CLRuntime::Global()->CreateProgram(
GetContext(), CLRuntime::Global()->cl_path() + "/cl_kernel/" + file_name);
auto program = CLRuntime::Global()->CreateProgram(GetContext(), file_name);
VLOG(3) << " --- begin build program -> " << program_key << " --- ";
CLRuntime::Global()->BuildProgram(program.get(), options);
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <memory>
......@@ -26,22 +25,18 @@ limitations under the License. */
#include "lite/core/tensor.h"
#include "lite/utils/cp_logging.h"
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
namespace paddle {
namespace lite {
TEST(cl_test, runtime_test) {
auto *runtime = CLRuntime::Global();
CHECK(runtime->IsInitSuccess());
runtime->set_cl_path(FLAGS_cl_path);
runtime->platform();
runtime->device();
runtime->command_queue();
auto &context = runtime->context();
auto program = runtime->CreateProgram(
context,
runtime->cl_path() + "/cl_kernel/" + "buffer/elementwise_add_kernel.cl");
auto program =
runtime->CreateProgram(context, "buffer/elementwise_add_kernel.cl");
auto event = runtime->CreateEvent(context);
const std::string build_option("-DCL_DTYPE_float");
CHECK(runtime->BuildProgram(program.get(), build_option));
......@@ -50,7 +45,6 @@ TEST(cl_test, runtime_test) {
TEST(cl_test, context_test) {
auto *runtime = CLRuntime::Global();
CHECK(runtime->IsInitSuccess());
runtime->set_cl_path(FLAGS_cl_path);
CLContext context;
context.AddKernel("pool_max", "image/pool_kernel.cl", "-DCL_DTYPE_float");
context.AddKernel(
......@@ -62,7 +56,6 @@ TEST(cl_test, context_test) {
TEST(cl_test, kernel_test) {
auto *runtime = CLRuntime::Global();
CHECK(runtime->IsInitSuccess());
runtime->set_cl_path(FLAGS_cl_path);
std::unique_ptr<CLContext> context(new CLContext);
context->AddKernel(
"elementwise_add", "image/elementwise_add_kernel.cl", "-DCL_DTYPE_float");
......@@ -121,7 +114,7 @@ TEST(cl_test, kernel_test) {
}
TEST(cl_test, target_wrapper_buffer_test) {
bool inited = InitOpenCLRuntime(FLAGS_cl_path);
bool inited = InitOpenCLRuntime();
CHECK(inited) << "Fail to initialize OpenCL runtime.";
std::unique_ptr<CLContext> context(new CLContext);
std::string kernel_name = "elementwise_add";
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "lite/backends/opencl/cl_image.h"
#include <iostream>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_runtime.h"
#include "lite/backends/opencl/cl_utility.h"
......@@ -42,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
int stride = cl_image.numel() / 20;
stride = stride > 0 ? stride : 1;
os << " dims: " << cl_image.tensor_dims_ << "\n";
os << " dims: "; // << cl_image.tensor_dims_ << "\n";
for (int i = 0; i < cl_image.numel(); i += stride) {
os << tensor_data[i] << " ";
}
......
......@@ -15,6 +15,8 @@ limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define CL_DTYPE float
#include <cl_common.h>
__kernel
void im2col(__global const CL_DTYPE* data_im, const int img_offset,
const int col_chw,
......
......@@ -75,13 +75,8 @@ cl::CommandQueue& CLRuntime::command_queue() {
std::unique_ptr<cl::Program> CLRuntime::CreateProgram(
const cl::Context& context, std::string file_name) {
std::ifstream file{file_name, std::ios::binary | std::ios::ate};
CHECK(file.is_open()) << "Can't open file from " << file_name;
auto size = file.tellg();
CHECK(size > 0) << "size is too small.";
std::string content(size, '\0');
file.seekg(0);
file.read(&content[0], size);
auto cl_file = opencl_kernels_files.find(file_name);
std::string content(cl_file->second.begin(), cl_file->second.end());
cl::Program::Sources sources;
sources.push_back(content);
auto prog =
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <vector>
......@@ -24,6 +25,9 @@ limitations under the License. */
namespace paddle {
namespace lite {
extern const std::map<std::string, std::vector<unsigned char>>
opencl_kernels_files;
class CLRuntime {
public:
static CLRuntime* Global();
......
......@@ -34,9 +34,9 @@ lite_cc_library(scope SRCS scope.cc DEPS tensor)
lite_cc_library(device_info SRCS device_info.cc DEPS tensor)
if (LITE_WITH_ARM)
lite_cc_library(context SRCS context.cc DEPS tensor any device_info CL_DEPS cl_context gflags)
lite_cc_library(context SRCS context.cc DEPS tensor any device_info CL_DEPS cl_context)
else()
lite_cc_library(context SRCS context.cc DEPS tensor any device_info eigen3 CL_DEPS cl_context gflags)
lite_cc_library(context SRCS context.cc DEPS tensor any device_info eigen3 CL_DEPS cl_context)
endif()
#-------------------------------------------- GET CODE META INFO ------------------------------------------
......@@ -67,6 +67,13 @@ message(STATUS "commit: ${PADDLE_LITE_COMMIT}")
configure_file(version.h.in version.h)
#----------------------------------------------- NOT CHANGE -----------------------------------------------
# A trick to generate the opencl_kernels_source.cc
#add_custom_command(
# COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/gen_opencl_code.py
# ${CMAKE_SOURCE_DIR}/lite/backends/opencl/cl_kernel
# ${CMAKE_BINARY_DIR}/lite/backends/opencl/opencl_kernels_source.cc
# OUTPUT opencl_kernels_source.cc # not a real path to the output to force it execute every time.
# )
# A trick to generate the paddle_use_kernels.h
add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
......@@ -96,6 +103,8 @@ add_custom_command(
add_custom_target(op_list_h DEPENDS ops.h)
add_custom_target(kernel_list_h DEPENDS kernels.h)
add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
#add_custom_target(opencl_kernels_source_cc DEPENDS opencl_kernels_source.cc)
# create a header file that records op info sorted by supported platform
add_custom_command(
COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py
......
......@@ -14,10 +14,6 @@
#include "lite/core/context.h"
#ifdef LITE_WITH_OPENCL
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
#endif
namespace paddle {
namespace lite {} // namespace lite
} // namespace paddle
......@@ -20,7 +20,6 @@
#include "lite/backends/cuda/cuda_utils.h"
#endif
#ifdef LITE_WITH_OPENCL
#include <gflags/gflags.h>
#include <unordered_map>
#include "lite/backends/opencl/cl_context.h"
#include "lite/backends/opencl/cl_runtime.h"
......@@ -36,10 +35,7 @@
#include "lite/core/target_wrapper.h"
#include "lite/core/tensor.h"
#include "lite/utils/all.h"
#ifdef LITE_WITH_OPENCL
DECLARE_string(cl_path);
#endif
#include "lite/utils/env.h"
namespace paddle {
namespace lite {
......@@ -304,7 +300,6 @@ class Context<TargetType::kOpenCL> {
void InitOnce() {
// Init cl runtime.
CHECK(CLRuntime::Global()->IsInitSuccess()) << "OpenCL runtime init failed";
CLRuntime::Global()->set_cl_path(FLAGS_cl_path);
cl_context_ = std::make_shared<CLContext>();
cl_wait_list_ = std::make_shared<WaitListType>();
......
......@@ -34,48 +34,37 @@ add_kernel(grid_sampler_opencl OPENCL basic SRCS grid_sampler_image_compute.cc D
# image kernel test #
######################
lite_cc_test(test_activation_image_opencl SRCS activation_image_compute_test.cc
DEPS activation_opencl layout_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS activation_opencl layout_opencl op_registry program context)
lite_cc_test(test_conv_image_opencl SRCS conv_image_compute_test.cc
DEPS conv_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS conv_opencl op_registry program context)
lite_cc_test(test_depthwise_conv2d_image_opencl SRCS depthwise_conv2d_image_compute_test.cc
DEPS conv_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS conv_opencl op_registry program context)
lite_cc_test(test_nearest_interp_image_opencl SRCS nearest_interp_image_compute_test.cc
DEPS nearest_interp_opencl layout_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS nearest_interp_opencl layout_opencl op_registry program context)
lite_cc_test(test_pool_image_opencl SRCS pool_image_compute_test.cc
DEPS pool_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS pool_opencl op_registry program context)
lite_cc_test(test_scale_image_opencl SRCS scale_image_compute_test.cc
DEPS scale_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS scale_opencl op_registry program context)
lite_cc_test(test_reshape_image_opencl SRCS reshape_image_compute_test.cc
DEPS reshape_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS reshape_opencl op_registry program context)
lite_cc_test(test_concat_image_opencl SRCS concat_image_compute_test.cc
DEPS concat_opencl layout_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS concat_opencl layout_opencl op_registry program context)
lite_cc_test(test_elementwise_mul_image_opencl SRCS elementwise_mul_image_compute_test.cc
DEPS elementwise_mul_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS elementwise_mul_opencl op_registry program context)
lite_cc_test(test_layout_image_opencl SRCS layout_image_compute_test.cc
DEPS layout_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS layout_opencl op_registry program context)
lite_cc_test(test_elementwise_add_image_opencl SRCS elementwise_add_image_compute_test.cc
DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl op_registry program context)
lite_cc_test(test_grid_sampler_image_opencl SRCS grid_sampler_image_compute_test.cc
DEPS grid_sampler_opencl op_registry program context
......@@ -107,37 +96,28 @@ add_kernel(io_copy_opencl OPENCL basic SRCS io_copy_buffer_compute.cc DEPS ${ten
# buffer kernel test #
######################
#lite_cc_test(test_activation_buffer_opencl SRCS activation_buffer_compute_test.cc
# DEPS activation_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
# DEPS activation_opencl op_registry program context)
#lite_cc_test(test_conv_buffer_opencl SRCS conv_buffer_compute_test.cc
# DEPS conv_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
# DEPS conv_opencl op_registry program context)
#lite_cc_test(test_depthwise_conv2d_buffer_opencl SRCS depthwise_conv2d_buffer_compute_test.cc
# DEPS depthwise_conv2d_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
# DEPS depthwise_conv2d_opencl op_registry program context)
#lite_cc_test(test_pool_buffer_opencl SRCS pool_buffer_compute_test.cc
# DEPS pool_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
# DEPS pool_opencl op_registry program context)
#lite_cc_test(test_concat_buffer_opencl SRCS concat_buffer_compute_test.cc
# DEPS concat_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
# DEPS concat_opencl op_registry program context)
lite_cc_test(test_fc_buffer_opencl SRCS fc_buffer_compute_test.cc
DEPS fc_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS fc_opencl op_registry program context)
lite_cc_test(test_mul_buffer_opencl SRCS mul_buffer_compute_test.cc
DEPS mul_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS mul_opencl op_registry program context)
#lite_cc_test(test_elementwise_add_buffer_opencl SRCS elementwise_add__buffer_compute_test.cc
# DEPS elementwise_add_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
# DEPS elementwise_add_opencl op_registry program context)
lite_cc_test(test_io_copy_buffer_opencl SRCS io_copy_buffer_compute_test.cc
DEPS io_copy_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
DEPS io_copy_opencl op_registry program context)
......@@ -42,13 +42,26 @@ function prepare_workspace {
GEN_CODE_PATH_PREFIX=$build_dir/lite/gen_code
mkdir -p ${GEN_CODE_PATH_PREFIX}
touch ${GEN_CODE_PATH_PREFIX}/__generated_code__.cc
# 2.Prepare debug tool
DEBUG_TOOL_PATH_PREFIX=$build_dir/lite/tools/debug
mkdir -p ${DEBUG_TOOL_PATH_PREFIX}
cp $root_dir/lite/tools/debug/analysis_tool.py ${DEBUG_TOOL_PATH_PREFIX}/
}
# Regenerate opencl_kernels_source.cc from the .cl kernel sources.
#   $1: repository root directory
#   $2: build directory (accepted but not used by this function)
# The generated file is written into the source tree
# ($root_dir/lite/backends/opencl), not into the build directory.
function prepare_opencl_source_code {
    local root_dir=$1
    local build_dir=$2
    # Prepare opencl_kernels_source.cc file
    GEN_CODE_PATH_OPENCL=$root_dir/lite/backends/opencl
    OPENCL_KERNELS_PATH=$root_dir/lite/backends/opencl/cl_kernel
    mkdir -p "${GEN_CODE_PATH_OPENCL}"
    # Remove any stale generated file first. The variable must be expanded;
    # a bare GEN_CODE_PATH_OPENCL/... is a literal relative path that never matches.
    rm -f "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    touch "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    python "$root_dir/lite/tools/cmake_tools/gen_opencl_code.py" \
        "${OPENCL_KERNELS_PATH}" \
        "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
}
function prepare_thirdparty {
if [ ! -d $workspace/third-party -o -f $workspace/third-party-05b862.tar.gz ]; then
rm -rf $workspace/third-party
......@@ -113,6 +126,45 @@ function make_tiny_publish_so {
cd - > /dev/null
}
# Configure and build the OpenCL tiny-publish library for an ARM target.
#   $1: ARM_TARGET_OS in "android" , "armlinux"
#   $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
#   $3: ARM_TARGET_LANG in "gcc" "clang"
# Leaves the shell inside the freshly created build directory.
function make_opencl {
    local os=$1
    local abi=$2
    local lang=$3
    #git submodule update --init --recursive
    prepare_thirdparty

    root_dir=$(pwd)
    build_dir=$root_dir/build.lite.${os}.${abi}.${lang}.opencl
    # Start from a clean build tree. The check must use $build_dir; the
    # previously referenced $build_directory is never assigned, so the stale
    # directory was never removed.
    if [ -d "$build_dir" ]
    then
        rm -rf "$build_dir"
    fi
    mkdir -p "$build_dir"
    cd "$build_dir"

    prepare_workspace "$root_dir" "$build_dir"
    prepare_opencl_source_code "$root_dir" "$build_dir"

    cmake .. \
        -DLITE_WITH_OPENCL=ON \
        -DWITH_GPU=OFF \
        -DWITH_MKL=OFF \
        -DWITH_LITE=ON \
        -DLITE_WITH_CUDA=OFF \
        -DLITE_WITH_X86=OFF \
        -DLITE_WITH_ARM=ON \
        -DWITH_ARM_DOTPROD=ON \
        -DLITE_ON_TINY_PUBLISH=ON \
        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
        -DWITH_TESTING=OFF \
        -DLITE_BUILD_EXTRA=ON \
        -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
    # opencl_clhpp is built explicitly before the publish target.
    make opencl_clhpp -j4
    make publish_inference -j4
}
function make_full_publish_so {
local os=$1
local abi=$2
......@@ -398,6 +450,10 @@ function main {
build_opt
shift
;;
opencl)
make_opencl $ARM_OS $ARM_ABI $ARM_LANG
shift
;;
cuda)
make_cuda
shift
......
......@@ -37,6 +37,19 @@ function prepare_thirdparty {
fi
}
# Regenerate opencl_kernels_source.cc from the .cl kernel sources.
#   $1: repository root directory
#   $2: build directory (accepted but not used by this function)
# The generated file is written into the source tree
# ($root_dir/lite/backends/opencl), not into the build directory.
function prepare_opencl_source_code {
    local root_dir=$1
    local build_dir=$2
    # Prepare opencl_kernels_source.cc file
    GEN_CODE_PATH_OPENCL=$root_dir/lite/backends/opencl
    OPENCL_KERNELS_PATH=$root_dir/lite/backends/opencl/cl_kernel
    mkdir -p "${GEN_CODE_PATH_OPENCL}"
    # Remove any stale generated file first. The variable must be expanded;
    # a bare GEN_CODE_PATH_OPENCL/... is a literal relative path that never matches.
    rm -f "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    touch "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
    python "$root_dir/lite/tools/cmake_tools/gen_opencl_code.py" \
        "${OPENCL_KERNELS_PATH}" \
        "${GEN_CODE_PATH_OPENCL}/opencl_kernels_source.cc"
}
# prepare adb devices
# If USE_ADB_EMULATOR=ON, create the adb emulators port_armv8 and port_armv7 and use them; otherwise use the physical phones selected by adbindex.
function prepare_adb_devices {
......@@ -173,6 +186,8 @@ function build_opencl {
mkdir -p $build_dir
cd $build_dir
prepare_opencl_source_code $cur_dir $build_dir
cmake_opencl ${os} ${abi} ${lang}
make opencl_clhpp
build $TESTS_FILE
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import os
import sys
import logging
# Directory that holds cl_common.h plus the buffer/ and image/ kernel folders;
# assigned from sys.argv[1] when the script is run directly.
opencl_kernel_path=""
# Path of the C++ source file to generate; assigned from sys.argv[2] when the
# script is run directly.
opencl_dest_path=""
def gen_opencl_kernels(kernel_path=None, dest_path=None):
    """Embed every OpenCL kernel file into one generated C++ source file.

    Each file in ``<kernel_path>/buffer`` and ``<kernel_path>/image`` is
    stripped of comments and blank lines, has its ``cl_common.h`` include line
    replaced by the preamble of ``cl_common.h`` (the lines before its first
    ``__kernel void``), and is written as a byte list keyed by
    ``"buffer/<name>"`` or ``"image/<name>"``.

    Args:
        kernel_path: directory containing ``cl_common.h``, ``buffer/`` and
            ``image/``. Defaults to the module-level ``opencl_kernel_path``.
        dest_path: path of the C++ file to write. Defaults to the module-level
            ``opencl_dest_path``.

    Files are visited in sorted order so the generated file is reproducible
    across machines (``os.listdir`` gives no ordering guarantee).
    """
    if kernel_path is None:
        kernel_path = opencl_kernel_path
    if dest_path is None:
        dest_path = opencl_dest_path

    # NOTE(review): the bare `#pragma` line below is emitted verbatim into the
    # generated file; it looks like a truncated `#pragma once` -- confirm
    # before changing it.
    source = """
#pragma
#ifdef LITE_WITH_OPENCL
#include <map>
#include <string>
#include <vector>
namespace paddle {
namespace lite {
// file name => source
extern const std::map<std::string, std::vector<unsigned char>> opencl_kernels_files = {
%s
};
} // namespace lite
} // namespace paddle
#endif
"""

    def clean_source(content):
        # Drop /* ... */ comments, then // comments, surrounding whitespace
        # and empty lines.
        new_content = re.sub(r"/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/", "", content, flags=re.DOTALL)
        new_lines = []
        for line in new_content.split("\n"):
            line = re.sub(r"//.*$", "", line).strip()
            if line == "":
                continue
            new_lines.append(line)
        return "\n".join(new_lines)

    def read_cleaned(path):
        # Context manager guarantees the handle is closed even on error.
        with open(path, "r") as infile:
            return clean_source(infile.read())

    def get_header_raw(content):
        # Everything that precedes the first kernel definition.
        new_lines = []
        for line in content.split("\n"):
            if "__kernel void" in line:
                break
            new_lines.append(line)
        return "\n".join(new_lines)

    common_header = get_header_raw(read_cleaned(os.path.join(kernel_path, "cl_common.h")))

    def get_header(content):
        # Returns the whole kernel source with the cl_common.h include line
        # replaced by the common preamble.
        lines = content.split("\n")
        for i in range(len(lines)):
            if "#include \"cl_common.h\"" in lines[i] or "#include <cl_common.h>" in lines[i]:
                lines[i] = common_header
        return "\n".join(lines)

    # (key, processed source) pairs, buffer kernels first, then image kernels.
    entries = []
    for subdir in ("buffer", "image"):
        subdir_path = os.path.join(kernel_path, subdir)
        for filename in sorted(os.listdir(subdir_path)):
            file_path = os.path.join(subdir_path, filename)
            if not os.path.isfile(file_path):
                # Nested directories cannot be embedded as a kernel source.
                continue
            entries.append((subdir + "/" + filename, get_header(read_cleaned(file_path))))

    rendered = []
    for file_name, content in entries:
        if content == "":
            # Keep the initializer list non-empty for files that clean to nothing.
            content = " "
        # Emit bytes rather than code points so every value fits an unsigned
        # char; for ASCII input this equals hex(ord(char)) per character.
        raw = content if isinstance(content, bytes) else content.encode("utf-8")
        hexes = ", ".join(hex(byte) for byte in bytearray(raw))
        rendered.append(" {\"%s\", {%s}}" % (file_name, hexes))
    source = source % (",\n".join(rendered))

    with open(dest_path, 'w') as f:
        logging.info("write opencl kernels source files to %s" % dest_path)
        f.write(source)
def gen_empty_opencl_kernels():
    """Build the text of an empty kernel table.

    The text is only bound to a local name: nothing is written to disk and the
    function returns None.
    """
    empty_table_source = """
#pragma once
#ifdef PADDLE_MOBILE_CL
#include <map>
#include <string>
#include <vector>
namespace paddle_mobile {
// func name => source
extern const std::map<std::string, std::vector<unsigned char>> opencl_kernels = {
};
}
#endif
"""
if __name__ == "__main__":
    # Usage: gen_opencl_code.py <opencl kernel directory> <destination .cc file>
    # Fail with a readable message instead of an IndexError when arguments
    # are missing.
    if len(sys.argv) < 3:
        sys.stderr.write(
            "usage: %s <opencl_kernel_dir> <dest_cc_file>\n" % sys.argv[0])
        sys.exit(1)
    opencl_kernel_path = sys.argv[1]
    opencl_dest_path = sys.argv[2]
    gen_opencl_kernels()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册