提交 884267bf 编写于 作者: L Liangliang He

Merge branch 'gen_opencl_kernel_binary' into 'master'

Generate opencl kernel binary

See merge request !155
......@@ -10,30 +10,74 @@
#include "mace/core/logging.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include <CL/opencl.h>
namespace mace {
namespace {
bool ReadSourceFile(const std::string &filename, std::string *content) {
MACE_CHECK_NOTNULL(content);
*content = "";
std::ifstream ifs(filename, std::ifstream::in);
bool ReadFile(const std::string &filename, bool binary,
std::vector<unsigned char> *content_ptr) {
MACE_CHECK_NOTNULL(content_ptr);
std::ios_base::openmode mode = std::ios::in;
if (binary) {
mode |= std::ios::binary;
}
std::ifstream ifs(filename, mode);
// Stop eating new lines and whitespace
ifs.unsetf(std::ios::skipws);
if (!ifs.is_open()) {
LOG(ERROR) << "Failed to open file " << filename;
return false;
}
std::string line;
while (std::getline(ifs, line)) {
*content += line;
*content += "\n";
ifs.seekg(0, std::ios::end);
const size_t filesize = ifs.tellg();
if (filesize > 10 * 1024 * 1024) {
LOG(ERROR) << "Filesize overflow 10MB";
return false;
}
content_ptr->reserve(filesize);
ifs.seekg(0, std::ios::beg);
content_ptr->insert(content_ptr->begin(), std::istreambuf_iterator<char>(ifs),
std::istreambuf_iterator<char>());
if (ifs.fail()) {
LOG(ERROR) << "Failed to read from file " << filename;
return false;
}
ifs.close();
return true;
}
bool WriteFile(const std::string &filename, bool binary,
const std::vector<unsigned char> &content) {
std::ios_base::openmode mode = std::ios::out;
if (binary) {
mode |= std::ios::binary;
}
std::ofstream ofs(filename, mode);
ofs.write(reinterpret_cast<const char *>(&content[0]),
content.size() * sizeof(char));
ofs.close();
if (ofs.fail()) {
LOG(ERROR) << "Failed to write to file " << filename;
return false;
}
return true;
}
} // namespace
bool OpenCLRuntime::enable_profiling_ = false;
std::unique_ptr<cl::Event> OpenCLRuntime::profiling_ev_ = NULL;
std::unique_ptr<cl::Event> OpenCLRuntime::profiling_ev_ = nullptr;
OpenCLRuntime *OpenCLRuntime::Get() {
static std::once_flag init_once;
......@@ -97,13 +141,9 @@ OpenCLRuntime *OpenCLRuntime::Get() {
return instance;
}
void OpenCLRuntime::EnableProfiling() {
enable_profiling_ = true;
}
void OpenCLRuntime::EnableProfiling() { enable_profiling_ = true; }
cl::Event* OpenCLRuntime::GetDefaultEvent() {
return profiling_ev_.get();
}
cl::Event *OpenCLRuntime::GetDefaultEvent() { return profiling_ev_.get(); }
cl_ulong OpenCLRuntime::GetEventProfilingStartInfo() {
MACE_CHECK(profiling_ev_, "is NULL, should enable profiling first.");
......@@ -115,8 +155,7 @@ cl_ulong OpenCLRuntime::GetEventProfilingEndInfo() {
return profiling_ev_->getProfilingInfo<CL_PROFILING_COMMAND_END>();
}
OpenCLRuntime::OpenCLRuntime(cl::Context context,
cl::Device device,
OpenCLRuntime::OpenCLRuntime(cl::Context context, cl::Device device,
cl::CommandQueue command_queue)
: context_(context), device_(device), command_queue_(command_queue) {
const char *kernel_path = getenv("MACE_KERNEL_PATH");
......@@ -137,8 +176,7 @@ cl::Program &OpenCLRuntime::program() {
}
// TODO(heliangliang) Support binary format
const std::map<std::string, std::string>
OpenCLRuntime::program_map_ = {
const std::map<std::string, std::string> OpenCLRuntime::program_map_ = {
{"addn", "addn.cl"},
{"batch_norm", "batch_norm.cl"},
{"bias_add", "bias_add.cl"},
......@@ -154,20 +192,54 @@ const std::map<std::string, std::string>
{"space_to_batch", "space_to_batch.cl"},
};
std::string
OpenCLRuntime::GenerateCLBinaryFilenamePrefix(const std::string &filename_msg) {
std::string filename_prefix = filename_msg;
for (auto it = filename_prefix.begin(); it != filename_prefix.end(); ++it) {
if (*it == ' ' || *it == '-' || *it == '=') {
*it = '_';
}
}
return filename_prefix;
}
void OpenCLRuntime::BuildProgram(const std::string &program_file_name,
const std::string &binary_file_name_prefix,
const std::string &build_options,
cl::Program *program) {
MACE_CHECK_NOTNULL(program);
std::string source_filename = kernel_path_ + program_file_name;
std::string binary_filename = kernel_path_ + binary_file_name_prefix + ".bin";
// Create program
bool is_binary_filename_exist = std::ifstream(binary_filename).is_open();
if (is_binary_filename_exist) {
VLOG(1) << "Create program with binary: " << binary_filename;
std::vector<unsigned char> binaries;
MACE_CHECK(ReadFile(binary_filename, true, &binaries));
*program = cl::Program(this->context(), {device()}, {binaries});
} else if (std::ifstream(source_filename).is_open()) {
VLOG(1) << "Create program with source: " << source_filename;
std::vector<unsigned char> kernel_source;
MACE_CHECK(ReadFile(source_filename, false, &kernel_source));
cl::Program::Sources sources;
std::string filename = kernel_path_ + program_file_name;
std::string kernel_source;
MACE_CHECK(ReadSourceFile(filename, &kernel_source));
sources.push_back({kernel_source.c_str(), kernel_source.length()});
sources.push_back(std::string(kernel_source.begin(), kernel_source.end()));
*program = cl::Program(this->context(), sources);
std::string build_options_str = build_options +
" -Werror -cl-mad-enable -cl-fast-relaxed-math -I" + kernel_path_;
} else {
LOG(FATAL) << "Failed to open kernel file " << binary_filename << " or "
<< source_filename;
}
// Build program
std::string build_options_str =
build_options + " -Werror -cl-mad-enable -cl-fast-relaxed-math -I" +
kernel_path_;
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
cl_int ret = program->build({device()}, build_options_str.c_str());
if (ret != CL_SUCCESS) {
......@@ -179,9 +251,38 @@ void OpenCLRuntime::BuildProgram(const std::string &program_file_name,
}
LOG(FATAL) << "Build program failed: " << ret;
}
// Write binary if necessary
if (!is_binary_filename_exist) {
size_t device_list_size = 1;
std::unique_ptr<size_t[]> program_binary_sizes(
new size_t[device_list_size]);
cl_int err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARY_SIZES,
sizeof(size_t) * device_list_size,
program_binary_sizes.get(), nullptr);
MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err;
std::unique_ptr<std::unique_ptr<unsigned char[]>[]> program_binaries(
new std::unique_ptr<unsigned char[]>[ device_list_size ]);
for (cl_uint i = 0; i < device_list_size; ++i) {
program_binaries[i] = std::unique_ptr<unsigned char[]>(
new unsigned char[program_binary_sizes[i]]);
}
err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARIES,
sizeof(unsigned char *) * device_list_size,
program_binaries.get(), nullptr);
MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err;
std::vector<unsigned char> content(
reinterpret_cast<unsigned char const *>(program_binaries[0].get()),
reinterpret_cast<unsigned char const *>(program_binaries[0].get()) +
program_binary_sizes[0]);
MACE_CHECK(WriteFile(binary_filename, true, content));
}
}
cl::Kernel OpenCLRuntime::BuildKernel(const std::string &program_name,
cl::Kernel
OpenCLRuntime::BuildKernel(const std::string &program_name,
const std::string &kernel_name,
const std::set<std::string> &build_options) {
auto kernel_program_it = program_map_.find(program_name);
......@@ -191,7 +292,7 @@ cl::Kernel OpenCLRuntime::BuildKernel(const std::string &program_name,
std::string program_file_name = kernel_program_it->second;
std::string build_options_str;
for(auto &option : build_options) {
for (auto &option : build_options) {
build_options_str += " " + option;
}
std::string built_program_key = program_name + build_options_str;
......@@ -202,7 +303,10 @@ cl::Kernel OpenCLRuntime::BuildKernel(const std::string &program_name,
if (built_program_it != built_program_map_.end()) {
program = built_program_it->second;
} else {
this->BuildProgram(program_file_name, build_options_str, &program);
std::string binary_file_name_prefix =
GenerateCLBinaryFilenamePrefix(built_program_key);
this->BuildProgram(program_file_name, binary_file_name_prefix,
build_options_str, &program);
built_program_map_.emplace(built_program_key, program);
}
return cl::Kernel(program, kernel_name.c_str());
......@@ -214,7 +318,7 @@ uint32_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() {
return static_cast<uint32_t>(size);
}
uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel& kernel) {
uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) {
unsigned long long size = 0;
kernel.getWorkGroupInfo(device_, CL_KERNEL_WORK_GROUP_SIZE, &size);
return static_cast<uint32_t>(size);
......
......@@ -44,9 +44,11 @@ class OpenCLRuntime {
OpenCLRuntime(const OpenCLRuntime&) = delete;
OpenCLRuntime &operator=(const OpenCLRuntime&) = delete;
void BuildProgram(const std::string &kernel_name,
void BuildProgram(const std::string &program_file_name,
const std::string &binary_file_name,
const std::string &build_options,
cl::Program *program);
std::string GenerateCLBinaryFilenamePrefix(const std::string &filename_msg);
private:
static bool enable_profiling_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册