diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 57463d3a17900d2baa7727c2019b9ca40535c043..93cb44545393d3816bc9de871b10d4e946ebefa8 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -10,30 +10,74 @@ #include "mace/core/logging.h" #include "mace/core/runtime/opencl/opencl_runtime.h" +#include + namespace mace { namespace { -bool ReadSourceFile(const std::string &filename, std::string *content) { - MACE_CHECK_NOTNULL(content); - *content = ""; - std::ifstream ifs(filename, std::ifstream::in); +bool ReadFile(const std::string &filename, bool binary, + std::vector *content_ptr) { + MACE_CHECK_NOTNULL(content_ptr); + + std::ios_base::openmode mode = std::ios::in; + if (binary) { + mode |= std::ios::binary; + } + + std::ifstream ifs(filename, mode); + + // Stop eating new lines and whitespace + ifs.unsetf(std::ios::skipws); + if (!ifs.is_open()) { LOG(ERROR) << "Failed to open file " << filename; return false; } - std::string line; - while (std::getline(ifs, line)) { - *content += line; - *content += "\n"; + + ifs.seekg(0, std::ios::end); + const size_t filesize = ifs.tellg(); + if (filesize > 10 * 1024 * 1024) { + LOG(ERROR) << "Filesize overflow 10MB"; + return false; + } + content_ptr->reserve(filesize); + ifs.seekg(0, std::ios::beg); + content_ptr->insert(content_ptr->begin(), std::istreambuf_iterator(ifs), + std::istreambuf_iterator()); + + if (ifs.fail()) { + LOG(ERROR) << "Failed to read from file " << filename; + return false; } + ifs.close(); + + return true; +} + +bool WriteFile(const std::string &filename, bool binary, + const std::vector &content) { + std::ios_base::openmode mode = std::ios::out; + if (binary) { + mode |= std::ios::binary; + } + std::ofstream ofs(filename, mode); + + ofs.write(reinterpret_cast(&content[0]), + content.size() * sizeof(char)); + ofs.close(); + if (ofs.fail()) { + LOG(ERROR) << "Failed to write to file " << filename; + return false; + } + return true; } } // namespace bool OpenCLRuntime::enable_profiling_ = false; -std::unique_ptr OpenCLRuntime::profiling_ev_ = NULL; +std::unique_ptr OpenCLRuntime::profiling_ev_ = nullptr; OpenCLRuntime *OpenCLRuntime::Get() { static std::once_flag init_once; @@ -97,13 +141,9 @@ OpenCLRuntime *OpenCLRuntime::Get() { return instance; } -void OpenCLRuntime::EnableProfiling() { - enable_profiling_ = true; -} +void OpenCLRuntime::EnableProfiling() { enable_profiling_ = true; } -cl::Event* OpenCLRuntime::GetDefaultEvent() { - return profiling_ev_.get(); -} +cl::Event *OpenCLRuntime::GetDefaultEvent() { return profiling_ev_.get(); } cl_ulong OpenCLRuntime::GetEventProfilingStartInfo() { MACE_CHECK(profiling_ev_, "is NULL, should enable profiling first."); @@ -115,8 +155,7 @@ cl_ulong OpenCLRuntime::GetEventProfilingEndInfo() { return profiling_ev_->getProfilingInfo(); } -OpenCLRuntime::OpenCLRuntime(cl::Context context, - cl::Device device, +OpenCLRuntime::OpenCLRuntime(cl::Context context, cl::Device device, cl::CommandQueue command_queue) : context_(context), device_(device), command_queue_(command_queue) { const char *kernel_path = getenv("MACE_KERNEL_PATH"); @@ -137,37 +176,70 @@ cl::Program &OpenCLRuntime::program() { } // TODO(heliangliang) Support binary format -const std::map - OpenCLRuntime::program_map_ = { - {"addn", "addn.cl"}, - {"batch_norm", "batch_norm.cl"}, - {"bias_add", "bias_add.cl"}, - {"buffer_to_image", "buffer_to_image.cl"}, - {"conv_2d", "conv_2d.cl"}, - {"conv_2d_1x1", "conv_2d_1x1.cl"}, - {"conv_2d_3x3", "conv_2d_3x3.cl"}, - {"depthwise_conv_3x3", "depthwise_conv_3x3.cl"}, - {"pooling", "pooling.cl"}, - {"relu", "relu.cl"}, - {"concat", "concat.cl"}, - {"resize_bilinear", "resize_bilinear.cl"}, - {"space_to_batch", "space_to_batch.cl"}, +const std::map OpenCLRuntime::program_map_ = { + {"addn", "addn.cl"}, + {"batch_norm", "batch_norm.cl"}, + {"bias_add", "bias_add.cl"}, + {"buffer_to_image", "buffer_to_image.cl"}, + {"conv_2d", "conv_2d.cl"}, + {"conv_2d_1x1", "conv_2d_1x1.cl"}, + {"conv_2d_3x3", "conv_2d_3x3.cl"}, + {"depthwise_conv_3x3", "depthwise_conv_3x3.cl"}, + {"pooling", "pooling.cl"}, + {"relu", "relu.cl"}, + {"concat", "concat.cl"}, + {"resize_bilinear", "resize_bilinear.cl"}, + {"space_to_batch", "space_to_batch.cl"}, }; +std::string +OpenCLRuntime::GenerateCLBinaryFilenamePrefix(const std::string &filename_msg) { + std::string filename_prefix = filename_msg; + for (auto it = filename_prefix.begin(); it != filename_prefix.end(); ++it) { + if (*it == ' ' || *it == '-' || *it == '=') { + *it = '_'; + } + } + return filename_prefix; +} + void OpenCLRuntime::BuildProgram(const std::string &program_file_name, + const std::string &binary_file_name_prefix, const std::string &build_options, cl::Program *program) { MACE_CHECK_NOTNULL(program); - cl::Program::Sources sources; - std::string filename = kernel_path_ + program_file_name; - std::string kernel_source; - MACE_CHECK(ReadSourceFile(filename, &kernel_source)); - sources.push_back({kernel_source.c_str(), kernel_source.length()}); + std::string source_filename = kernel_path_ + program_file_name; + std::string binary_filename = kernel_path_ + binary_file_name_prefix + ".bin"; + + // Create program + bool is_binary_filename_exist = std::ifstream(binary_filename).is_open(); + if (is_binary_filename_exist) { + VLOG(1) << "Create program with binary: " << binary_filename; + std::vector binaries; + MACE_CHECK(ReadFile(binary_filename, true, &binaries)); + + *program = cl::Program(this->context(), {device()}, {binaries}); + + } else if (std::ifstream(source_filename).is_open()) { + VLOG(1) << "Create program with source: " << source_filename; + std::vector kernel_source; + MACE_CHECK(ReadFile(source_filename, false, &kernel_source)); - *program = cl::Program(this->context(), sources); - std::string build_options_str = build_options + - " -Werror -cl-mad-enable -cl-fast-relaxed-math -I" + kernel_path_; + cl::Program::Sources sources; + sources.push_back(std::string(kernel_source.begin(), kernel_source.end())); + + *program = cl::Program(this->context(), sources); + + } else { + LOG(FATAL) << "Failed to open kernel file " << binary_filename << " or " + << source_filename; + } + + // Build program + std::string build_options_str = + build_options + " -Werror -cl-mad-enable -cl-fast-relaxed-math -I" + + kernel_path_; // TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math cl_int ret = program->build({device()}, build_options_str.c_str()); if (ret != CL_SUCCESS) { @@ -179,11 +251,40 @@ void OpenCLRuntime::BuildProgram(const std::string &program_file_name, } LOG(FATAL) << "Build program failed: " << ret; } + + // Write binary if necessary + if (!is_binary_filename_exist) { + size_t device_list_size = 1; + std::unique_ptr program_binary_sizes( + new size_t[device_list_size]); + cl_int err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * device_list_size, + program_binary_sizes.get(), nullptr); + MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; + std::unique_ptr[]> program_binaries( + new std::unique_ptr[ device_list_size ]); + for (cl_uint i = 0; i < device_list_size; ++i) { + program_binaries[i] = std::unique_ptr( + new unsigned char[program_binary_sizes[i]]); + } + + err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARIES, + sizeof(unsigned char *) * device_list_size, + program_binaries.get(), nullptr); + MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; + std::vector content( + reinterpret_cast(program_binaries[0].get()), + reinterpret_cast(program_binaries[0].get()) + + program_binary_sizes[0]); + + MACE_CHECK(WriteFile(binary_filename, true, content)); + } } -cl::Kernel OpenCLRuntime::BuildKernel(const std::string &program_name, - const std::string &kernel_name, - const std::set &build_options) { +cl::Kernel +OpenCLRuntime::BuildKernel(const std::string &program_name, + const std::string &kernel_name, + const std::set &build_options) { auto kernel_program_it = program_map_.find(program_name); if (kernel_program_it == program_map_.end()) { MACE_CHECK(false, program_name, " opencl kernel doesn't exist."); @@ -191,7 +292,7 @@ cl::Kernel OpenCLRuntime::BuildKernel(const std::string &program_name, std::string program_file_name = kernel_program_it->second; std::string build_options_str; - for(auto &option : build_options) { + for (auto &option : build_options) { build_options_str += " " + option; } std::string built_program_key = program_name + build_options_str; @@ -202,7 +303,10 @@ cl::Kernel OpenCLRuntime::BuildKernel(const std::string &program_name, if (built_program_it != built_program_map_.end()) { program = built_program_it->second; } else { - this->BuildProgram(program_file_name, build_options_str, &program); + std::string binary_file_name_prefix = + GenerateCLBinaryFilenamePrefix(built_program_key); + this->BuildProgram(program_file_name, binary_file_name_prefix, + build_options_str, &program); built_program_map_.emplace(built_program_key, program); } return cl::Kernel(program, kernel_name.c_str()); @@ -214,7 +318,7 @@ uint32_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() { return static_cast(size); } -uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel& kernel) { +uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) { unsigned long long size = 0; kernel.getWorkGroupInfo(device_, CL_KERNEL_WORK_GROUP_SIZE, &size); return static_cast(size); diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index fda5813aa539a77e76659bca175eafd3d7359628..cd9852511f018b49e1fe930ee049cb61056c55b1 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -44,9 +44,11 @@ class OpenCLRuntime { OpenCLRuntime(const OpenCLRuntime&) = delete; OpenCLRuntime &operator=(const OpenCLRuntime&) = delete; - void BuildProgram(const std::string &kernel_name, + void BuildProgram(const std::string &program_file_name, + const std::string &binary_file_name, const std::string &build_options, cl::Program *program); + std::string GenerateCLBinaryFilenamePrefix(const std::string &filename_msg); private: static bool enable_profiling_;