未验证 提交 af79018f 编写于 作者: Y Yuan Shuai 提交者: GitHub

[LITE][OPENCL] Fix opencl backend: Free opencl resources; Fix...

[LITE][OPENCL] Fix opencl backend: Free opencl resources; Fix AddKernel/GetKernel, program and all opencl kernels (#3344)

* [DONT MERGE] Fix opencl backend.

* [LITE][OPENCL] Fix overlapping kernels when adding/getting kernels for mnasnet/yolonano. test=develop

* Remove useless code. test=develop

* add all image kernels for Get/Add kernel. test=develop

* add all image kernels for Get/Add kernel. test=develop

* fix buffer kernels of opencl. test=develop

* Fix release of OpenCL resources. test=develop
上级 44d98be8
...@@ -36,8 +36,10 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, ...@@ -36,8 +36,10 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
STL::stringstream program_key_ss; STL::stringstream program_key_ss;
program_key_ss << file_name << options; program_key_ss << file_name << options;
std::string program_key = program_key_ss.str(); std::string program_key = program_key_ss.str();
auto it = programs_.find(program_key);
if (it != programs_.end()) { auto &programs = CLRuntime::Global()->programs();
auto it = programs.find(program_key);
if (it != programs.end()) {
VLOG(3) << " --- program -> " << program_key << " has been built --- "; VLOG(3) << " --- program -> " << program_key << " has been built --- ";
return *(it->second); return *(it->second);
} }
...@@ -48,14 +50,15 @@ cl::Program &CLContext::GetProgram(const std::string &file_name, ...@@ -48,14 +50,15 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
CLRuntime::Global()->BuildProgram(program.get(), options); CLRuntime::Global()->BuildProgram(program.get(), options);
VLOG(3) << " --- end build program -> " << program_key << " --- "; VLOG(3) << " --- end build program -> " << program_key << " --- ";
programs_[program_key] = std::move(program); programs[program_key] = std::move(program);
return *(programs_[program_key]); return *(programs[program_key]);
} }
void CLContext::AddKernel(const std::string &kernel_name, void CLContext::AddKernel(const std::string &kernel_name,
const std::string &file_name, const std::string &file_name,
const std::string &options) { const std::string &options,
const std::string &time_stamp) {
cl_int status{CL_SUCCESS}; cl_int status{CL_SUCCESS};
VLOG(3) << " --- to get program " << file_name << " --- "; VLOG(3) << " --- to get program " << file_name << " --- ";
auto program = GetProgram(file_name, options); auto program = GetProgram(file_name, options);
...@@ -65,25 +68,30 @@ void CLContext::AddKernel(const std::string &kernel_name, ...@@ -65,25 +68,30 @@ void CLContext::AddKernel(const std::string &kernel_name,
new cl::Kernel(program, kernel_name.c_str(), &status)); new cl::Kernel(program, kernel_name.c_str(), &status));
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
VLOG(3) << " --- end create kernel --- "; VLOG(3) << " --- end create kernel --- ";
kernels_.emplace_back(std::move(kernel));
auto &kernels = CLRuntime::Global()->kernels();
auto &kernel_offset_map = CLRuntime::Global()->kernel_offset();
kernels.emplace_back(std::move(kernel));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_name << options; kernel_key << kernel_name << options << time_stamp;
kernel_offset_[kernel_key.str()] = kernels_.size() - 1; kernel_offset_map[kernel_key.str()] = kernels.size() - 1;
} }
cl::Kernel &CLContext::GetKernel(const int index) { cl::Kernel &CLContext::GetKernel(const int index) {
VLOG(3) << " --- kernel count: " << kernels_.size() << " --- "; auto &kernels = CLRuntime::Global()->kernels();
CHECK(static_cast<size_t>(index) < kernels_.size()) VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
CHECK(static_cast<size_t>(index) < kernels.size())
<< "The index must be less than the size of kernels."; << "The index must be less than the size of kernels.";
CHECK(kernels_[index] != nullptr) CHECK(kernels[index] != nullptr)
<< "The target kernel pointer cannot be null."; << "The target kernel pointer cannot be null.";
return *(kernels_[index]); return *(kernels[index]);
} }
cl::Kernel &CLContext::GetKernel(const std::string &name) { cl::Kernel &CLContext::GetKernel(const std::string &name) {
auto it = kernel_offset_.find(name); auto &kernel_offset_map = CLRuntime::Global()->kernel_offset();
CHECK(it != kernel_offset_.end()) << "Cannot find the kernel function: " auto it = kernel_offset_map.find(name);
<< name; CHECK(it != kernel_offset_map.end()) << "Cannot find the kernel function: "
<< name;
return GetKernel(it->second); return GetKernel(it->second);
} }
......
...@@ -36,7 +36,8 @@ class CLContext { ...@@ -36,7 +36,8 @@ class CLContext {
void AddKernel(const std::string &kernel_name, void AddKernel(const std::string &kernel_name,
const std::string &file_name, const std::string &file_name,
const std::string &options = ""); const std::string &options = "",
const std::string &time_stamp = "");
cl::Kernel &GetKernel(const int index); cl::Kernel &GetKernel(const int index);
...@@ -45,16 +46,12 @@ class CLContext { ...@@ -45,16 +46,12 @@ class CLContext {
cl::NDRange DefaultWorkSize(const CLImage &image); cl::NDRange DefaultWorkSize(const CLImage &image);
cl::NDRange LocalWorkSize(cl::NDRange global_work_size, size_t max_work_size); cl::NDRange LocalWorkSize(cl::NDRange global_work_size, size_t max_work_size);
cl::NDRange LocalWorkSizeTurn(cl::NDRange global_work_size, cl::NDRange LocalWorkSizeTurn(cl::NDRange global_work_size,
size_t max_work_size, size_t max_work_size,
int divitor = 2); int divitor = 2);
// cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size, // cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size,
// size_t max_work_size); // size_t max_work_size);
private:
std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
std::vector<std::unique_ptr<cl::Kernel>> kernels_;
std::map<std::string, int> kernel_offset_;
}; };
} // namespace lite } // namespace lite
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "lite/backends/opencl/cl_runtime.h" #include "lite/backends/opencl/cl_runtime.h"
#include <string> #include <string>
#include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
...@@ -29,10 +30,26 @@ CLRuntime* CLRuntime::Global() { ...@@ -29,10 +30,26 @@ CLRuntime* CLRuntime::Global() {
CLRuntime::~CLRuntime() { CLRuntime::~CLRuntime() {
if (command_queue_ != nullptr) { if (command_queue_ != nullptr) {
command_queue_->flush();
command_queue_->finish(); command_queue_->finish();
} }
// For controlling the destruction order:
for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) {
clReleaseKernel(kernels_[kidx]->get());
kernels_[kidx].reset();
}
kernels_.clear();
kernel_offset_.clear();
for (auto& p : programs_) {
clReleaseProgram(p.second->get());
}
programs_.clear();
// For controlling the destruction order
command_queue_&& clReleaseCommandQueue(command_queue_->get());
command_queue_.reset(); command_queue_.reset();
context_&& clReleaseContext(context_->get());
context_.reset(); context_.reset();
device_.reset(); device_.reset();
platform_.reset(); platform_.reset();
...@@ -73,14 +90,14 @@ cl::CommandQueue& CLRuntime::command_queue() { ...@@ -73,14 +90,14 @@ cl::CommandQueue& CLRuntime::command_queue() {
return *command_queue_; return *command_queue_;
} }
std::unique_ptr<cl::Program> CLRuntime::CreateProgram( std::shared_ptr<cl::Program> CLRuntime::CreateProgram(
const cl::Context& context, std::string file_name) { const cl::Context& context, std::string file_name) {
auto cl_file = opencl_kernels_files.find(file_name); auto cl_file = opencl_kernels_files.find(file_name);
std::string content(cl_file->second.begin(), cl_file->second.end()); std::string content(cl_file->second.begin(), cl_file->second.end());
cl::Program::Sources sources; cl::Program::Sources sources;
sources.push_back(content); sources.push_back(content);
auto prog = auto prog =
std::unique_ptr<cl::Program>(new cl::Program(context, sources, &status_)); std::shared_ptr<cl::Program>(new cl::Program(context, sources, &status_));
VLOG(4) << "OpenCL kernel file name: " << file_name; VLOG(4) << "OpenCL kernel file name: " << file_name;
VLOG(4) << "Program source size: " << content.size(); VLOG(4) << "Program source size: " << content.size();
CL_CHECK_FATAL(status_); CL_CHECK_FATAL(status_);
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_include.h"
#include "lite/backends/opencl/cl_utility.h" #include "lite/backends/opencl/cl_utility.h"
...@@ -42,7 +43,7 @@ class CLRuntime { ...@@ -42,7 +43,7 @@ class CLRuntime {
cl::CommandQueue& command_queue(); cl::CommandQueue& command_queue();
std::unique_ptr<cl::Program> CreateProgram(const cl::Context& context, std::shared_ptr<cl::Program> CreateProgram(const cl::Context& context,
std::string file_name); std::string file_name);
std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context); std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context);
...@@ -57,6 +58,12 @@ class CLRuntime { ...@@ -57,6 +58,12 @@ class CLRuntime {
std::map<std::string, size_t>& GetDeviceInfo(); std::map<std::string, size_t>& GetDeviceInfo();
std::unordered_map<std::string, std::shared_ptr<cl::Program>>& programs() {
return programs_;
}
std::vector<std::unique_ptr<cl::Kernel>>& kernels() { return kernels_; }
std::map<std::string, int>& kernel_offset() { return kernel_offset_; }
private: private:
CLRuntime() = default; CLRuntime() = default;
...@@ -98,6 +105,12 @@ class CLRuntime { ...@@ -98,6 +105,12 @@ class CLRuntime {
std::shared_ptr<cl::CommandQueue> command_queue_{nullptr}; std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
std::unordered_map<std::string, std::shared_ptr<cl::Program>> programs_{};
std::vector<std::unique_ptr<cl::Kernel>> kernels_{};
std::map<std::string, int> kernel_offset_{};
cl_int status_{CL_SUCCESS}; cl_int status_{CL_SUCCESS};
bool initialized_{false}; bool initialized_{false};
......
...@@ -32,8 +32,10 @@ class ReluCompute ...@@ -32,8 +32,10 @@ class ReluCompute
std::string doc() const override { return "Relu using cl::Buffer, kFloat"; } std::string doc() const override { return "Relu using cl::Buffer, kFloat"; }
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/relu_kernel.cl", build_options_); "buffer/relu_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -46,7 +48,7 @@ class ReluCompute ...@@ -46,7 +48,7 @@ class ReluCompute
auto* x_buf = param.X->data<float, cl::Buffer>(); auto* x_buf = param.X->data<float, cl::Buffer>();
auto* out_buf = param.Out->mutable_data<float, cl::Buffer>(TARGET(kOpenCL)); auto* out_buf = param.Out->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << TargetToStr(param.X->target()); VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target()); VLOG(4) << TargetToStr(param.Out->target());
...@@ -74,6 +76,7 @@ class ReluCompute ...@@ -74,6 +76,7 @@ class ReluCompute
private: private:
std::string kernel_func_name_{"relu"}; std::string kernel_func_name_{"relu"};
std::string build_options_{"-DCL_DTYPE_float -DRELU"}; std::string build_options_{"-DCL_DTYPE_float -DRELU"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
...@@ -87,8 +90,10 @@ class SigmoidCompute ...@@ -87,8 +90,10 @@ class SigmoidCompute
} }
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/sigmoid_kernel.cl", build_options_); "buffer/sigmoid_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -101,7 +106,7 @@ class SigmoidCompute ...@@ -101,7 +106,7 @@ class SigmoidCompute
auto* x_buf = param.X->data<float, cl::Buffer>(); auto* x_buf = param.X->data<float, cl::Buffer>();
auto* out_buf = param.Out->mutable_data<float, cl::Buffer>(TARGET(kOpenCL)); auto* out_buf = param.Out->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << TargetToStr(param.X->target()); VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target()); VLOG(4) << TargetToStr(param.Out->target());
...@@ -129,6 +134,7 @@ class SigmoidCompute ...@@ -129,6 +134,7 @@ class SigmoidCompute
private: private:
std::string kernel_func_name_{"sigmoid"}; std::string kernel_func_name_{"sigmoid"};
std::string build_options_{"-DCL_DTYPE_float -DSIGMOID"}; std::string build_options_{"-DCL_DTYPE_float -DSIGMOID"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -77,11 +77,13 @@ class ActivationComputeImageDefault ...@@ -77,11 +77,13 @@ class ActivationComputeImageDefault
#endif #endif
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/activation_kernel.cl", build_options_); "image/activation_kernel.cl",
build_options_,
time_stamp_);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str()); kernel_ = context.cl_context()->GetKernel(kernel_key.str());
} }
...@@ -171,6 +173,7 @@ class ActivationComputeImageDefault ...@@ -171,6 +173,7 @@ class ActivationComputeImageDefault
cl::NDRange global_work_size_ = cl::NDRange{ cl::NDRange global_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)}; static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
} // namespace opencl } // namespace opencl
......
...@@ -43,8 +43,10 @@ class BilinearInterpImageCompute ...@@ -43,8 +43,10 @@ class BilinearInterpImageCompute
bilinear_interp_param_ = param_.get_mutable<param_t>(); bilinear_interp_param_ = param_.get_mutable<param_t>();
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/bilinear_interp_kernel.cl", build_options_); "image/bilinear_interp_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
} }
...@@ -103,7 +105,7 @@ class BilinearInterpImageCompute ...@@ -103,7 +105,7 @@ class BilinearInterpImageCompute
#endif #endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0; int arg_idx = 0;
...@@ -159,6 +161,7 @@ class BilinearInterpImageCompute ...@@ -159,6 +161,7 @@ class BilinearInterpImageCompute
param_t* bilinear_interp_param_{nullptr}; param_t* bilinear_interp_param_{nullptr};
std::string kernel_func_name_{"bilinear_interp"}; std::string kernel_func_name_{"bilinear_interp"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -38,8 +38,10 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL), ...@@ -38,8 +38,10 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
} else { } else {
kernel_func_name_ = "concat_mul"; kernel_func_name_ = "concat_mul";
} }
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/concat_kernel.cl", build_options_); "buffer/concat_kernel.cl",
build_options_,
time_stamp_);
auto axis = concat_param_->axis; auto axis = concat_param_->axis;
auto inputs = concat_param_->x; auto inputs = concat_param_->x;
...@@ -88,7 +90,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL), ...@@ -88,7 +90,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto inputs = param.x; auto inputs = param.x;
int arg_idx = 0; int arg_idx = 0;
...@@ -177,6 +179,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL), ...@@ -177,6 +179,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
param_t* concat_param_{nullptr}; param_t* concat_param_{nullptr};
std::string kernel_func_name_{}; std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_float"}; std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -40,8 +40,10 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -40,8 +40,10 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
kernel_func_name_ = "concat_mul"; kernel_func_name_ = "concat_mul";
} }
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/concat_kernel.cl", build_options_); "image/concat_kernel.cl",
build_options_,
time_stamp_);
auto axis = concat_param_->axis; auto axis = concat_param_->axis;
auto inputs = concat_param_->x; auto inputs = concat_param_->x;
...@@ -117,7 +119,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -117,7 +119,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto inputs = param.x; auto inputs = param.x;
int arg_idx = 0; int arg_idx = 0;
...@@ -251,6 +253,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL), ...@@ -251,6 +253,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
param_t* concat_param_{nullptr}; param_t* concat_param_{nullptr};
std::string kernel_func_name_{}; std::string kernel_func_name_{};
std::string build_options_{" -DCL_DTYPE_half"}; std::string build_options_{" -DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -114,8 +114,10 @@ void ConvCompute::PrepareForRun() { ...@@ -114,8 +114,10 @@ void ConvCompute::PrepareForRun() {
} }
for (size_t i = 0; i < kernel_func_names_.size(); i++) { for (size_t i = 0; i < kernel_func_names_.size(); i++) {
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_names_[i],
kernel_func_names_[i], kernel_func_paths_[i], build_options_[i]); kernel_func_paths_[i],
build_options_[i],
time_stamp_);
} }
} }
...@@ -153,7 +155,7 @@ void ConvCompute::GemmlikeConv2d() { ...@@ -153,7 +155,7 @@ void ConvCompute::GemmlikeConv2d() {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
std::stringstream kernel_key; std::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0]; kernel_key << kernel_func_names_[0] << build_options_[0] << time_stamp_;
auto img2col_kernel = context.cl_context()->GetKernel(kernel_key.str()); auto img2col_kernel = context.cl_context()->GetKernel(kernel_key.str());
int n_threads = c_in * h_out * w_out; int n_threads = c_in * h_out * w_out;
...@@ -218,7 +220,7 @@ void ConvCompute::GemmlikeConv2d() { ...@@ -218,7 +220,7 @@ void ConvCompute::GemmlikeConv2d() {
int n = h_out * w_out; int n = h_out * w_out;
VLOG(4) << "m = " << m << " n = " << n << " k = " << k; VLOG(4) << "m = " << m << " n = " << n << " k = " << k;
kernel_key.str(""); kernel_key.str("");
kernel_key << kernel_func_names_[1] << build_options_[1]; kernel_key << kernel_func_names_[1] << build_options_[1] << time_stamp_;
auto gemm_kernel = context.cl_context()->GetKernel(kernel_key.str()); auto gemm_kernel = context.cl_context()->GetKernel(kernel_key.str());
GemmBatched( GemmBatched(
gemm_kernel, col_buf, filter_buf, bias_buf, output_buf, bs, m, n, k); gemm_kernel, col_buf, filter_buf, bias_buf, output_buf, bs, m, n, k);
...@@ -249,7 +251,8 @@ void ConvCompute::Conv2d1x1() { ...@@ -249,7 +251,8 @@ void ConvCompute::Conv2d1x1() {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
std::stringstream kernel_key; std::stringstream kernel_key;
kernel_key << kernel_func_names_.front() << build_options_.front(); kernel_key << kernel_func_names_.front() << build_options_.front()
<< time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
GemmBatched(kernel, x_d, filter_d, bias_d, output_d, batch_size, m, n, k); GemmBatched(kernel, x_d, filter_d, bias_d, output_d, batch_size, m, n, k);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h" #include "lite/operators/op_params.h"
namespace paddle { namespace paddle {
...@@ -55,6 +56,7 @@ class ConvCompute ...@@ -55,6 +56,7 @@ class ConvCompute
std::vector<std::string> kernel_func_names_{}; std::vector<std::string> kernel_func_names_{};
std::vector<std::string> kernel_func_paths_{}; std::vector<std::string> kernel_func_paths_{};
std::vector<std::string> build_options_{}; std::vector<std::string> build_options_{};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -369,15 +369,17 @@ void ConvImageCompute::PrepareForRun() { ...@@ -369,15 +369,17 @@ void ConvImageCompute::PrepareForRun() {
build_options_.push_back(build_options_single); build_options_.push_back(build_options_single);
for (size_t i = 0; i < kernel_func_names_.size(); i++) { for (size_t i = 0; i < kernel_func_names_.size(); i++) {
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_names_[i],
kernel_func_names_[i], kernel_func_paths_[i], build_options_[i]); kernel_func_paths_[i],
build_options_[i],
time_stamp_);
} }
VLOG(4) << "global_work_size_[3D]: {" << global_work_size_[0] << "," VLOG(4) << "global_work_size_[3D]: {" << global_work_size_[0] << ","
<< global_work_size_[1] << "," << global_work_size_[2] << "}"; << global_work_size_[1] << "," << global_work_size_[2] << "}";
std::stringstream kernel_key; std::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0]; kernel_key << kernel_func_names_[0] << build_options_[0] << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str()); kernel_ = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str(); VLOG(4) << "kernel ready ... " << kernel_key.str();
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h" #include "lite/operators/op_params.h"
namespace paddle { namespace paddle {
...@@ -56,6 +57,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -56,6 +57,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
std::vector<std::string> kernel_func_names_{}; std::vector<std::string> kernel_func_names_{};
std::vector<std::string> kernel_func_paths_{}; std::vector<std::string> kernel_func_paths_{};
std::vector<std::string> build_options_{}; std::vector<std::string> build_options_{};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
Tensor filter_gpu_image_; Tensor filter_gpu_image_;
Tensor bias_gpu_image_; Tensor bias_gpu_image_;
......
...@@ -44,8 +44,10 @@ class DepthwiseConv2dCompute ...@@ -44,8 +44,10 @@ class DepthwiseConv2dCompute
build_options_ += " -DRELU6"; build_options_ += " -DRELU6";
} }
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/depthwise_conv2d_kernel.cl", build_options_); "buffer/depthwise_conv2d_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -67,7 +69,7 @@ class DepthwiseConv2dCompute ...@@ -67,7 +69,7 @@ class DepthwiseConv2dCompute
param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL)); param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status; cl_int status;
...@@ -120,6 +122,7 @@ class DepthwiseConv2dCompute ...@@ -120,6 +122,7 @@ class DepthwiseConv2dCompute
private: private:
std::string kernel_func_name_{"depthwise_conv2d"}; std::string kernel_func_name_{"depthwise_conv2d"};
std::string build_options_{"-DCL_DTYPE_float"}; std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -40,8 +40,10 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -40,8 +40,10 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/dropout_kernel.cl", build_options_); "image/dropout_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -63,7 +65,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -63,7 +65,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status; cl_int status;
...@@ -101,6 +103,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -101,6 +103,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
private: private:
std::string kernel_func_name_{"dropout"}; std::string kernel_func_name_{"dropout"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -25,8 +25,10 @@ namespace opencl { ...@@ -25,8 +25,10 @@ namespace opencl {
void ElementwiseAddCompute::PrepareForRun() { void ElementwiseAddCompute::PrepareForRun() {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/elementwise_add_kernel.cl", build_options_); "buffer/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
ele_param_ = param_.get_mutable<param_t>(); ele_param_ = param_.get_mutable<param_t>();
UpdateParams(); UpdateParams();
} }
...@@ -39,7 +41,7 @@ void ElementwiseAddCompute::Run() { ...@@ -39,7 +41,7 @@ void ElementwiseAddCompute::Run() {
auto* out_buf = ele_param_->Out->template mutable_data<float, cl::Buffer>( auto* out_buf = ele_param_->Out->template mutable_data<float, cl::Buffer>(
TARGET(kOpenCL)); TARGET(kOpenCL));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG #ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(ele_param_->X->target()); VLOG(4) << TargetToStr(ele_param_->X->target());
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h" #include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
...@@ -46,6 +47,7 @@ class ElementwiseAddCompute ...@@ -46,6 +47,7 @@ class ElementwiseAddCompute
param_t* ele_param_{nullptr}; param_t* ele_param_{nullptr};
std::string kernel_func_name_{"elementwise_add"}; std::string kernel_func_name_{"elementwise_add"};
std::string build_options_{"-DCL_DTYPE_float"}; std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -59,11 +59,13 @@ void ElementwiseAddImageCompute::ReInitWhenNeeded() { ...@@ -59,11 +59,13 @@ void ElementwiseAddImageCompute::ReInitWhenNeeded() {
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/elementwise_add_kernel.cl", build_options_); "image/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str()); kernel_ = context.cl_context()->GetKernel(kernel_key.str());
// compute image shape // compute image shape
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <vector> #include <vector>
#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h" #include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
...@@ -54,8 +55,10 @@ class ElementwiseAddImageCompute ...@@ -54,8 +55,10 @@ class ElementwiseAddImageCompute
{static_cast<DDim::value_type>(1), static_cast<DDim::value_type>(1)})); {static_cast<DDim::value_type>(1), static_cast<DDim::value_type>(1)}));
DDim out_img_shape_ = DDim(std::vector<DDim::value_type>( DDim out_img_shape_ = DDim(std::vector<DDim::value_type>(
{static_cast<DDim::value_type>(1), static_cast<DDim::value_type>(1)})); {static_cast<DDim::value_type>(1), static_cast<DDim::value_type>(1)}));
std::string kernel_func_name_{"elementwise_add"}; std::string kernel_func_name_{"elementwise_add"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
bool first_epoch_for_reinit_{true}; bool first_epoch_for_reinit_{true};
cl::Kernel kernel_; cl::Kernel kernel_;
cl::NDRange global_work_size_ = cl::NDRange{ cl::NDRange global_work_size_ = cl::NDRange{
......
...@@ -71,8 +71,10 @@ class ElementwiseMulImageCompute ...@@ -71,8 +71,10 @@ class ElementwiseMulImageCompute
VLOG(4) << "bias_dims.size():" << bias_dims.size(); VLOG(4) << "bias_dims.size():" << bias_dims.size();
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/elementwise_mul_kernel.cl", build_options_); "image/elementwise_mul_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -114,7 +116,7 @@ class ElementwiseMulImageCompute ...@@ -114,7 +116,7 @@ class ElementwiseMulImageCompute
#endif #endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
auto bias_dims = y->dims(); auto bias_dims = y->dims();
...@@ -201,6 +203,7 @@ class ElementwiseMulImageCompute ...@@ -201,6 +203,7 @@ class ElementwiseMulImageCompute
param_t* ele_param_{nullptr}; param_t* ele_param_{nullptr};
std::string kernel_func_name_{"elementwise_mul"}; std::string kernel_func_name_{"elementwise_mul"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -49,8 +49,10 @@ void ElementwiseSubImageCompute::PrepareForRun() { ...@@ -49,8 +49,10 @@ void ElementwiseSubImageCompute::PrepareForRun() {
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/elementwise_sub_kernel.cl", build_options_); "image/elementwise_sub_kernel.cl",
build_options_,
time_stamp_);
} }
void ElementwiseSubImageCompute::Run() { void ElementwiseSubImageCompute::Run() {
...@@ -93,7 +95,7 @@ void ElementwiseSubImageCompute::Run() { ...@@ -93,7 +95,7 @@ void ElementwiseSubImageCompute::Run() {
#endif #endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0; int arg_idx = 0;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <string> #include <string>
#include "lite/backends/opencl/cl_half.h" #include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h" #include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
...@@ -44,6 +45,7 @@ class ElementwiseSubImageCompute ...@@ -44,6 +45,7 @@ class ElementwiseSubImageCompute
param_t* ele_param_{nullptr}; param_t* ele_param_{nullptr};
std::string kernel_func_name_{"elementwise_sub"}; std::string kernel_func_name_{"elementwise_sub"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h" #include "lite/operators/op_params.h"
#include "lite/utils/replace_stl/stream.h" #include "lite/utils/replace_stl/stream.h"
#include "lite/utils/string.h" #include "lite/utils/string.h"
...@@ -74,10 +75,12 @@ class FcCompute ...@@ -74,10 +75,12 @@ class FcCompute
} }
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/fc_kernel.cl", build_options_); "buffer/fc_kernel.cl",
build_options_,
time_stamp_);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str()); kernel_ = context.cl_context()->GetKernel(kernel_key.str());
// compute global work size // compute global work size
...@@ -136,6 +139,7 @@ class FcCompute ...@@ -136,6 +139,7 @@ class FcCompute
param_t* fc_param_{nullptr}; param_t* fc_param_{nullptr};
std::string kernel_func_name_{}; std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_float "}; std::string build_options_{"-DCL_DTYPE_float "};
std::string time_stamp_{GetTimeStamp()};
bool first_epoch_for_reinit_{true}; bool first_epoch_for_reinit_{true};
DDim last_x_dims_; DDim last_x_dims_;
cl::NDRange global_work_size_; cl::NDRange global_work_size_;
......
...@@ -28,8 +28,10 @@ class FusionElementwiseAddActivationCompute : public ElementwiseAddCompute { ...@@ -28,8 +28,10 @@ class FusionElementwiseAddActivationCompute : public ElementwiseAddCompute {
void PrepareForRun() override { void PrepareForRun() override {
build_options_ += " -DRELU"; build_options_ += " -DRELU";
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/elementwise_add_kernel.cl", build_options_); "buffer/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
ele_param_ = param_.get_mutable<param_t>(); ele_param_ = param_.get_mutable<param_t>();
UpdateParams(); UpdateParams();
auto act_t = static_cast<param_t*>(ele_param_)->act_type; auto act_t = static_cast<param_t*>(ele_param_)->act_type;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_include.h"
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
#include "lite/kernels/opencl/elementwise_add_image_compute.h" #include "lite/kernels/opencl/elementwise_add_image_compute.h"
#include "lite/kernels/opencl/image_helper.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -30,8 +31,10 @@ class FusionElementwiseAddActivationImageCompute ...@@ -30,8 +31,10 @@ class FusionElementwiseAddActivationImageCompute
void PrepareForRun() override { void PrepareForRun() override {
build_options_ += " -DRELU"; build_options_ += " -DRELU";
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/elementwise_add_kernel.cl", build_options_); "image/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
ele_param_ = param_.get_mutable<param_t>(); ele_param_ = param_.get_mutable<param_t>();
auto act_t = static_cast<param_t*>(ele_param_)->act_type; auto act_t = static_cast<param_t*>(ele_param_)->act_type;
VLOG(4) << "act: " << act_t; VLOG(4) << "act: " << act_t;
......
...@@ -40,12 +40,14 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -40,12 +40,14 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/grid_sampler_kernel.cl", build_options_); "image/grid_sampler_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str()); kernel_ = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << "kernel_key: " << kernel_key.str(); VLOG(4) << "kernel_key: " << kernel_key.str();
} }
...@@ -150,6 +152,7 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -150,6 +152,7 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
cl::NDRange global_work_size_ = cl::NDRange{ cl::NDRange global_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)}; static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -74,6 +74,12 @@ static std::vector<size_t> DefaultWorkSize(const DDim& image_dim, ...@@ -74,6 +74,12 @@ static std::vector<size_t> DefaultWorkSize(const DDim& image_dim,
LOG(FATAL) << " not support this dim, need imp "; LOG(FATAL) << " not support this dim, need imp ";
} }
// Returns the current wall-clock time as a decimal string of the form
// "<seconds since epoch><microseconds, zero-padded to 6 digits>".
//
// Kernel classes store this value in `time_stamp_` and append it to
// `kernel_func_name_ + build_options_` to form their kernel lookup key, so
// the stamp has to differ between kernel instances.
//
// The seconds field is included because tv_usec alone is only the offset
// inside the current second (0..999999): it wraps every second, so instances
// created in different seconds could otherwise receive the same stamp.
//
// NOTE(review): two calls landing in the same microsecond still return the
// same string; if strict uniqueness is required, a process-wide counter would
// be needed in addition to the clock.
static const std::string GetTimeStamp() {
  struct timeval time;
  gettimeofday(&time, nullptr);
  // Left-pad the microsecond field so the concatenation is unambiguous
  // (e.g. 1s + 23us and 12s + 3us must not both render as "123").
  std::string usec = std::to_string(time.tv_usec);
  if (usec.size() < 6) {
    usec.insert(0, 6 - usec.size(), '0');
  }
  return std::to_string(static_cast<long long>(time.tv_sec)) + usec;
}
} // namespace opencl } // namespace opencl
} // namespace kernels } // namespace kernels
} // namespace lite } // namespace lite
......
...@@ -60,8 +60,10 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -60,8 +60,10 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
} }
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/instance_norm_kernel.cl", build_options_); "image/instance_norm_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
} }
...@@ -115,7 +117,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -115,7 +117,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
out_image_shape["width"], out_image_shape["height"]); out_image_shape["width"], out_image_shape["height"]);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status = kernel.setArg(0, out_w); cl_int status = kernel.setArg(0, out_w);
...@@ -180,8 +182,10 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -180,8 +182,10 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
bias_image_.mutable_data<half_t, cl::Image2D>( bias_image_.mutable_data<half_t, cl::Image2D>(
scale_img_size[0], scale_img_size[1], bias_img.data()); scale_img_size[0], scale_img_size[1], bias_img.data());
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/instance_norm_kernel.cl", build_options_); "image/instance_norm_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
} }
...@@ -234,7 +238,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -234,7 +238,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
#endif #endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
auto* scale_img = scale_image_.data<half_t, cl::Image2D>(); auto* scale_img = scale_image_.data<half_t, cl::Image2D>();
auto* bias_img = bias_image_.data<half_t, cl::Image2D>(); auto* bias_img = bias_image_.data<half_t, cl::Image2D>();
...@@ -271,6 +275,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -271,6 +275,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
param_t* instance_norm_param_{nullptr}; param_t* instance_norm_param_{nullptr};
std::string kernel_func_name_{"instance_norm_onnx"}; std::string kernel_func_name_{"instance_norm_onnx"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
Tensor scale_image_; Tensor scale_image_;
Tensor bias_image_; Tensor bias_image_;
......
...@@ -48,7 +48,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -48,7 +48,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
beta_ = lrn_param_->beta; beta_ = lrn_param_->beta;
norm_region_ = lrn_param_->norm_region; norm_region_ = lrn_param_->norm_region;
context.cl_context()->AddKernel( context.cl_context()->AddKernel(
kernel_func_name_, "image/lrn_kernel.cl", build_options_); kernel_func_name_, "image/lrn_kernel.cl", build_options_, time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
} }
...@@ -91,7 +91,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -91,7 +91,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
#endif #endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0; int arg_idx = 0;
...@@ -152,6 +152,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -152,6 +152,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
std::string norm_region_{"AcrossChannels"}; std::string norm_region_{"AcrossChannels"};
std::string kernel_func_name_{"lrn"}; std::string kernel_func_name_{"lrn"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "lite/backends/opencl/cl_include.h" #include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h" #include "lite/operators/op_params.h"
#include "lite/utils/replace_stl/stream.h" #include "lite/utils/replace_stl/stream.h"
#include "lite/utils/string.h" #include "lite/utils/string.h"
...@@ -32,8 +33,10 @@ class MulCompute ...@@ -32,8 +33,10 @@ class MulCompute
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/mat_mul_kernel.cl", build_options_); "buffer/mat_mul_kernel.cl",
build_options_,
time_stamp_);
const auto& param = *param_.get_mutable<param_t>(); const auto& param = *param_.get_mutable<param_t>();
const auto* x_data = param.x->data<float>(); const auto* x_data = param.x->data<float>();
const auto* y_data = param.y->data<float>(); const auto* y_data = param.y->data<float>();
...@@ -68,7 +71,7 @@ class MulCompute ...@@ -68,7 +71,7 @@ class MulCompute
param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL)); param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status; cl_int status;
...@@ -103,6 +106,7 @@ class MulCompute ...@@ -103,6 +106,7 @@ class MulCompute
int m_, n_, k_; int m_, n_, k_;
std::string kernel_func_name_{"mat_mul"}; std::string kernel_func_name_{"mat_mul"};
std::string build_options_{"-DCL_DTYPE_float"}; std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -38,8 +38,10 @@ class NearestInterpComputeImageDefault ...@@ -38,8 +38,10 @@ class NearestInterpComputeImageDefault
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/nearest_interp_kernel.cl", build_options_); "image/nearest_interp_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
} }
...@@ -66,7 +68,7 @@ class NearestInterpComputeImageDefault ...@@ -66,7 +68,7 @@ class NearestInterpComputeImageDefault
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0; int arg_idx = 0;
...@@ -121,6 +123,7 @@ class NearestInterpComputeImageDefault ...@@ -121,6 +123,7 @@ class NearestInterpComputeImageDefault
private: private:
std::string kernel_func_name_{"nearest_interp"}; std::string kernel_func_name_{"nearest_interp"};
std::string build_options_{" -DCL_DTYPE_half"}; std::string build_options_{" -DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -52,8 +52,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL), ...@@ -52,8 +52,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
} }
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/pad2d_kernel.cl", build_options_); "image/pad2d_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
} }
...@@ -93,7 +95,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL), ...@@ -93,7 +95,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
#endif #endif
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0; int arg_idx = 0;
...@@ -159,6 +161,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL), ...@@ -159,6 +161,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
param_t* pad2d_param_{nullptr}; param_t* pad2d_param_{nullptr};
std::string kernel_func_name_{}; std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -37,8 +37,10 @@ class PoolCompute ...@@ -37,8 +37,10 @@ class PoolCompute
const auto& param = *param_.get_mutable<param_t>(); const auto& param = *param_.get_mutable<param_t>();
kernel_func_name_ += param.pooling_type; kernel_func_name_ += param.pooling_type;
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "buffer/pool_kernel.cl", build_options_); "buffer/pool_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -69,7 +71,7 @@ class PoolCompute ...@@ -69,7 +71,7 @@ class PoolCompute
auto* output_buf = auto* output_buf =
param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL)); param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status; cl_int status;
auto numel = out_dims.production(); auto numel = out_dims.production();
...@@ -117,6 +119,7 @@ class PoolCompute ...@@ -117,6 +119,7 @@ class PoolCompute
private: private:
std::string kernel_func_name_{"pool_"}; std::string kernel_func_name_{"pool_"};
std::string build_options_{"-DCL_DTYPE_float"}; std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -47,7 +47,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -47,7 +47,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(
kernel_func_name_, "image/pool_kernel.cl", build_options_); kernel_func_name_, "image/pool_kernel.cl", build_options_, time_stamp_);
} }
void Run() override { void Run() override {
...@@ -112,7 +112,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -112,7 +112,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
// VLOG(4) << "out_image" << out_img; // VLOG(4) << "out_image" << out_img;
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int c_block = (out_dims[1] + 3) / 4; int c_block = (out_dims[1] + 3) / 4;
...@@ -164,6 +164,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -164,6 +164,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
private: private:
std::string kernel_func_name_{"pool_"}; std::string kernel_func_name_{"pool_"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -36,8 +36,10 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL), ...@@ -36,8 +36,10 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/reshape_kernel.cl", build_options_); "image/reshape_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -110,7 +112,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL), ...@@ -110,7 +112,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG #ifndef LITE_SHUTDOWN_LOG
...@@ -166,6 +168,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL), ...@@ -166,6 +168,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
private: private:
std::string kernel_func_name_{"reshape"}; std::string kernel_func_name_{"reshape"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -37,12 +37,14 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -37,12 +37,14 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/scale_kernel.cl", build_options_); "image/scale_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str()); kernel_ = context.cl_context()->GetKernel(kernel_key.str());
} }
...@@ -105,6 +107,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -105,6 +107,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
private: private:
std::string kernel_func_name_{"scale"}; std::string kernel_func_name_{"scale"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
param_t* scale_param_{nullptr}; param_t* scale_param_{nullptr};
......
...@@ -38,8 +38,10 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -38,8 +38,10 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
void PrepareForRun() override { void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
VLOG(1) << "kernel_func_name_:" << kernel_func_name_; VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
context.cl_context()->AddKernel( context.cl_context()->AddKernel(kernel_func_name_,
kernel_func_name_, "image/slice_kernel.cl", build_options_); "image/slice_kernel.cl",
build_options_,
time_stamp_);
} }
void Run() override { void Run() override {
...@@ -68,7 +70,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -68,7 +70,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key; STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_; kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str()); auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status; cl_int status;
...@@ -108,6 +110,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL), ...@@ -108,6 +110,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
private: private:
std::string kernel_func_name_{"slice"}; std::string kernel_func_name_{"slice"};
std::string build_options_{"-DCL_DTYPE_half"}; std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
}; };
......
...@@ -192,6 +192,7 @@ function build_opencl { ...@@ -192,6 +192,7 @@ function build_opencl {
cmake_opencl ${os} ${abi} ${lang} cmake_opencl ${os} ${abi} ${lang}
make opencl_clhpp -j$NUM_CORES_FOR_COMPILE make opencl_clhpp -j$NUM_CORES_FOR_COMPILE
make publish_inference -j$NUM_CORES_FOR_COMPILE
build $TESTS_FILE build $TESTS_FILE
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册