提交 296bda4d 编写于 作者: Y Yuan Shuai 提交者: GitHub

[LITE][OPENCL] Fix opencl backend: Free opencl resources; Fix...

[LITE][OPENCL] Fix opencl backend: Free opencl resources; Fix AddKernel/GetKernel, program and all opencl kernels (#3344)

* [DONT MERGE] Fix opencl backend.

* [LITE][OPENCL] Fix kernels overlapped when add/get for kernels of mnasnet/yolonano. test=develop

* remove useless. test=develop

* add all image kernels for Get/Add kernel. test=develop

* add all image kernels for Get/Add kernel. test=develop

* fix buffer kernels of opencl. test=develop

* fix release opencl. test=develop
上级 e82272d4
......@@ -36,8 +36,10 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
STL::stringstream program_key_ss;
program_key_ss << file_name << options;
std::string program_key = program_key_ss.str();
auto it = programs_.find(program_key);
if (it != programs_.end()) {
auto &programs = CLRuntime::Global()->programs();
auto it = programs.find(program_key);
if (it != programs.end()) {
VLOG(3) << " --- program -> " << program_key << " has been built --- ";
return *(it->second);
}
......@@ -48,14 +50,15 @@ cl::Program &CLContext::GetProgram(const std::string &file_name,
CLRuntime::Global()->BuildProgram(program.get(), options);
VLOG(3) << " --- end build program -> " << program_key << " --- ";
programs_[program_key] = std::move(program);
programs[program_key] = std::move(program);
return *(programs_[program_key]);
return *(programs[program_key]);
}
void CLContext::AddKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options) {
const std::string &options,
const std::string &time_stamp) {
cl_int status{CL_SUCCESS};
VLOG(3) << " --- to get program " << file_name << " --- ";
auto program = GetProgram(file_name, options);
......@@ -65,25 +68,30 @@ void CLContext::AddKernel(const std::string &kernel_name,
new cl::Kernel(program, kernel_name.c_str(), &status));
CL_CHECK_FATAL(status);
VLOG(3) << " --- end create kernel --- ";
kernels_.emplace_back(std::move(kernel));
auto &kernels = CLRuntime::Global()->kernels();
auto &kernel_offset_map = CLRuntime::Global()->kernel_offset();
kernels.emplace_back(std::move(kernel));
STL::stringstream kernel_key;
kernel_key << kernel_name << options;
kernel_offset_[kernel_key.str()] = kernels_.size() - 1;
kernel_key << kernel_name << options << time_stamp;
kernel_offset_map[kernel_key.str()] = kernels.size() - 1;
}
// Returns a reference to the kernel stored at position `index` of the kernel
// list. CHECK-fails when `index` is not below the list size or when the slot
// holds a null pointer.
//
// NOTE(review): this span appears to be a rendered diff. Statements that use
// the member `kernels_` are immediately followed by counterparts that use the
// list obtained from CLRuntime::Global()->kernels(); presumably the former
// are the removed lines and the latter their replacements. Confirm which set
// is live before compiling.
cl::Kernel &CLContext::GetKernel(const int index) {
VLOG(3) << " --- kernel count: " << kernels_.size() << " --- ";
CHECK(static_cast<size_t>(index) < kernels_.size())
// Kernel list owned by the global CLRuntime.
auto &kernels = CLRuntime::Global()->kernels();
VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
// `index` is cast to size_t before the bounds comparison.
CHECK(static_cast<size_t>(index) < kernels.size())
<< "The index must be less than the size of kernels.";
CHECK(kernels_[index] != nullptr)
CHECK(kernels[index] != nullptr)
<< "The target kernel pointer cannot be null.";
return *(kernels_[index]);
return *(kernels[index]);
}
// Looks `name` up in the kernel-offset map and forwards the stored index to
// GetKernel(int). CHECK-fails with "Cannot find the kernel function" when the
// key is not present.
//
// NOTE(review): this span appears to be a rendered diff. The lookup through
// the member `kernel_offset_` is followed by the same lookup through
// CLRuntime::Global()->kernel_offset(); presumably the first is the removed
// version and the second its replacement. Confirm before compiling (as
// written, `it` is declared twice).
cl::Kernel &CLContext::GetKernel(const std::string &name) {
auto it = kernel_offset_.find(name);
CHECK(it != kernel_offset_.end()) << "Cannot find the kernel function: "
<< name;
// Offset map owned by the global CLRuntime.
auto &kernel_offset_map = CLRuntime::Global()->kernel_offset();
auto it = kernel_offset_map.find(name);
CHECK(it != kernel_offset_map.end()) << "Cannot find the kernel function: "
<< name;
// it->second is the position passed to the index-based overload.
return GetKernel(it->second);
}
......
......@@ -36,7 +36,8 @@ class CLContext {
void AddKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options = "");
const std::string &options = "",
const std::string &time_stamp = "");
cl::Kernel &GetKernel(const int index);
......@@ -45,16 +46,12 @@ class CLContext {
cl::NDRange DefaultWorkSize(const CLImage &image);
cl::NDRange LocalWorkSize(cl::NDRange global_work_size, size_t max_work_size);
cl::NDRange LocalWorkSizeTurn(cl::NDRange global_work_size,
size_t max_work_size,
int divitor = 2);
// cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size,
// size_t max_work_size);
private:
std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
std::vector<std::unique_ptr<cl::Kernel>> kernels_;
std::map<std::string, int> kernel_offset_;
};
} // namespace lite
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "lite/backends/opencl/cl_runtime.h"
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "lite/utils/cp_logging.h"
......@@ -29,10 +30,26 @@ CLRuntime* CLRuntime::Global() {
CLRuntime::~CLRuntime() {
if (command_queue_ != nullptr) {
command_queue_->flush();
command_queue_->finish();
}
// For controlling the destruction order:
for (size_t kidx = 0; kidx < kernels_.size(); ++kidx) {
clReleaseKernel(kernels_[kidx]->get());
kernels_[kidx].reset();
}
kernels_.clear();
kernel_offset_.clear();
for (auto& p : programs_) {
clReleaseProgram(p.second->get());
}
programs_.clear();
// For controlling the destruction order
command_queue_&& clReleaseCommandQueue(command_queue_->get());
command_queue_.reset();
context_&& clReleaseContext(context_->get());
context_.reset();
device_.reset();
platform_.reset();
......@@ -73,14 +90,14 @@ cl::CommandQueue& CLRuntime::command_queue() {
return *command_queue_;
}
std::unique_ptr<cl::Program> CLRuntime::CreateProgram(
std::shared_ptr<cl::Program> CLRuntime::CreateProgram(
const cl::Context& context, std::string file_name) {
auto cl_file = opencl_kernels_files.find(file_name);
std::string content(cl_file->second.begin(), cl_file->second.end());
cl::Program::Sources sources;
sources.push_back(content);
auto prog =
std::unique_ptr<cl::Program>(new cl::Program(context, sources, &status_));
std::shared_ptr<cl::Program>(new cl::Program(context, sources, &status_));
VLOG(4) << "OpenCL kernel file name: " << file_name;
VLOG(4) << "Program source size: " << content.size();
CL_CHECK_FATAL(status_);
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "lite/backends/opencl/cl_include.h"
#include "lite/backends/opencl/cl_utility.h"
......@@ -42,7 +43,7 @@ class CLRuntime {
cl::CommandQueue& command_queue();
std::unique_ptr<cl::Program> CreateProgram(const cl::Context& context,
std::shared_ptr<cl::Program> CreateProgram(const cl::Context& context,
std::string file_name);
std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context);
......@@ -57,6 +58,12 @@ class CLRuntime {
std::map<std::string, size_t>& GetDeviceInfo();
// Program table held by the runtime, keyed by a program-key string and
// storing shared cl::Program handles. Handed out by mutable reference so
// callers can both look entries up and insert new ones.
std::unordered_map<std::string, std::shared_ptr<cl::Program>>& programs() {
  return programs_;
}

// Kernel objects held by the runtime, handed out by mutable reference.
std::vector<std::unique_ptr<cl::Kernel>>& kernels() {
  return kernels_;
}

// Table from a kernel-key string to an int offset, handed out by mutable
// reference.
std::map<std::string, int>& kernel_offset() {
  return kernel_offset_;
}
private:
CLRuntime() = default;
......@@ -98,6 +105,12 @@ class CLRuntime {
std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
std::unordered_map<std::string, std::shared_ptr<cl::Program>> programs_{};
std::vector<std::unique_ptr<cl::Kernel>> kernels_{};
std::map<std::string, int> kernel_offset_{};
cl_int status_{CL_SUCCESS};
bool initialized_{false};
......
......@@ -32,8 +32,10 @@ class ReluCompute
std::string doc() const override { return "Relu using cl::Buffer, kFloat"; }
// Registers this op's kernel function from "buffer/relu_kernel.cl" with the
// OpenCL context obtained from ctx_.
//
// NOTE(review): this span appears to be a rendered diff. The three-argument
// AddKernel call looks like the removed line and the four-argument call
// (which also passes time_stamp_) like its replacement; confirm which one is
// live.
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/relu_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/relu_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -46,7 +48,7 @@ class ReluCompute
auto* x_buf = param.X->data<float, cl::Buffer>();
auto* out_buf = param.Out->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target());
......@@ -74,6 +76,7 @@ class ReluCompute
private:
std::string kernel_func_name_{"relu"};
std::string build_options_{"-DCL_DTYPE_float -DRELU"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......@@ -87,8 +90,10 @@ class SigmoidCompute
}
// Registers this op's kernel function from "buffer/sigmoid_kernel.cl" with
// the OpenCL context obtained from ctx_.
//
// NOTE(review): this span appears to be a rendered diff. The three-argument
// AddKernel call looks like the removed line and the four-argument call
// (which also passes time_stamp_) like its replacement; confirm which one is
// live.
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/sigmoid_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/sigmoid_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -101,7 +106,7 @@ class SigmoidCompute
auto* x_buf = param.X->data<float, cl::Buffer>();
auto* out_buf = param.Out->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << TargetToStr(param.X->target());
VLOG(4) << TargetToStr(param.Out->target());
......@@ -129,6 +134,7 @@ class SigmoidCompute
private:
std::string kernel_func_name_{"sigmoid"};
std::string build_options_{"-DCL_DTYPE_float -DSIGMOID"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -77,11 +77,13 @@ class ActivationComputeImageDefault
#endif
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/activation_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/activation_kernel.cl",
build_options_,
time_stamp_);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str());
}
......@@ -171,6 +173,7 @@ class ActivationComputeImageDefault
cl::NDRange global_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
} // namespace opencl
......
......@@ -43,8 +43,10 @@ class BilinearInterpImageCompute
bilinear_interp_param_ = param_.get_mutable<param_t>();
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/bilinear_interp_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/bilinear_interp_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
}
......@@ -103,7 +105,7 @@ class BilinearInterpImageCompute
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0;
......@@ -159,6 +161,7 @@ class BilinearInterpImageCompute
param_t* bilinear_interp_param_{nullptr};
std::string kernel_func_name_{"bilinear_interp"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -38,8 +38,10 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
} else {
kernel_func_name_ = "concat_mul";
}
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/concat_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/concat_kernel.cl",
build_options_,
time_stamp_);
auto axis = concat_param_->axis;
auto inputs = concat_param_->x;
......@@ -88,7 +90,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto inputs = param.x;
int arg_idx = 0;
......@@ -177,6 +179,7 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
param_t* concat_param_{nullptr};
std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -40,8 +40,10 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
kernel_func_name_ = "concat_mul";
}
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
context.cl_context()->AddKernel(
kernel_func_name_, "image/concat_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/concat_kernel.cl",
build_options_,
time_stamp_);
auto axis = concat_param_->axis;
auto inputs = concat_param_->x;
......@@ -117,7 +119,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto inputs = param.x;
int arg_idx = 0;
......@@ -251,6 +253,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
param_t* concat_param_{nullptr};
std::string kernel_func_name_{};
std::string build_options_{" -DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -114,8 +114,10 @@ void ConvCompute::PrepareForRun() {
}
for (size_t i = 0; i < kernel_func_names_.size(); i++) {
context.cl_context()->AddKernel(
kernel_func_names_[i], kernel_func_paths_[i], build_options_[i]);
context.cl_context()->AddKernel(kernel_func_names_[i],
kernel_func_paths_[i],
build_options_[i],
time_stamp_);
}
}
......@@ -153,7 +155,7 @@ void ConvCompute::GemmlikeConv2d() {
auto& context = ctx_->As<OpenCLContext>();
std::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0];
kernel_key << kernel_func_names_[0] << build_options_[0] << time_stamp_;
auto img2col_kernel = context.cl_context()->GetKernel(kernel_key.str());
int n_threads = c_in * h_out * w_out;
......@@ -218,7 +220,7 @@ void ConvCompute::GemmlikeConv2d() {
int n = h_out * w_out;
VLOG(4) << "m = " << m << " n = " << n << " k = " << k;
kernel_key.str("");
kernel_key << kernel_func_names_[1] << build_options_[1];
kernel_key << kernel_func_names_[1] << build_options_[1] << time_stamp_;
auto gemm_kernel = context.cl_context()->GetKernel(kernel_key.str());
GemmBatched(
gemm_kernel, col_buf, filter_buf, bias_buf, output_buf, bs, m, n, k);
......@@ -249,7 +251,8 @@ void ConvCompute::Conv2d1x1() {
auto& context = ctx_->As<OpenCLContext>();
std::stringstream kernel_key;
kernel_key << kernel_func_names_.front() << build_options_.front();
kernel_key << kernel_func_names_.front() << build_options_.front()
<< time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
GemmBatched(kernel, x_d, filter_d, bias_d, output_d, batch_size, m, n, k);
......
......@@ -21,6 +21,7 @@
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/tensor.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
namespace paddle {
......@@ -55,6 +56,7 @@ class ConvCompute
std::vector<std::string> kernel_func_names_{};
std::vector<std::string> kernel_func_paths_{};
std::vector<std::string> build_options_{};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -369,15 +369,17 @@ void ConvImageCompute::PrepareForRun() {
build_options_.push_back(build_options_single);
for (size_t i = 0; i < kernel_func_names_.size(); i++) {
context.cl_context()->AddKernel(
kernel_func_names_[i], kernel_func_paths_[i], build_options_[i]);
context.cl_context()->AddKernel(kernel_func_names_[i],
kernel_func_paths_[i],
build_options_[i],
time_stamp_);
}
VLOG(4) << "global_work_size_[3D]: {" << global_work_size_[0] << ","
<< global_work_size_[1] << "," << global_work_size_[2] << "}";
std::stringstream kernel_key;
kernel_key << kernel_func_names_[0] << build_options_[0];
kernel_key << kernel_func_names_[0] << build_options_[0] << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << "kernel_key: " << kernel_key.str();
VLOG(4) << "kernel ready ... " << kernel_key.str();
......
......@@ -22,6 +22,7 @@
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/tensor.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
namespace paddle {
......@@ -56,6 +57,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
std::vector<std::string> kernel_func_names_{};
std::vector<std::string> kernel_func_paths_{};
std::vector<std::string> build_options_{};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
Tensor filter_gpu_image_;
Tensor bias_gpu_image_;
......
......@@ -44,8 +44,10 @@ class DepthwiseConv2dCompute
build_options_ += " -DRELU6";
}
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/depthwise_conv2d_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/depthwise_conv2d_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -67,7 +69,7 @@ class DepthwiseConv2dCompute
param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status;
......@@ -120,6 +122,7 @@ class DepthwiseConv2dCompute
private:
std::string kernel_func_name_{"depthwise_conv2d"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -40,8 +40,10 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
// Logs the kernel function name, then registers the kernel from
// "image/dropout_kernel.cl" with the OpenCL context obtained from ctx_.
//
// NOTE(review): this span appears to be a rendered diff. The three-argument
// AddKernel call looks like the removed line and the four-argument call
// (which also passes time_stamp_) like its replacement; confirm which one is
// live.
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
context.cl_context()->AddKernel(
kernel_func_name_, "image/dropout_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/dropout_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -63,7 +65,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status;
......@@ -101,6 +103,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
private:
std::string kernel_func_name_{"dropout"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -25,8 +25,10 @@ namespace opencl {
// Registers the kernel from "buffer/elementwise_add_kernel.cl" with the
// OpenCL context, caches the mutable op parameters in ele_param_, and then
// calls UpdateParams().
//
// NOTE(review): this span appears to be a rendered diff. The three-argument
// AddKernel call looks like the removed line and the four-argument call
// (which also passes time_stamp_) like its replacement; confirm which one is
// live.
void ElementwiseAddCompute::PrepareForRun() {
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/elementwise_add_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
// ele_param_ is assigned before UpdateParams() runs.
ele_param_ = param_.get_mutable<param_t>();
UpdateParams();
}
......@@ -39,7 +41,7 @@ void ElementwiseAddCompute::Run() {
auto* out_buf = ele_param_->Out->template mutable_data<float, cl::Buffer>(
TARGET(kOpenCL));
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
VLOG(4) << TargetToStr(ele_param_->X->target());
......
......@@ -16,6 +16,7 @@
#include <memory>
#include <string>
#include "lite/core/kernel.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h"
......@@ -46,6 +47,7 @@ class ElementwiseAddCompute
param_t* ele_param_{nullptr};
std::string kernel_func_name_{"elementwise_add"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -59,11 +59,13 @@ void ElementwiseAddImageCompute::ReInitWhenNeeded() {
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/elementwise_add_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str());
// compute image shape
......
......@@ -18,6 +18,7 @@
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h"
......@@ -54,8 +55,10 @@ class ElementwiseAddImageCompute
{static_cast<DDim::value_type>(1), static_cast<DDim::value_type>(1)}));
DDim out_img_shape_ = DDim(std::vector<DDim::value_type>(
{static_cast<DDim::value_type>(1), static_cast<DDim::value_type>(1)}));
std::string kernel_func_name_{"elementwise_add"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
bool first_epoch_for_reinit_{true};
cl::Kernel kernel_;
cl::NDRange global_work_size_ = cl::NDRange{
......
......@@ -71,8 +71,10 @@ class ElementwiseMulImageCompute
VLOG(4) << "bias_dims.size():" << bias_dims.size();
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/elementwise_mul_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/elementwise_mul_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -114,7 +116,7 @@ class ElementwiseMulImageCompute
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
auto bias_dims = y->dims();
......@@ -201,6 +203,7 @@ class ElementwiseMulImageCompute
param_t* ele_param_{nullptr};
std::string kernel_func_name_{"elementwise_mul"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -49,8 +49,10 @@ void ElementwiseSubImageCompute::PrepareForRun() {
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/elementwise_sub_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/elementwise_sub_kernel.cl",
build_options_,
time_stamp_);
}
void ElementwiseSubImageCompute::Run() {
......@@ -93,7 +95,7 @@ void ElementwiseSubImageCompute::Run() {
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0;
......
......@@ -17,6 +17,7 @@
#include <string>
#include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h"
......@@ -44,6 +45,7 @@ class ElementwiseSubImageCompute
param_t* ele_param_{nullptr};
std::string kernel_func_name_{"elementwise_sub"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -16,6 +16,7 @@
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
#include "lite/utils/replace_stl/stream.h"
#include "lite/utils/string.h"
......@@ -74,10 +75,12 @@ class FcCompute
}
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/fc_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/fc_kernel.cl",
build_options_,
time_stamp_);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str());
// compute global work size
......@@ -136,6 +139,7 @@ class FcCompute
param_t* fc_param_{nullptr};
std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_float "};
std::string time_stamp_{GetTimeStamp()};
bool first_epoch_for_reinit_{true};
DDim last_x_dims_;
cl::NDRange global_work_size_;
......
......@@ -28,8 +28,10 @@ class FusionElementwiseAddActivationCompute : public ElementwiseAddCompute {
void PrepareForRun() override {
build_options_ += " -DRELU";
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/elementwise_add_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
ele_param_ = param_.get_mutable<param_t>();
UpdateParams();
auto act_t = static_cast<param_t*>(ele_param_)->act_type;
......
......@@ -16,6 +16,7 @@
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/opencl/elementwise_add_image_compute.h"
#include "lite/kernels/opencl/image_helper.h"
namespace paddle {
namespace lite {
......@@ -30,8 +31,10 @@ class FusionElementwiseAddActivationImageCompute
void PrepareForRun() override {
build_options_ += " -DRELU";
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/elementwise_add_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/elementwise_add_kernel.cl",
build_options_,
time_stamp_);
ele_param_ = param_.get_mutable<param_t>();
auto act_t = static_cast<param_t*>(ele_param_)->act_type;
VLOG(4) << "act: " << act_t;
......
......@@ -40,12 +40,14 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
// Registers the kernel from "image/grid_sampler_kernel.cl" with the OpenCL
// context, then fetches it back by key and stores it in kernel_.
//
// NOTE(review): this span appears to be a rendered diff. The three-argument
// AddKernel call and the key built without time_stamp_ look like removed
// lines; the four-argument call and the key that appends time_stamp_ look
// like their replacements. Confirm which lines are live; as written, both
// key fragments would be streamed into the same kernel_key.
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/grid_sampler_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/grid_sampler_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
// The lookup key is assembled from the same pieces passed to AddKernel.
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str());
VLOG(4) << "kernel_key: " << kernel_key.str();
}
......@@ -150,6 +152,7 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
cl::NDRange global_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -74,6 +74,12 @@ static std::vector<size_t> DefaultWorkSize(const DDim& image_dim,
LOG(FATAL) << " not support this dim, need imp ";
}
// Returns a string derived from the current wall-clock time, formatted as
// "<seconds>_<microseconds>" since the epoch. The OpenCL kernel classes store
// it in time_stamp_ and append it to the key they register kernels under, so
// that separate instances get separate registry entries.
//
// Both tv_sec and tv_usec are included: tv_usec alone wraps every second, so
// two instances created at the same sub-second offset would receive the same
// stamp and end up sharing one registry entry.
//
// NOTE(review): two calls within the same microsecond still return the same
// string; a process-wide counter would be needed for a hard uniqueness
// guarantee.
static const std::string GetTimeStamp() {
  struct timeval now;
  gettimeofday(&now, nullptr);
  return std::to_string(now.tv_sec) + "_" + std::to_string(now.tv_usec);
}
} // namespace opencl
} // namespace kernels
} // namespace lite
......
......@@ -60,8 +60,10 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
}
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/instance_norm_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/instance_norm_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
}
......@@ -115,7 +117,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
out_image_shape["width"], out_image_shape["height"]);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status = kernel.setArg(0, out_w);
......@@ -180,8 +182,10 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
bias_image_.mutable_data<half_t, cl::Image2D>(
scale_img_size[0], scale_img_size[1], bias_img.data());
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/instance_norm_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/instance_norm_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
}
......@@ -234,7 +238,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
auto* scale_img = scale_image_.data<half_t, cl::Image2D>();
auto* bias_img = bias_image_.data<half_t, cl::Image2D>();
......@@ -271,6 +275,7 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
param_t* instance_norm_param_{nullptr};
std::string kernel_func_name_{"instance_norm_onnx"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
Tensor scale_image_;
Tensor bias_image_;
......
......@@ -48,7 +48,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
beta_ = lrn_param_->beta;
norm_region_ = lrn_param_->norm_region;
context.cl_context()->AddKernel(
kernel_func_name_, "image/lrn_kernel.cl", build_options_);
kernel_func_name_, "image/lrn_kernel.cl", build_options_, time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
}
......@@ -91,7 +91,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0;
......@@ -152,6 +152,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
std::string norm_region_{"AcrossChannels"};
std::string kernel_func_name_{"lrn"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -16,6 +16,7 @@
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/opencl/image_helper.h"
#include "lite/operators/op_params.h"
#include "lite/utils/replace_stl/stream.h"
#include "lite/utils/string.h"
......@@ -32,8 +33,10 @@ class MulCompute
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/mat_mul_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/mat_mul_kernel.cl",
build_options_,
time_stamp_);
const auto& param = *param_.get_mutable<param_t>();
const auto* x_data = param.x->data<float>();
const auto* y_data = param.y->data<float>();
......@@ -68,7 +71,7 @@ class MulCompute
param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status;
......@@ -103,6 +106,7 @@ class MulCompute
int m_, n_, k_;
std::string kernel_func_name_{"mat_mul"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -38,8 +38,10 @@ class NearestInterpComputeImageDefault
// Registers this op's OpenCL kernel with the CLContext obtained from ctx_.
// NOTE(review): this text is a rendered diff without +/- markers, so the
// removed 3-argument AddKernel call and its 4-argument replacement both
// appear below (the hunk header counts, -8/+10, match that reading).
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
// Pre-change call: kernel name, .cl source path, build options.
context.cl_context()->AddKernel(
kernel_func_name_, "image/nearest_interp_kernel.cl", build_options_);
// Post-change call: same arguments plus the time_stamp_ member.
context.cl_context()->AddKernel(kernel_func_name_,
"image/nearest_interp_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
}
......@@ -66,7 +68,7 @@ class NearestInterpComputeImageDefault
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0;
......@@ -121,6 +123,7 @@ class NearestInterpComputeImageDefault
private:
std::string kernel_func_name_{"nearest_interp"};
std::string build_options_{" -DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -52,8 +52,10 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
}
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/pad2d_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"image/pad2d_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
}
......@@ -93,7 +95,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
#endif
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int arg_idx = 0;
......@@ -159,6 +161,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
param_t* pad2d_param_{nullptr};
std::string kernel_func_name_{};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -37,8 +37,10 @@ class PoolCompute
const auto& param = *param_.get_mutable<param_t>();
kernel_func_name_ += param.pooling_type;
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "buffer/pool_kernel.cl", build_options_);
context.cl_context()->AddKernel(kernel_func_name_,
"buffer/pool_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -69,7 +71,7 @@ class PoolCompute
auto* output_buf =
param.output->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status;
auto numel = out_dims.production();
......@@ -117,6 +119,7 @@ class PoolCompute
private:
std::string kernel_func_name_{"pool_"};
std::string build_options_{"-DCL_DTYPE_float"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -47,7 +47,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
auto& context = ctx_->As<OpenCLContext>();
context.cl_context()->AddKernel(
kernel_func_name_, "image/pool_kernel.cl", build_options_);
kernel_func_name_, "image/pool_kernel.cl", build_options_, time_stamp_);
}
void Run() override {
......@@ -112,7 +112,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
// VLOG(4) << "out_image" << out_img;
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
int c_block = (out_dims[1] + 3) / 4;
......@@ -164,6 +164,7 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
private:
std::string kernel_func_name_{"pool_"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -36,8 +36,10 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
// Logs the kernel function name, then registers the reshape OpenCL kernel
// with the CLContext obtained from ctx_.
// NOTE(review): this text is a rendered diff without +/- markers, so the
// removed 3-argument AddKernel call and its 4-argument replacement both
// appear below (the hunk header counts, -8/+10, match that reading).
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
// Pre-change call: kernel name, .cl source path, build options.
context.cl_context()->AddKernel(
kernel_func_name_, "image/reshape_kernel.cl", build_options_);
// Post-change call: same arguments plus the time_stamp_ member.
context.cl_context()->AddKernel(kernel_func_name_,
"image/reshape_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -110,7 +112,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
#ifndef LITE_SHUTDOWN_LOG
......@@ -166,6 +168,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
private:
std::string kernel_func_name_{"reshape"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -37,12 +37,14 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
// Registers the scale OpenCL kernel with the CLContext obtained from ctx_,
// then looks the kernel up by key and stores the result in kernel_.
// NOTE(review): this text is a rendered diff without +/- markers, so each
// changed statement appears twice: first the removed form, then the added
// form that also uses time_stamp_ (hunk header counts, -12/+14, match).
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
// Pre-change call: kernel name, .cl source path, build options.
context.cl_context()->AddKernel(
kernel_func_name_, "image/scale_kernel.cl", build_options_);
// Post-change call: same arguments plus the time_stamp_ member.
context.cl_context()->AddKernel(kernel_func_name_,
"image/scale_kernel.cl",
build_options_,
time_stamp_);
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
STL::stringstream kernel_key;
// Pre-change key: function name followed by build options.
kernel_key << kernel_func_name_ << build_options_;
// Post-change key: additionally appends time_stamp_.
// NOTE(review): presumably this must equal the key AddKernel registers
// the kernel under -- confirm against CLContext::AddKernel.
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
kernel_ = context.cl_context()->GetKernel(kernel_key.str());
}
......@@ -105,6 +107,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
private:
std::string kernel_func_name_{"scale"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
param_t* scale_param_{nullptr};
......
......@@ -38,8 +38,10 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
// Logs the kernel function name, then registers the slice OpenCL kernel
// with the CLContext obtained from ctx_.
// NOTE(review): this text is a rendered diff without +/- markers, so the
// removed 3-argument AddKernel call and its 4-argument replacement both
// appear below (the hunk header counts, -8/+10, match that reading).
void PrepareForRun() override {
auto& context = ctx_->As<OpenCLContext>();
VLOG(1) << "kernel_func_name_:" << kernel_func_name_;
// Pre-change call: kernel name, .cl source path, build options.
context.cl_context()->AddKernel(
kernel_func_name_, "image/slice_kernel.cl", build_options_);
// Post-change call: same arguments plus the time_stamp_ member.
context.cl_context()->AddKernel(kernel_func_name_,
"image/slice_kernel.cl",
build_options_,
time_stamp_);
}
void Run() override {
......@@ -68,7 +70,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
STL::stringstream kernel_key;
kernel_key << kernel_func_name_ << build_options_;
kernel_key << kernel_func_name_ << build_options_ << time_stamp_;
auto kernel = context.cl_context()->GetKernel(kernel_key.str());
cl_int status;
......@@ -108,6 +110,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
private:
std::string kernel_func_name_{"slice"};
std::string build_options_{"-DCL_DTYPE_half"};
std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event};
};
......
......@@ -192,6 +192,7 @@ function build_opencl {
cmake_opencl ${os} ${abi} ${lang}
make opencl_clhpp -j$NUM_CORES_FOR_COMPILE
make publish_inference -j$NUM_CORES_FOR_COMPILE
build $TESTS_FILE
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册