未验证 提交 c8918d89 编写于 作者: X xiebaiyuan 提交者: GitHub

[LITE][OPENCL][Image] use unique_ptr to wrap filter and bias in conv image...

[LITE][OPENCL][Image] use unique_ptr to wrap filter and bias in conv image compute, test=develop (#3388)
上级 e55542dc
...@@ -38,6 +38,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -38,6 +38,7 @@ void ConvImageCompute::PrepareForRun() {
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
filter_gpu_image_ = std::unique_ptr<Tensor>(new Tensor);
int bs = x_dims[0]; int bs = x_dims[0];
int c_in = x_dims[1]; int c_in = x_dims[1];
int h_out = output_dims[2]; int h_out = output_dims[2];
...@@ -113,7 +114,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -113,7 +114,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<half_t, cl::Image2D>( filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d1x1opt; impl_ = &ConvImageCompute::Conv2d1x1opt;
...@@ -174,7 +175,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -174,7 +175,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<half_t, cl::Image2D>( filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
#endif #endif
...@@ -194,7 +195,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -194,7 +195,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<half_t, cl::Image2D>( filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::DepthwiseConv2d; impl_ = &ConvImageCompute::DepthwiseConv2d;
...@@ -209,7 +210,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -209,7 +210,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<half_t, cl::Image2D>( filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d3x3opt; impl_ = &ConvImageCompute::Conv2d3x3opt;
...@@ -241,7 +242,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -241,7 +242,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<half_t, cl::Image2D>( filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d5x5; impl_ = &ConvImageCompute::Conv2d5x5;
...@@ -257,7 +258,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -257,7 +258,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<half_t, cl::Image2D>( filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d5x5opt; impl_ = &ConvImageCompute::Conv2d5x5opt;
...@@ -290,7 +291,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -290,7 +291,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
this->filter_gpu_image_.mutable_data<half_t, cl::Image2D>( this->filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d7x7; impl_ = &ConvImageCompute::Conv2d7x7;
...@@ -306,7 +307,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -306,7 +307,7 @@ void ConvImageCompute::PrepareForRun() {
std::vector<half_t> filter_image_v(filter_image_dims[0] * std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims); converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
this->filter_gpu_image_.mutable_data<half_t, cl::Image2D>( this->filter_gpu_image_->mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data()); filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d7x7opt; impl_ = &ConvImageCompute::Conv2d7x7opt;
...@@ -349,6 +350,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -349,6 +350,7 @@ void ConvImageCompute::PrepareForRun() {
const bool is_element_wise_bias = const bool is_element_wise_bias =
has_bias && param.output->dims() == param.bias->dims(); has_bias && param.output->dims() == param.bias->dims();
if (has_bias) { if (has_bias) {
bias_gpu_image_ = std::unique_ptr<Tensor>(new Tensor);
build_options_single += build_options_single +=
is_element_wise_bias ? " -DBIASE_ELE" : " -DBIASE_CH"; is_element_wise_bias ? " -DBIASE_ELE" : " -DBIASE_CH";
...@@ -361,7 +363,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -361,7 +363,7 @@ void ConvImageCompute::PrepareForRun() {
float* bias_cpu_data = param.bias->mutable_data<float>(); float* bias_cpu_data = param.bias->mutable_data<float>();
bias_converter.NCHWToImage( bias_converter.NCHWToImage(
bias_cpu_data, bias_image_v.data(), param.bias->dims()); bias_cpu_data, bias_image_v.data(), param.bias->dims());
this->bias_gpu_image_.mutable_data<half_t, cl::Image2D>( this->bias_gpu_image_->mutable_data<half_t, cl::Image2D>(
bias_image_dims[0], bias_image_dims[1], bias_image_v.data()); bias_image_dims[0], bias_image_dims[1], bias_image_v.data());
// convert cpu buffer bias --> gpu image --- end ---- // convert cpu buffer bias --> gpu image --- end ----
} }
...@@ -434,7 +436,7 @@ void ConvImageCompute::Conv2d1x1opt(bool is_turn) { ...@@ -434,7 +436,7 @@ void ConvImageCompute::Conv2d1x1opt(bool is_turn) {
auto paddings = *param.paddings; auto paddings = *param.paddings;
auto strides = param.strides; auto strides = param.strides;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -498,7 +500,7 @@ void ConvImageCompute::Conv2d1x1opt(bool is_turn) { ...@@ -498,7 +500,7 @@ void ConvImageCompute::Conv2d1x1opt(bool is_turn) {
const cl::Buffer* bias_buf = nullptr; const cl::Buffer* bias_buf = nullptr;
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
...@@ -565,7 +567,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) { ...@@ -565,7 +567,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) {
auto strides = param.strides; auto strides = param.strides;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -647,7 +649,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) { ...@@ -647,7 +649,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
...@@ -732,7 +734,7 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) { ...@@ -732,7 +734,7 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) {
auto dilations = *param.dilations; auto dilations = *param.dilations;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -781,7 +783,7 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) { ...@@ -781,7 +783,7 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
...@@ -856,7 +858,7 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) { ...@@ -856,7 +858,7 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) {
auto paddings = *param.paddings; auto paddings = *param.paddings;
auto strides = param.strides; auto strides = param.strides;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -914,7 +916,7 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) { ...@@ -914,7 +916,7 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
...@@ -989,7 +991,7 @@ void ConvImageCompute::Conv2d5x5opt(bool is_turn) { ...@@ -989,7 +991,7 @@ void ConvImageCompute::Conv2d5x5opt(bool is_turn) {
auto dilations = *param.dilations; auto dilations = *param.dilations;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -1039,7 +1041,7 @@ void ConvImageCompute::Conv2d5x5opt(bool is_turn) { ...@@ -1039,7 +1041,7 @@ void ConvImageCompute::Conv2d5x5opt(bool is_turn) {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
...@@ -1106,7 +1108,7 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) { ...@@ -1106,7 +1108,7 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) {
auto paddings = *param.paddings; auto paddings = *param.paddings;
auto strides = param.strides; auto strides = param.strides;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -1164,7 +1166,7 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) { ...@@ -1164,7 +1166,7 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
...@@ -1239,7 +1241,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) { ...@@ -1239,7 +1241,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) {
auto dilations = *param.dilations; auto dilations = *param.dilations;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -1287,7 +1289,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) { ...@@ -1287,7 +1289,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) {
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
...@@ -1357,11 +1359,11 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) { ...@@ -1357,11 +1359,11 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) {
auto dilations = *param.dilations; auto dilations = *param.dilations;
auto* input_img = param.x->data<half_t, cl::Image2D>(); auto* input_img = param.x->data<half_t, cl::Image2D>();
auto* filter_img = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_img = filter_gpu_image_->data<half_t, cl::Image2D>();
const cl::Image2D* bias_img = nullptr; const cl::Image2D* bias_img = nullptr;
if (param.bias) { if (param.bias) {
bias_img = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_img = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto image_shape = InitImageDimInfoWith(output_dims); auto image_shape = InitImageDimInfoWith(output_dims);
...@@ -1389,7 +1391,7 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) { ...@@ -1389,7 +1391,7 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) {
has_bias && param.output->dims() == param.bias->dims(); has_bias && param.output->dims() == param.bias->dims();
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
#ifndef LITE_SHUTDOWN_LOG #ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif #endif
...@@ -1444,11 +1446,11 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) { ...@@ -1444,11 +1446,11 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) {
int input_c_block = (x_dims[1] + 3) / 4; int input_c_block = (x_dims[1] + 3) / 4;
auto* input_img = param.x->data<half_t, cl::Image2D>(); auto* input_img = param.x->data<half_t, cl::Image2D>();
auto* filter_img = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_img = filter_gpu_image_->data<half_t, cl::Image2D>();
const cl::Image2D* bias_img = nullptr; const cl::Image2D* bias_img = nullptr;
if (param.bias) { if (param.bias) {
bias_img = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_img = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto image_shape = InitImageDimInfoWith(output_dims); auto image_shape = InitImageDimInfoWith(output_dims);
...@@ -1487,7 +1489,7 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) { ...@@ -1487,7 +1489,7 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) {
has_bias && param.output->dims() == param.bias->dims(); has_bias && param.output->dims() == param.bias->dims();
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
#ifndef LITE_SHUTDOWN_LOG #ifndef LITE_SHUTDOWN_LOG
VLOG(4) << "set bias_image: "; VLOG(4) << "set bias_image: ";
#endif #endif
...@@ -1536,7 +1538,7 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) { ...@@ -1536,7 +1538,7 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) {
auto paddings = *param.paddings; auto paddings = *param.paddings;
auto strides = param.strides; auto strides = param.strides;
auto* input_image = param.x->data<half_t, cl::Image2D>(); auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>(); auto* filter_image = filter_gpu_image_->data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims(); auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims(); auto output_dims = param.output->dims();
...@@ -1595,7 +1597,7 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) { ...@@ -1595,7 +1597,7 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) {
const cl::Buffer* bias_buf = nullptr; const cl::Buffer* bias_buf = nullptr;
const cl::Image2D* bias_image = nullptr; const cl::Image2D* bias_image = nullptr;
if (has_bias) { if (has_bias) {
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>(); bias_image = bias_gpu_image_->data<half_t, cl::Image2D>();
} }
auto kernel = kernel_; auto kernel = kernel_;
......
...@@ -59,8 +59,8 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -59,8 +59,8 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
std::vector<std::string> build_options_{}; std::vector<std::string> build_options_{};
std::string time_stamp_{GetTimeStamp()}; std::string time_stamp_{GetTimeStamp()};
std::shared_ptr<cl::Event> event_{new cl::Event}; std::shared_ptr<cl::Event> event_{new cl::Event};
Tensor filter_gpu_image_; std::unique_ptr<Tensor> filter_gpu_image_{nullptr};
Tensor bias_gpu_image_; std::unique_ptr<Tensor> bias_gpu_image_{nullptr};
cl::NDRange global_work_size_ = cl::NDRange{ cl::NDRange global_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)}; static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
int c_blk_ = 1; int c_blk_ = 1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册