未验证 提交 d341fccb 编写于 作者: Y ysh329 提交者: GitHub

[OPENCL] Remove redundant conv code for OpenCL kernels. test=develop (#3924)

Remove redundant conv code for the OpenCL kernels.
上级 4780849f
...@@ -119,7 +119,7 @@ cl::NDRange CLContext::DefaultWorkSize(const CLImage &image) { ...@@ -119,7 +119,7 @@ cl::NDRange CLContext::DefaultWorkSize(const CLImage &image) {
} }
} }
cl::NDRange CLContext::LocalWorkSizeTurn(cl::NDRange global_work_size, cl::NDRange CLContext::LocalWorkSizeTune(cl::NDRange global_work_size,
size_t max_work_size, size_t max_work_size,
int divisor) { int divisor) {
int preferred_lws = 0; int preferred_lws = 0;
...@@ -157,7 +157,7 @@ cl::NDRange CLContext::LocalWorkSizeTurn(cl::NDRange global_work_size, ...@@ -157,7 +157,7 @@ cl::NDRange CLContext::LocalWorkSizeTurn(cl::NDRange global_work_size,
static_cast<size_t>(gws0)}; static_cast<size_t>(gws0)};
#endif #endif
} }
cl::NDRange CLContext::LocalWorkSizeTurnReverse(cl::NDRange global_work_size, cl::NDRange CLContext::LocalWorkSizeTuneReverse(cl::NDRange global_work_size,
size_t max_work_size, size_t max_work_size,
int divisor) { int divisor) {
int preferred_lws = 0; int preferred_lws = 0;
......
...@@ -62,10 +62,10 @@ class CLContext { ...@@ -62,10 +62,10 @@ class CLContext {
cl::NDRange LocalWorkSize(cl::NDRange global_work_size, size_t max_work_size); cl::NDRange LocalWorkSize(cl::NDRange global_work_size, size_t max_work_size);
cl::NDRange LocalWorkSizeTurn(cl::NDRange global_work_size, cl::NDRange LocalWorkSizeTune(cl::NDRange global_work_size,
size_t max_work_size, size_t max_work_size,
int divitor = 2); int divitor = 2);
cl::NDRange LocalWorkSizeTurnReverse(cl::NDRange global_work_size, cl::NDRange LocalWorkSizeTuneReverse(cl::NDRange global_work_size,
size_t max_work_size, size_t max_work_size,
int divitor = 2); int divitor = 2);
bool IsArmMali(); bool IsArmMali();
......
...@@ -6,9 +6,7 @@ __kernel void conv2d_1x1_opt( ...@@ -6,9 +6,7 @@ __kernel void conv2d_1x1_opt(
__private const int global_size_dim2, __private const int global_size_dim2,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter, __read_only image2d_t filter,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
#ifdef BATCH_NORM #ifdef BATCH_NORM
__read_only image2d_t new_scale, __read_only image2d_t new_scale,
__read_only image2d_t new_biase, __read_only image2d_t new_biase,
...@@ -284,9 +282,7 @@ __kernel void conv2d_1x1_simple( ...@@ -284,9 +282,7 @@ __kernel void conv2d_1x1_simple(
__private const int global_size_dim2, __private const int global_size_dim2,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter, __read_only image2d_t filter,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
#ifdef BATCH_NORM #ifdef BATCH_NORM
__read_only image2d_t new_scale, __read_only image2d_t new_scale,
__read_only image2d_t new_biase, __read_only image2d_t new_biase,
......
...@@ -19,9 +19,7 @@ __kernel void conv2d_3x3(__private const int global_size_dim0, ...@@ -19,9 +19,7 @@ __kernel void conv2d_3x3(__private const int global_size_dim0,
__private const int global_size_dim2, __private const int global_size_dim2,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter, __read_only image2d_t filter,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int offset, __private const int offset,
......
...@@ -19,9 +19,7 @@ __kernel void conv2d_3x3_opt(__private const int item_ch, ...@@ -19,9 +19,7 @@ __kernel void conv2d_3x3_opt(__private const int item_ch,
__private const int item_h, __private const int item_h,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int pad, __private const int pad,
...@@ -264,9 +262,7 @@ __kernel void conv2d_3x3_multi_batch(__private const int item_ch, ...@@ -264,9 +262,7 @@ __kernel void conv2d_3x3_multi_batch(__private const int item_ch,
__private const int item_h, __private const int item_h,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int pad, __private const int pad,
......
...@@ -5,9 +5,7 @@ __kernel void conv2d_5x5(__private const int global_size_dim0, ...@@ -5,9 +5,7 @@ __kernel void conv2d_5x5(__private const int global_size_dim0,
__private const int global_size_dim2, __private const int global_size_dim2,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
#ifdef BATCH_NORM #ifdef BATCH_NORM
__read_only image2d_t new_scale, __read_only image2d_t new_scale,
__read_only image2d_t new_biase, __read_only image2d_t new_biase,
......
...@@ -20,9 +20,7 @@ __kernel void conv2d_5x5_opt(__private const int item_ch, ...@@ -20,9 +20,7 @@ __kernel void conv2d_5x5_opt(__private const int item_ch,
__private const int item_h, __private const int item_h,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int pad, __private const int pad,
...@@ -268,9 +266,7 @@ __kernel void conv2d_5x5_multi_batch(__private const int item_ch, ...@@ -268,9 +266,7 @@ __kernel void conv2d_5x5_multi_batch(__private const int item_ch,
__private const int item_h, __private const int item_h,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int pad, __private const int pad,
...@@ -513,4 +509,4 @@ __kernel void conv2d_5x5_multi_batch(__private const int item_ch, ...@@ -513,4 +509,4 @@ __kernel void conv2d_5x5_multi_batch(__private const int item_ch,
(int2)(out_w_base_id + out_w_id4, item_h_id), (int2)(out_w_base_id + out_w_id4, item_h_id),
output[4]); output[4]);
} }
} }
\ No newline at end of file
...@@ -5,9 +5,7 @@ __kernel void conv2d_7x7(__private const int global_size_dim0, ...@@ -5,9 +5,7 @@ __kernel void conv2d_7x7(__private const int global_size_dim0,
__private const int global_size_dim2, __private const int global_size_dim2,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
#ifdef BATCH_NORM #ifdef BATCH_NORM
__read_only image2d_t new_scale, __read_only image2d_t new_scale,
__read_only image2d_t new_biase, __read_only image2d_t new_biase,
......
...@@ -20,9 +20,7 @@ __kernel void conv2d_7x7_opt(__private const int item_ch, ...@@ -20,9 +20,7 @@ __kernel void conv2d_7x7_opt(__private const int item_ch,
__private const int item_h, __private const int item_h,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int pad, __private const int pad,
...@@ -268,9 +266,7 @@ __kernel void conv2d_7x7_multi_batch(__private const int item_ch, ...@@ -268,9 +266,7 @@ __kernel void conv2d_7x7_multi_batch(__private const int item_ch,
__private const int item_h, __private const int item_h,
__read_only image2d_t input_image, __read_only image2d_t input_image,
__read_only image2d_t filter_image, __read_only image2d_t filter_image,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int pad, __private const int pad,
...@@ -513,4 +509,4 @@ __kernel void conv2d_7x7_multi_batch(__private const int item_ch, ...@@ -513,4 +509,4 @@ __kernel void conv2d_7x7_multi_batch(__private const int item_ch,
(int2)(out_w_base_id + out_w_id4, item_h_id), (int2)(out_w_base_id + out_w_id4, item_h_id),
output[4]); output[4]);
} }
} }
\ No newline at end of file
...@@ -19,9 +19,7 @@ __kernel void depth_conv2d(__private const int global_size_dim0, ...@@ -19,9 +19,7 @@ __kernel void depth_conv2d(__private const int global_size_dim0,
__private const int global_size_dim2, __private const int global_size_dim2,
__read_only image2d_t input, __read_only image2d_t input,
__read_only image2d_t filter, __read_only image2d_t filter,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
#ifdef BATCH_NORM #ifdef BATCH_NORM
__read_only image2d_t new_scale, __read_only image2d_t new_scale,
__read_only image2d_t new_biase, __read_only image2d_t new_biase,
......
...@@ -20,9 +20,7 @@ __kernel void depth_conv2d_3x3( ...@@ -20,9 +20,7 @@ __kernel void depth_conv2d_3x3(
__private const int global_size_dim2, __private const int global_size_dim2,
__read_only image2d_t input, __read_only image2d_t input,
__read_only image2d_t filter, __read_only image2d_t filter,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int offset, __private const int offset,
...@@ -249,9 +247,7 @@ __kernel void depth_conv2d_3x3s1(__private const int ou_ch_blk, ...@@ -249,9 +247,7 @@ __kernel void depth_conv2d_3x3s1(__private const int ou_ch_blk,
__private const int ou_nh, __private const int ou_nh,
__read_only image2d_t input, __read_only image2d_t input,
__read_only image2d_t filter, __read_only image2d_t filter,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias, __read_only image2d_t bias,
#endif
__write_only image2d_t output_image, __write_only image2d_t output_image,
__private const int stride, __private const int stride,
__private const int pad, __private const int pad,
......
...@@ -33,6 +33,7 @@ namespace paddle { ...@@ -33,6 +33,7 @@ namespace paddle {
namespace lite { namespace lite {
namespace kernels { namespace kernels {
namespace opencl { namespace opencl {
class ConvImageCompute : public KernelLite<TARGET(kOpenCL), class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
PRECISION(kFP16), PRECISION(kFP16),
DATALAYOUT(kImageDefault)> { DATALAYOUT(kImageDefault)> {
...@@ -42,8 +43,11 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -42,8 +43,11 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
void PrepareForRun() override; void PrepareForRun() override;
void ReInitWhenNeeded() override;
void Run() override; void Run() override;
double Turn(int times = 5);
double Tune(int times = 5);
#ifdef LITE_WITH_PROFILE #ifdef LITE_WITH_PROFILE
void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) { void SetProfileRuntimeKernelInfo(paddle::lite::profile::OpCharacter* ch) {
...@@ -56,16 +60,20 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -56,16 +60,20 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
#endif #endif
private: private:
void Conv2d1x1opt(bool is_turn = false); void PrintConvInfo();
void Conv2d3x3(bool is_turn = false); void GetGlobalWorkSize();
void Conv2d3x3opt(bool is_turn = false); void Conv2d1x1opt(bool enable_tune = false);
void Conv2d5x5(bool is_turn = false); void Conv2d3x3(bool enable_tune = false);
void Conv2d5x5opt(bool is_turn = false); void Conv2d3x3opt(bool enable_tune = false);
void Conv2d7x7(bool is_turn = false); void Conv2d5x5(bool enable_tune = false);
void Conv2d7x7opt(bool is_turn = false); void Conv2d5x5opt(bool enable_tune = false);
void DepthwiseConv2d3x3s1(bool is_turn = false); void Conv2d7x7(bool enable_tune = false);
void DepthwiseConv2d3x3(bool is_turn = false); void Conv2d7x7opt(bool enable_tune = false);
void DepthwiseConv2d(bool is_turn = false); void DepthwiseConv2d3x3s1(bool enable_tune = false);
void DepthwiseConv2d3x3(bool enable_tune = false);
void DepthwiseConv2d(bool enable_tune = false);
param_t* conv_param_{nullptr};
kernel_t impl_; kernel_t impl_;
std::vector<std::string> kernel_func_names_{}; std::vector<std::string> kernel_func_names_{};
...@@ -79,19 +87,72 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -79,19 +87,72 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
std::unique_ptr<Tensor> tensor_hold_bias_image_{nullptr}; std::unique_ptr<Tensor> tensor_hold_bias_image_{nullptr};
cl::NDRange global_work_size_ = cl::NDRange{ cl::NDRange global_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)}; static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
// opencl kernel args
int c_blk_ = 1; int c_blk_ = 1;
int w_blk_ = 1; int w_blk_ = 1;
int nh_blk_ = 1; int nh_blk_ = 1;
const cl::Image2D* input_image_p_{nullptr};
const cl::Image2D* filter_image_p_{nullptr};
const cl::Image2D* bias_image_p_{nullptr};
const cl::Image2D* output_image_p_{nullptr};
int stride_h_{-1};
int stride_w_{-1};
int dilation_h_{-1};
int dilation_w_{-1};
int pad_up_{-1};
int pad_down_{-1};
int pad_left_{-1};
int pad_right_{-1};
int offset_{-1};
int groups_{-1};
bool relu_fused_{false};
bool has_bias_{false};
int input_tensor_n_{-1};
int input_tensor_c_{-1};
int input_tensor_h_{-1};
int input_tensor_w_{-1};
int input_image_h_{-1};
int input_image_w_{-1};
int input_c_block_{-1};
int output_tensor_n_{-1};
int output_tensor_c_{-1};
int output_tensor_h_{-1};
int output_tensor_w_{-1};
int output_image_h_{-1};
int output_image_w_{-1};
int filter_tensor_n_{-1};
int filter_tensor_c_{-1};
int filter_tensor_h_{-1};
int filter_tensor_w_{-1};
int filter_image_h_{-1};
int filter_image_w_{-1};
int bias_image_h_{-1};
int bias_image_w_{-1};
int default_c_blk_ = 1; int default_c_blk_ = 1;
int default_w_blk_ = 1; int default_w_blk_ = 1;
int default_nh_blk_ = 1; int default_nh_blk_ = 1;
// =================
DDim last_input_dims_{};
bool is_first_epoch_for_run_{true};
cl::Kernel kernel_; cl::Kernel kernel_;
cl_int status_;
cl::NDRange local_work_size_ = cl::NDRange{ cl::NDRange local_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)}; static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
bool use_lws_{true}; bool use_lws_{true};
bool use_turn_{false}; bool use_tune_{false};
}; };
} // namespace opencl } // namespace opencl
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册