提交 ac1c2581 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1079 from codeWorm2015/opencl

 init image when init kernel
...@@ -61,6 +61,7 @@ class CLImage { ...@@ -61,6 +61,7 @@ class CLImage {
PADDLE_MOBILE_THROW_EXCEPTION( PADDLE_MOBILE_THROW_EXCEPTION(
" empty image tensor data shouldn't have value"); " empty image tensor data shouldn't have value");
} }
DLOG << " init empty image ";
InitCLImage(context, nullptr, dim); InitCLImage(context, nullptr, dim);
initialized_ = true; initialized_ = true;
} }
......
...@@ -24,9 +24,16 @@ namespace operators { ...@@ -24,9 +24,16 @@ namespace operators {
template <> template <>
bool ConvAddBNReluKernel<GPU_CL, float>::Init( bool ConvAddBNReluKernel<GPU_CL, float>::Init(
FusionConvAddBNReluParam<GPU_CL> *param) { FusionConvAddBNReluParam<GPU_CL> *param) {
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
param->Bias()->InitCLImage(cl_helper_.CLContext());
// const CL *mean = param->InputMean(); // const CL *mean = param->InputMean();
const framework::CLImage *mean = param->InputMean(); const framework::CLImage *mean = param->InputMean();
const framework::CLImage *variance = param->InputVariance(); const framework::CLImage *variance = param->InputVariance();
const framework::CLImage *scale = param->InputScale(); const framework::CLImage *scale = param->InputScale();
const framework::CLImage *bias = param->InputBias(); const framework::CLImage *bias = param->InputBias();
...@@ -52,9 +59,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -52,9 +59,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
} }
delete[](new_scale_ptr);
delete[](new_bias_ptr);
framework::CLImage *new_scale = new framework::CLImage(); framework::CLImage *new_scale = new framework::CLImage();
new_scale->SetTensorData(new_scale_ptr, variance->dims()); new_scale->SetTensorData(new_scale_ptr, variance->dims());
...@@ -68,6 +72,9 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -68,6 +72,9 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param->SetNewScale(new_scale); param->SetNewScale(new_scale);
param->SetNewBias(new_bias); param->SetNewBias(new_bias);
delete[](new_scale_ptr);
delete[](new_bias_ptr);
PADDLE_MOBILE_ENFORCE( PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] && param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1], param->Paddings()[0] == param->Paddings()[1],
......
...@@ -25,6 +25,9 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) { ...@@ -25,6 +25,9 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
param->Filter()->dims()[2] == param->Filter()->dims()[3] && param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1], param->Paddings()[0] == param->Paddings()[1],
"need equal"); "need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
param->Bias()->InitCLImage(cl_helper_.CLContext());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 - int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]); static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset); param->SetOffset(offset);
......
...@@ -26,19 +26,18 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -26,19 +26,18 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param->Paddings()[0] == param->Paddings()[1], param->Paddings()[0] == param->Paddings()[1],
"need equal"); "need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 - int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]); static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset); param->SetOffset(offset);
DLOG << " init helper: " << &cl_helper_; DLOG << " init helper: " << &cl_helper_;
DLOG << " conv kernel add kernel ~ "; DLOG << " conv kernel add kernel ~ ";
DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock(); DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock();
DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock(); DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock();
DLOG << " filter dims: " << param->Filter()->dims(); DLOG << " filter dims: " << param->Filter()->dims();
if (param->Filter()->WidthOfOneBlock() == 1 && if (param->Filter()->WidthOfOneBlock() == 1 &&
param->Filter()->HeightOfOneBlock() == 1) { param->Filter()->HeightOfOneBlock() == 1) {
......
...@@ -27,6 +27,7 @@ bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -27,6 +27,7 @@ bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param->Filter()->dims()[2] == param->Filter()->dims()[3] && param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1], param->Paddings()[0] == param->Paddings()[1],
"need equal"); "need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 - int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]); static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset); param->SetOffset(offset);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册