提交 89118525 编写于 作者: X xiebaiyuan 提交者: GitHub

fix grouped 3x3 conv (groups > 1) in GPU_CL kernels (#3498)

* add test for imfix, test=develop

* fix grouped 3x3 conv, test=develop

* add direct release build script, test=develop

* fix grouped conv, test=develop
上级 9aee2ad5
...@@ -202,7 +202,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -202,7 +202,6 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter()); // winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
// //
// } else { // } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
// std::cout << " input dim " << param->Input()->dims()[0] << " " // std::cout << " input dim " << param->Input()->dims()[0] << " "
...@@ -218,7 +217,15 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -218,7 +217,15 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
// param->Filter()->dims()[2] // param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl; // << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options); if (param->groups > 1) {
param->ExecMode() =
ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT;
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
} else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file,
build_options);
}
// } // }
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support "); PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
...@@ -236,7 +243,7 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute( ...@@ -236,7 +243,7 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
param.NewScale(), param.NewBias()); param.NewScale(), param.NewBias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(), ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(),
......
...@@ -96,10 +96,18 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) { ...@@ -96,10 +96,18 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
// //
// } else { // } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options);
if (param->groups > 1) {
param->ExecMode() =
ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT;
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
} else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file,
build_options);
}
// } // }
} else if (param->Filter()->dims()[2] == 7 && } else if (param->Filter()->dims()[2] == 7 &&
...@@ -130,6 +138,7 @@ void ConvAddKernel<GPU_CL, float>::Compute( ...@@ -130,6 +138,7 @@ void ConvAddKernel<GPU_CL, float>::Compute(
WinogradConv3x3<4, 3>(&this->cl_helper_, param, false, param.Bias()); WinogradConv3x3<4, 3>(&this->cl_helper_, param, false, param.Bias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
......
...@@ -96,7 +96,6 @@ bool ConvAddReluKernel<GPU_CL, float>::Init( ...@@ -96,7 +96,6 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter()); // winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
// //
// } else { // } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
// std::cout << " input dim " << param->Input()->dims()[0] << " " // std::cout << " input dim " << param->Input()->dims()[0] << " "
...@@ -112,7 +111,16 @@ bool ConvAddReluKernel<GPU_CL, float>::Init( ...@@ -112,7 +111,16 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
// param->Filter()->dims()[2] // param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl; // << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options); if (param->groups > 1) {
param->ExecMode() =
ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT;
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
} else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file,
build_options);
}
// } // }
} else if (param->Filter()->dims()[2] == 7 && } else if (param->Filter()->dims()[2] == 7 &&
...@@ -146,7 +154,7 @@ void ConvAddReluKernel<GPU_CL, float>::Compute( ...@@ -146,7 +154,7 @@ void ConvAddReluKernel<GPU_CL, float>::Compute(
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias()); WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
......
...@@ -153,11 +153,18 @@ bool ConvBNReluKernel<GPU_CL, float>::Init( ...@@ -153,11 +153,18 @@ bool ConvBNReluKernel<GPU_CL, float>::Init(
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter()); // winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
// //
// } else { // } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options); if (param->groups > 1) {
param->ExecMode() =
ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT;
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
} else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file,
build_options);
}
// } // }
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support "); PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
...@@ -174,7 +181,7 @@ void ConvBNReluKernel<GPU_CL, float>::Compute( ...@@ -174,7 +181,7 @@ void ConvBNReluKernel<GPU_CL, float>::Compute(
param.NewScale(), param.NewBias()); param.NewScale(), param.NewBias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(), ConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
......
...@@ -90,7 +90,6 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -90,7 +90,6 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter()); // winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
// //
// } else { // } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
// std::cout << " input dim " << param->Input()->dims()[0] << " " // std::cout << " input dim " << param->Input()->dims()[0] << " "
...@@ -105,8 +104,15 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -105,8 +104,15 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
// << param->Filter()->dims()[1] << " " << // << param->Filter()->dims()[1] << " " <<
// param->Filter()->dims()[2] // param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl; // << " " << param->Filter()->dims()[3] << " " << std::endl;
if (param->groups > 1) {
param->ExecMode() =
ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT;
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file);
} else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file);
}
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file);
// } // }
DLOG << "conv 3x3"; DLOG << "conv 3x3";
} else if (param->Filter()->dims()[2] == 7 && } else if (param->Filter()->dims()[2] == 7 &&
...@@ -132,7 +138,7 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { ...@@ -132,7 +138,7 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
WinogradConv3x3<4, 3>(&this->cl_helper_, param); WinogradConv3x3<4, 3>(&this->cl_helper_, param);
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
......
...@@ -96,29 +96,14 @@ bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) { ...@@ -96,29 +96,14 @@ bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) {
// winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter()); // winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
// //
// } else { // } else {
if (param->Strides()[0] == 1 && param->Dilations()[0] == 1) { param->Filter()->InitCLImage(cl_helper_.CLContext(),
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3S1_FLOAT; cl_helper_.CLCommandQueue());
param->Filter()->InitCLImage(cl_helper_.CLContext(), if (param->groups > 1) {
cl_helper_.CLCommandQueue()); param->ExecMode() =
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT;
build_options); this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
} else { } else {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT; param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
// std::cout << " input dim " << param->Input()->dims()[0] << " "
// << param->Input()->dims()[1] << " "
// << param->Input()->dims()[2] << " "
// << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " "
// << param->Output()->dims()[2] << " "
// << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " "
// << param->Filter()->dims()[2] << " "
// << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file,
build_options); build_options);
} }
...@@ -140,7 +125,7 @@ void ConvReluKernel<GPU_CL, float>::Compute( ...@@ -140,7 +125,7 @@ void ConvReluKernel<GPU_CL, float>::Compute(
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true); WinogradConv3x3<4, 3>(&this->cl_helper_, param, true);
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_WITH_GROUP_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true); ConvAddBnRelu(&this->cl_helper_, param, true);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册