Unverified · Commit aad9c8a4 · Authored by: xiebaiyuan · Committed by: GitHub

support gender (#3054)

* [mobile][opencl] add male2fe GAN model

* elementwise mul d3

* remove useless InitEmptyImage in transpose2, test=develop

* update test, test=develop
Parent 3d06dcfe
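The substantive new code is a channel_mul_d3 path for elementwise mul with a three-dimensional (c, 1, 1) bias, i.e. one scale per channel. As a minimal CPU sketch of that semantics (NCHW layout assumed; the function and names below are illustrative, not part of this commit):

// Hedged CPU reference for what channel_mul_d3 computes on the GPU,
// assuming an NCHW input and a (c, 1, 1) bias.
void channel_mul_reference(const float *in, const float *bias, float *out,
                           int n, int c, int h, int w) {
  for (int ni = 0; ni < n; ++ni)
    for (int ci = 0; ci < c; ++ci)
      for (int hi = 0; hi < h; ++hi)
        for (int wi = 0; wi < w; ++wi) {
          int idx = ((ni * c + ci) * h + hi) * w + wi;
          out[idx] = in[idx] * bias[ci];  // bias broadcast over h and w
        }
}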
@@ -112,6 +112,25 @@ __kernel void channel_mul_d2(__global image2d_t input, __global image2d_t bias,
write_imageh(outputImage, coords, output);
}
// bias is (c, 1, 1): one value per channel
__kernel void channel_mul_d3(__global image2d_t input, __global image2d_t bias,
                             __write_only image2d_t outputImage, int w) {
  int x = get_global_id(0);
  int y = get_global_id(1);
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
  int2 coords;
  coords.x = x;
  coords.y = y;
  // x / w recovers the channel-block index from the output x coordinate
  int2 coords_bias;
  coords_bias.x = x / w;
  coords_bias.y = 0;
  half4 in = read_imageh(input, sampler, coords);
  half4 biase = read_imageh(bias, sampler, coords_bias);
  half4 output = in * biase;
  write_imageh(outputImage, coords, output);
}
__kernel void channel_mul_d4(__global image2d_t input, __global image2d_t bias,
                             __write_only image2d_t outputImage, int w) {
  int x = get_global_id(0);
......
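A note on the coords_bias.x = x / w line in channel_mul_d3: the division only makes sense if the half4 image layout packs four channels per pixel and tiles channel blocks along the x axis. That layout is inferred from the code, not stated in the diff; under that assumption, for the 1 x 72 x 28 x 28 input and 72 x 1 x 1 bias used as the example further down:

// Assumed layout: input image is ((c / 4) * w) wide and (n * h) tall;
// the (c, 1, 1) bias image is (c / 4) wide and 1 tall.
//   input image : width = (72 / 4) * 28 = 504, height = 1 * 28 = 28
//   bias  image : width =  72 / 4       =  18, height = 1
// Dividing an output x coordinate by the tensor width selects the bias pixel.
inline int bias_x_coord(int x, int tensor_w) { return x / tensor_w; }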
@@ -203,8 +203,20 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
// std::cout << " input dim " << param->Input()->dims()[0] << " "
// << param->Input()->dims()[1] << " " <<
// param->Input()->dims()[2]
// << " " << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " " <<
// param->Output()->dims()[2]
// << " " << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " " <<
// param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options);
// }
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
@@ -222,7 +234,7 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
param.NewScale(), param.NewBias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(),
@@ -232,6 +244,9 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
DWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(),
param.NewScale(), param.NewBias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
......
@@ -99,8 +99,20 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
// std::cout << " input dim " << param->Input()->dims()[0] << " "
// << param->Input()->dims()[1] << " " <<
// param->Input()->dims()[2]
// << " " << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " " <<
// param->Output()->dims()[2]
// << " " << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " " <<
// param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options);
// }
} else if (param->Filter()->dims()[2] == 7 &&
@@ -134,7 +146,7 @@ void ConvAddReluKernel<GPU_CL, float>::Compute(
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
@@ -144,6 +156,9 @@ void ConvAddReluKernel<GPU_CL, float>::Compute(
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3S1_FLOAT:
DWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
......
@@ -157,7 +157,7 @@ bool ConvBNReluKernel<GPU_CL, float>::Init(
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options);
// }
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
@@ -174,7 +174,7 @@ void ConvBNReluKernel<GPU_CL, float>::Compute(
param.NewScale(), param.NewBias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
@@ -184,6 +184,10 @@ void ConvBNReluKernel<GPU_CL, float>::Compute(
DWConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
param.NewBias());
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
param.NewBias());
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
......
@@ -93,8 +93,20 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file);
// std::cout << " input dim " << param->Input()->dims()[0] << " "
// << param->Input()->dims()[1] << " " <<
// param->Input()->dims()[2]
// << " " << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " " <<
// param->Output()->dims()[2]
// << " " << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " " <<
// param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file);
// }
DLOG << "conv 3x3";
} else if (param->Filter()->dims()[2] == 7 &&
@@ -120,7 +132,7 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
WinogradConv3x3<4, 3>(&this->cl_helper_, param);
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
@@ -129,6 +141,9 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3S1_FLOAT:
DWConvAddBnRelu(&this->cl_helper_, param);
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param);
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
......
@@ -106,7 +106,21 @@ bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
// std::cout << " input dim " << param->Input()->dims()[0] << " "
// << param->Input()->dims()[1] << " "
// << param->Input()->dims()[2] << " "
// << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " "
// << param->Output()->dims()[2] << " "
// << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " "
// << param->Filter()->dims()[2] << " "
// << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file,
build_options);
}
// }
DLOG << "conv 3x3";
@@ -126,7 +140,7 @@ void ConvReluKernel<GPU_CL, float>::Compute(
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true);
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
// case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true);
@@ -137,6 +151,9 @@ void ConvReluKernel<GPU_CL, float>::Compute(
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true);
break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true);
break;
default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode());
......
@@ -40,6 +40,9 @@ bool ElementwiseMulKernel<GPU_CL, float>::Init(
// filter 1 72
DLOG << "init channel_mul_d2";
this->cl_helper_.AddKernel("channel_mul_d2", "elementwise_mul_kernel.cl");
} else if (bias_dim_size == 3) {
DLOG << "init channel_mul_d3";
this->cl_helper_.AddKernel("channel_mul_d3", "elementwise_mul_kernel.cl");
} else if (bias_dim_size == 4) {
DLOG << "init channel_mul_d4";
this->cl_helper_.AddKernel("channel_mul_d4", "elementwise_mul_kernel.cl");
@@ -140,6 +143,38 @@ void ElementwiseMulKernel<GPU_CL, float>::Compute(
CL_CHECK_ERRORS(status);
// bias->PrintTensor(*bias);
} else if (bias_dim_size == 3) {
    DLOG << "channel_mul_d3";
    // e.g. input 1 72 28 28
    //      bias  72 1 1 (one scale per channel, broadcast over h and w)
    DLOG << "input->ImageDims(): " << input->ImageDims();
    DLOG << "bias->ImageDims(): " << bias->ImageDims();
    DLOG << "out->ImageDims(): " << output->ImageDims();
    DLOG << "channel mul d3";
    cl_mem input_image = input->GetCLImage();
    cl_mem bias_image = bias->GetCLImage();
    cl_mem output_image = output->GetCLImage();
    int tensor_w = input->dims()[input->dims().size() - 1];
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem),
                            reinterpret_cast<void *>(&input_image));
    CL_CHECK_ERRORS(status);
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem),
                            reinterpret_cast<void *>(&bias_image));
    CL_CHECK_ERRORS(status);
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem),
                            reinterpret_cast<void *>(&output_image));
    CL_CHECK_ERRORS(status);
    status = clSetKernelArg(kernel, 3, sizeof(cl_int),
                            reinterpret_cast<void *>(&tensor_w));
    CL_CHECK_ERRORS(status);
    auto width = input->ImageWidth();
    auto height = input->ImageHeight();
    size_t global_work_size[2] = {width, height};
    status =
        clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                               NULL, global_work_size, NULL, 0, NULL, NULL);
    CL_CHECK_ERRORS(status);
} else if (bias_dim_size == 4) {
DLOG << "channel_mul_d4";
// e.g. input 1 72 28 28
@@ -148,7 +183,7 @@ void ElementwiseMulKernel<GPU_CL, float>::Compute(
DLOG << "bias->ImageDims(): " << bias->ImageDims();
DLOG << "out->ImageDims(): " << output->ImageDims();
DLOG << "channel mul d2";
DLOG << "channel mul d4";
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
......
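The d3 branch above launches one work-item per output pixel over the full image extents, leaving the local work size to the driver. Isolated as a hedged helper (standard OpenCL API calls only; the wrapper itself is illustrative, not from this commit):

#include <CL/cl.h>

// One work-item per image pixel; the NULL local size lets the driver choose.
cl_int launch_image2d(cl_command_queue queue, cl_kernel kernel,
                      size_t width, size_t height) {
  size_t global_work_size[2] = {width, height};
  return clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_work_size,
                                NULL, 0, NULL, NULL);
}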
@@ -181,7 +181,7 @@ void Transpose2Compute(const Transpose2Param<GPU_CL> &param, cl_context context,
}
}
output->InitEmptyImage(context, commandQueue, output_tensor->dims());
// output->InitEmptyImage(context, commandQueue, output_tensor->dims());
framework::TensorToCLImage(output_tensor, output, context, commandQueue,
kernel1);
delete (input_tensor);
@@ -197,14 +197,18 @@ void Transpose2Kernel<GPU_CL, float>::Compute(
const std::vector<int> &axis = param.Axis();
bool shuffle_channel = IsShuffleChannel(axis);
if (shuffle_channel) {
DLOG << "transpose shuffle_channel .. ";
ShuffleChannelCompute<float>(param, this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue(), kernel0,
kernel1);
} else {
DLOG << "transpose 2 compute .. ";
Transpose2Compute<float>(param, this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue(), kernel0,
kernel1);
}
DLOG << "transpose end .. ";
}
template class Transpose2Kernel<GPU_CL, float>;
......
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <string>
#include "../test_helper.h"
#include "../test_include.h"
void test(int argc, char *argv[]);
int main(int argc, char *argv[]) {
@@ -175,6 +175,7 @@ void test(int argc, char *argv[]) {
auto time7 = time();
paddle_mobile.Predict();
auto time8 = time();
usleep(1000 * quantification_fold);
const double diff_time_single = time_diff(time7, time8);
max_time = fmax(diff_time_single, max_time);
min_time = fmin(diff_time_single, min_time);
......
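The test change inserts a usleep between timed Predict() calls. Reconstructed as a sketch of how the hunk sits inside test() (the loop frame and declarations are assumptions; time(), time_diff(), quantification_fold, and paddle_mobile come from the test file itself):

#include <unistd.h>
#include <cmath>
#include <limits>

// Hedged sketch: the loop body lines are from the hunk above; the loop
// frame and `repeat` bound are assumed, not copied from the commit.
double max_time = 0.0;
double min_time = std::numeric_limits<double>::max();
for (int i = 0; i < repeat; ++i) {
  auto time7 = time();
  paddle_mobile.Predict();
  auto time8 = time();
  usleep(1000 * quantification_fold);  // settle between timed runs
  const double diff_time_single = time_diff(time7, time8);
  max_time = fmax(diff_time_single, max_time);
  min_time = fmin(diff_time_single, min_time);
}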