提交 2995645a 编写于 作者: X xiebaiyuan 提交者: GitHub

support gender (#3054)

* [mobile][opencl] increase male2fe gan model

* element mul d3

* remove useless InitEmptyImage call in transpose2, test=develop

* update test ,test=develop
上级 7bba07bf
...@@ -112,6 +112,25 @@ __kernel void channel_mul_d2(__global image2d_t input, __global image2d_t bias, ...@@ -112,6 +112,25 @@ __kernel void channel_mul_d2(__global image2d_t input, __global image2d_t bias,
write_imageh(outputImage, coords, output); write_imageh(outputImage, coords, output);
} }
// Element-wise multiply with a 3-D bias shaped (C, 1, 1): every pixel of
// `input` is scaled by the per-channel factor stored in `bias`.
// `w` is the tensor width of the input image, so x / w recovers the
// channel-block column in the bias image (one half4 covers 4 channels;
// the bias image has a single row).
__kernel void channel_mul_d3(__global image2d_t input, __global image2d_t bias,
                             __write_only image2d_t outputImage, int w) {
  int x = get_global_id(0);
  int y = get_global_id(1);
  // Integer coordinates require CLK_NORMALIZED_COORDS_FALSE: per the OpenCL
  // spec, read_image* with int coords is undefined when normalized
  // coordinates are enabled on the sampler.
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
  int2 coords;
  coords.x = x;
  coords.y = y;
  int2 coords_bias;
  coords_bias.x = x / w;  // channel-block index along the row
  coords_bias.y = 0;      // bias image is one row tall
  half4 in = read_imageh(input, sampler, coords);
  half4 biase = read_imageh(bias, sampler, coords_bias);
  half4 output = in * biase;
  write_imageh(outputImage, coords, output);
}
__kernel void channel_mul_d4(__global image2d_t input, __global image2d_t bias, __kernel void channel_mul_d4(__global image2d_t input, __global image2d_t bias,
__write_only image2d_t outputImage, int w) { __write_only image2d_t outputImage, int w) {
int x = get_global_id(0); int x = get_global_id(0);
......
...@@ -203,8 +203,20 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -203,8 +203,20 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT; param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
// std::cout << " input dim " << param->Input()->dims()[0] << " "
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options); // << param->Input()->dims()[1] << " " <<
// param->Input()->dims()[2]
// << " " << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " " <<
// param->Output()->dims()[2]
// << " " << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " " <<
// param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options);
// } // }
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support "); PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
...@@ -222,7 +234,7 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute( ...@@ -222,7 +234,7 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
param.NewScale(), param.NewBias()); param.NewScale(), param.NewBias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: // case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(), ConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(),
...@@ -232,6 +244,9 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute( ...@@ -232,6 +244,9 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
DWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(), DWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias(),
param.NewScale(), param.NewBias()); param.NewScale(), param.NewBias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias());
break;
default: default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d", PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode()); param.ExecMode());
......
...@@ -99,8 +99,20 @@ bool ConvAddReluKernel<GPU_CL, float>::Init( ...@@ -99,8 +99,20 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT; param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
// std::cout << " input dim " << param->Input()->dims()[0] << " "
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options); // << param->Input()->dims()[1] << " " <<
// param->Input()->dims()[2]
// << " " << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " " <<
// param->Output()->dims()[2]
// << " " << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " " <<
// param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options);
// } // }
} else if (param->Filter()->dims()[2] == 7 && } else if (param->Filter()->dims()[2] == 7 &&
...@@ -134,7 +146,7 @@ void ConvAddReluKernel<GPU_CL, float>::Compute( ...@@ -134,7 +146,7 @@ void ConvAddReluKernel<GPU_CL, float>::Compute(
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias()); WinogradConv3x3<4, 3>(&this->cl_helper_, param, true, param.Bias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW5x5_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
...@@ -144,6 +156,9 @@ void ConvAddReluKernel<GPU_CL, float>::Compute( ...@@ -144,6 +156,9 @@ void ConvAddReluKernel<GPU_CL, float>::Compute(
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3S1_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3S1_FLOAT:
DWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias()); DWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true, param.Bias());
break;
default: default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d", PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode()); param.ExecMode());
......
...@@ -157,7 +157,7 @@ bool ConvBNReluKernel<GPU_CL, float>::Init( ...@@ -157,7 +157,7 @@ bool ConvBNReluKernel<GPU_CL, float>::Init(
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options); this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file, build_options);
// } // }
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support "); PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
...@@ -174,7 +174,7 @@ void ConvBNReluKernel<GPU_CL, float>::Compute( ...@@ -174,7 +174,7 @@ void ConvBNReluKernel<GPU_CL, float>::Compute(
param.NewScale(), param.NewBias()); param.NewScale(), param.NewBias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: // case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(), ConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
...@@ -184,6 +184,10 @@ void ConvBNReluKernel<GPU_CL, float>::Compute( ...@@ -184,6 +184,10 @@ void ConvBNReluKernel<GPU_CL, float>::Compute(
DWConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(), DWConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
param.NewBias()); param.NewBias());
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true, nullptr, param.NewScale(),
param.NewBias());
break;
default: default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d", PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode()); param.ExecMode());
......
...@@ -93,8 +93,20 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -93,8 +93,20 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT; param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
// std::cout << " input dim " << param->Input()->dims()[0] << " "
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file); // << param->Input()->dims()[1] << " " <<
// param->Input()->dims()[2]
// << " " << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " " <<
// param->Output()->dims()[2]
// << " " << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " " <<
// param->Filter()->dims()[2]
// << " " << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file);
// } // }
DLOG << "conv 3x3"; DLOG << "conv 3x3";
} else if (param->Filter()->dims()[2] == 7 && } else if (param->Filter()->dims()[2] == 7 &&
...@@ -120,7 +132,7 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { ...@@ -120,7 +132,7 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
WinogradConv3x3<4, 3>(&this->cl_helper_, param); WinogradConv3x3<4, 3>(&this->cl_helper_, param);
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: // case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW7x7_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
...@@ -129,6 +141,9 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { ...@@ -129,6 +141,9 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3S1_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3S1_FLOAT:
DWConvAddBnRelu(&this->cl_helper_, param); DWConvAddBnRelu(&this->cl_helper_, param);
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param);
break;
default: default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d", PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode()); param.ExecMode());
......
...@@ -106,7 +106,21 @@ bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) { ...@@ -106,7 +106,21 @@ bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) {
param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT; param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options); // std::cout << " input dim " << param->Input()->dims()[0] << " "
// << param->Input()->dims()[1] << " "
// << param->Input()->dims()[2] << " "
// << param->Input()->dims()[3] << " " << std::endl;
// std::cout << " output dim " << param->Output()->dims()[0] << " "
// << param->Output()->dims()[1] << " "
// << param->Output()->dims()[2] << " "
// << param->Output()->dims()[3] << " " << std::endl;
// std::cout << " filter dim " << param->Filter()->dims()[0] << " "
// << param->Filter()->dims()[1] << " "
// << param->Filter()->dims()[2] << " "
// << param->Filter()->dims()[3] << " " << std::endl;
this->cl_helper_.AddKernel("conv_3x3spl", conv_kernel_file,
build_options);
} }
// } // }
DLOG << "conv 3x3"; DLOG << "conv 3x3";
...@@ -126,7 +140,7 @@ void ConvReluKernel<GPU_CL, float>::Compute( ...@@ -126,7 +140,7 @@ void ConvReluKernel<GPU_CL, float>::Compute(
WinogradConv3x3<4, 3>(&this->cl_helper_, param, true); WinogradConv3x3<4, 3>(&this->cl_helper_, param, true);
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT: // case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT: case ConvParam<GPU_CL>::EXEC_DEPTHWISEBASIC_FLOAT:
ConvAddBnRelu(&this->cl_helper_, param, true); ConvAddBnRelu(&this->cl_helper_, param, true);
...@@ -137,6 +151,9 @@ void ConvReluKernel<GPU_CL, float>::Compute( ...@@ -137,6 +151,9 @@ void ConvReluKernel<GPU_CL, float>::Compute(
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3S1_FLOAT: case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true); SWConvAddBnRelu(&this->cl_helper_, param, true);
break; break;
case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
SWConvAddBnRelu(&this->cl_helper_, param, true);
break;
default: default:
PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d", PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
param.ExecMode()); param.ExecMode());
......
...@@ -40,6 +40,9 @@ bool ElementwiseMulKernel<GPU_CL, float>::Init( ...@@ -40,6 +40,9 @@ bool ElementwiseMulKernel<GPU_CL, float>::Init(
// filter 1 72 // filter 1 72
DLOG << "init channel_mul_d2"; DLOG << "init channel_mul_d2";
this->cl_helper_.AddKernel("channel_mul_d2", "elementwise_mul_kernel.cl"); this->cl_helper_.AddKernel("channel_mul_d2", "elementwise_mul_kernel.cl");
} else if (bias_dim_size == 3) {
DLOG << "init channel_mul_d3";
this->cl_helper_.AddKernel("channel_mul_d3", "elementwise_mul_kernel.cl");
} else if (bias_dim_size == 4) { } else if (bias_dim_size == 4) {
DLOG << "init channel_mul_d4"; DLOG << "init channel_mul_d4";
this->cl_helper_.AddKernel("channel_mul_d4", "elementwise_mul_kernel.cl"); this->cl_helper_.AddKernel("channel_mul_d4", "elementwise_mul_kernel.cl");
...@@ -140,6 +143,38 @@ void ElementwiseMulKernel<GPU_CL, float>::Compute( ...@@ -140,6 +143,38 @@ void ElementwiseMulKernel<GPU_CL, float>::Compute(
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
// bias->PrintTensor(*bias); // bias->PrintTensor(*bias);
} else if (bias_dim_size == 3) {
DLOG << "channel_mul_d3";
// etc. input 1 72 28 28
// filter 1 72 --> 1 1 1 72
DLOG << "input->ImageDims(): " << input->ImageDims();
DLOG << "bias->ImageDims(): " << bias->ImageDims();
DLOG << "out->ImageDims(): " << output->ImageDims();
DLOG << "channel mul d3";
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
int tensor_w = input->dims()[input->dims().size() - 1];
status = clSetKernelArg(kernel, 0, sizeof(cl_mem),
reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem),
reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem),
reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int),
reinterpret_cast<void *>(&tensor_w));
CL_CHECK_ERRORS(status);
auto width = input->ImageWidth();
auto height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else if (bias_dim_size == 4) { } else if (bias_dim_size == 4) {
DLOG << "channel_mul_d4"; DLOG << "channel_mul_d4";
// etc. input 1 72 28 28 // etc. input 1 72 28 28
...@@ -148,7 +183,7 @@ void ElementwiseMulKernel<GPU_CL, float>::Compute( ...@@ -148,7 +183,7 @@ void ElementwiseMulKernel<GPU_CL, float>::Compute(
DLOG << "bias->ImageDims(): " << bias->ImageDims(); DLOG << "bias->ImageDims(): " << bias->ImageDims();
DLOG << "out->ImageDims(): " << output->ImageDims(); DLOG << "out->ImageDims(): " << output->ImageDims();
DLOG << "channel mul d2"; DLOG << "channel mul d4";
cl_mem input_image = input->GetCLImage(); cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage(); cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage(); cl_mem output_image = output->GetCLImage();
......
...@@ -181,7 +181,7 @@ void Transpose2Compute(const Transpose2Param<GPU_CL> &param, cl_context context, ...@@ -181,7 +181,7 @@ void Transpose2Compute(const Transpose2Param<GPU_CL> &param, cl_context context,
} }
} }
output->InitEmptyImage(context, commandQueue, output_tensor->dims()); // output->InitEmptyImage(context, commandQueue, output_tensor->dims());
framework::TensorToCLImage(output_tensor, output, context, commandQueue, framework::TensorToCLImage(output_tensor, output, context, commandQueue,
kernel1); kernel1);
delete (input_tensor); delete (input_tensor);
...@@ -197,14 +197,18 @@ void Transpose2Kernel<GPU_CL, float>::Compute( ...@@ -197,14 +197,18 @@ void Transpose2Kernel<GPU_CL, float>::Compute(
const std::vector<int> &axis = param.Axis(); const std::vector<int> &axis = param.Axis();
bool shuffle_channel = IsShuffleChannel(axis); bool shuffle_channel = IsShuffleChannel(axis);
if (shuffle_channel) { if (shuffle_channel) {
DLOG << "transpose shuffle_channel .. ";
ShuffleChannelCompute<float>(param, this->cl_helper_.CLContext(), ShuffleChannelCompute<float>(param, this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue(), kernel0, this->cl_helper_.CLCommandQueue(), kernel0,
kernel1); kernel1);
} else { } else {
DLOG << "transpose 2 compute .. ";
Transpose2Compute<float>(param, this->cl_helper_.CLContext(), Transpose2Compute<float>(param, this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue(), kernel0, this->cl_helper_.CLCommandQueue(), kernel0,
kernel1); kernel1);
} }
DLOG << "transpose end .. ";
} }
template class Transpose2Kernel<GPU_CL, float>; template class Transpose2Kernel<GPU_CL, float>;
......
...@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <unistd.h>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include "../test_helper.h" #include "../test_helper.h"
#include "../test_include.h" #include "../test_include.h"
void test(int argc, char *argv[]); void test(int argc, char *argv[]);
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
...@@ -175,6 +175,7 @@ void test(int argc, char *argv[]) { ...@@ -175,6 +175,7 @@ void test(int argc, char *argv[]) {
auto time7 = time(); auto time7 = time();
paddle_mobile.Predict(); paddle_mobile.Predict();
auto time8 = time(); auto time8 = time();
usleep(1000 * quantification_fold);
const double diff_time_single = time_diff(time7, time8); const double diff_time_single = time_diff(time7, time8);
max_time = fmax(diff_time_single, max_time); max_time = fmax(diff_time_single, max_time);
min_time = fmin(diff_time_single, min_time); min_time = fmin(diff_time_single, min_time);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册