提交 7ad76355 编写于 作者: Z zhaojiaying01

Update conv3x3 filter initialization (InitCLImage -> InitNImage) and fix the image index computation in CLImageConverterNWBlock

上级 547aeb4a
...@@ -248,8 +248,8 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image, ...@@ -248,8 +248,8 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
for (int c = 0; c < C; c++) { for (int c = 0; c < C; c++) {
for (int h = 0; h < H; ++h) { for (int h = 0; h < H; ++h) {
for (int w = 0; w < W; ++w) { for (int w = 0; w < W; ++w) {
int index = 4 * c * (width * H) + 4 * (n / 4) * H * W + h * W * 4 + int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + (n % 4); w * 4 + n % 4;
if (n < N) { if (n < N) {
image[index] = Float2Half(*p); image[index] = Float2Half(*p);
p++; p++;
...@@ -283,8 +283,8 @@ void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor, ...@@ -283,8 +283,8 @@ void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
for (int c = 0; c < C; c++) { for (int c = 0; c < C; c++) {
for (int h = 0; h < H; ++h) { for (int h = 0; h < H; ++h) {
for (int w = 0; w < W; ++w) { for (int w = 0; w < W; ++w) {
int index = 4 * c * (width * H) + 4 * (n / 4) * H * W + h * W * 4 + int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + (n % 4); w * 4 + n % 4;
*p = Half2Float(image[index]); *p = Half2Float(image[index]);
p++; p++;
if (index >= (width * height * 4)) { if (index >= (width * height * 4)) {
......
...@@ -150,8 +150,8 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init( ...@@ -150,8 +150,8 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
} else if (param->Filter()->dims()[2] == 3 && } else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) { param->Filter()->dims()[3] == 3) {
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl"); this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu conv_3x3"; DLOG << " conv add bn relu conv_3x3";
......
...@@ -45,7 +45,7 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) { ...@@ -45,7 +45,7 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
} else if (param->Filter()->dims()[2] == 3 && } else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) { param->Filter()->dims()[3] == 3) {
param->Filter()->InitCLImage(cl_helper_.CLContext(), param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue()); cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_kernel.cl"); this->cl_helper_.AddKernel("conv_3x3", "conv_add_kernel.cl");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册