Update conv3x3 kernels to use InitNImage for filters and fix CLImageConverterNWBlock index computation

7ad76355 · zhaojiaying01 · 547aeb4a · 7ad76355 · 7ad76355 · 7ad76355
3 changed files
--- a/src/framework/cl/cl_image_converter.cpp
+++ b/src/framework/cl/cl_image_converter.cpp
@@ -248,8 +248,8 @@ void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
    for (int c = 0; c < C; c++) {
      for (int h = 0; h < H; ++h) {
        for (int w = 0; w < W; ++w) {
-          int index = 4 * c * (width * H) + 4 * (n / 4) * H * W + h * W * 4 +
+          int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
-                      w * 4 + (n % 4);
+                      w * 4 + n % 4;
          if (n < N) {
            image[index] = Float2Half(*p);
            p++;
@@ -283,8 +283,8 @@ void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
    for (int c = 0; c < C; c++) {
      for (int h = 0; h < H; ++h) {
        for (int w = 0; w < W; ++w) {
-          int index = 4 * c * (width * H) + 4 * (n / 4) * H * W + h * W * 4 +
+          int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
-                      w * 4 + (n % 4);
+                      w * 4 + n % 4;
          *p = Half2Float(image[index]);
          p++;
          if (index >= (width * height * 4)) {

--- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
@@ -150,8 +150,8 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
  } else if (param->Filter()->dims()[2] == 3 &&
             param->Filter()->dims()[3] == 3) {
-    param->Filter()->InitCLImage(cl_helper_.CLContext(),
+    param->Filter()->InitNImage(cl_helper_.CLContext(),
-                                 cl_helper_.CLCommandQueue());
+                                cl_helper_.CLCommandQueue());
    this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
    DLOG << " conv add bn relu conv_3x3";

--- a/src/operators/kernel/cl/conv_add_kernel.cpp
+++ b/src/operators/kernel/cl/conv_add_kernel.cpp
@@ -45,7 +45,7 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
  } else if (param->Filter()->dims()[2] == 3 &&
             param->Filter()->dims()[3] == 3) {
-    param->Filter()->InitCLImage(cl_helper_.CLContext(),
+    param->Filter()->InitNImage(cl_helper_.CLContext(),
                                 cl_helper_.CLCommandQueue());
    this->cl_helper_.AddKernel("conv_3x3", "conv_add_kernel.cl");