From 15a0c2b2b2423b174906cfd100d62866142cb579 Mon Sep 17 00:00:00 2001
From: liuqi <liuqi10@xiaomi.com>
Date: Mon, 19 Mar 2018 09:29:39 +0800
Subject: [PATCH] Winograd script: support multiple types, F(2,3), F(4,3) and F(6,3).

---
 mace/kernels/opencl/winograd_transform.cc |  10 +-
 tools/wino_conv.py                        | 176 +++++++++++++++-------
 2 files changed, 127 insertions(+), 59 deletions(-)

diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc
index ee7d5d12..aa67b20d 100644
--- a/mace/kernels/opencl/winograd_transform.cc
+++ b/mace/kernels/opencl/winograd_transform.cc
@@ -32,12 +32,12 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(
   const index_t round_w = (output_shape[2] + 1) / 2;
   const index_t out_width = input_tensor->dim(0) * round_h * round_w;
 
-  if (kernel_.get() == nullptr) {
-    output_shape = {16, input_tensor->dim(3), out_width, 1};
-    std::vector<size_t> image_shape;
-    CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, image_shape);
-    output_tensor->ResizeImage(output_shape, image_shape);
+  output_shape = {16, input_tensor->dim(3), out_width, 1};
+  std::vector<size_t> image_shape;
+  CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, image_shape);
+  output_tensor->ResizeImage(output_shape, image_shape);
 
+  if (kernel_.get() == nullptr) {
     std::string obfuscated_kernel_name =
         MACE_OBFUSCATE_SYMBOL("winograd_transform_2x2");
     std::set<std::string> built_options;
diff --git a/tools/wino_conv.py b/tools/wino_conv.py
index a8cdf3d8..383def86 100644
--- a/tools/wino_conv.py
+++ b/tools/wino_conv.py
@@ -2,22 +2,89 @@ import numpy as np
 import math
 import tensorflow as tf
 
-A_T = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
-A = np.transpose(A_T)
-B_T = np.array([
+A_T = {}
+A = {}
+B_T = {}
+B = {}
+G = {}
+G_T = {}
+# F(2, 3): m = 2, r = 3; tables keyed by tile size alpha = m + r - 1 = 4
+A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
+A[4] = np.transpose(A_T[4])
+B_T[4] = np.array([
   [1, 0, -1, 0],
   [0, 1, 1, 0],
   [0, -1, 1, 0],
   [0, 1, 0, -1]
 ]).astype(np.float32)
-B = np.transpose(B_T)
-G = np.array([
+B[4] = np.transpose(B_T[4])
+G[4] = np.array([
   [1, 0, 0],
   [0.5, 0.5, 0.5],
   [0.5, -0.5, 0.5],
   [0, 0, 1],
 ]).astype(np.float32)
-G_T = np.transpose(G)
+G_T[4] = np.transpose(G[4])
+
+# F(4, 3): m = 4, r = 3; tables keyed by tile size alpha = m + r - 1 = 6
+A_T[6] = np.array([
+  [1, 1,  1, 1,  1, 0],
+  [0, 1, -1, 2, -2, 0],
+  [0, 1,  1, 4,  4, 0],
+  [0, 1, -1, 8, -8, 1],
+]).astype(np.float32)
+A[6] = np.transpose(A_T[6])
+B_T[6] = np.array([
+  [4,  0, -5,  0, 1, 0],
+  [0, -4, -4,  1, 1, 0],
+  [0,  4, -4, -1, 1, 0],
+  [0, -2, -1,  2, 1, 0],
+  [0,  2, -1, -2, 1, 0],
+  [0,  4,  0, -5, 0, 1],
+]).astype(np.float32)
+B[6] = np.transpose(B_T[6])
+G[6] = np.array([
+  [1/4.0 ,   0    ,  0    ],
+  [-1/6.0, -1/6.0 , -1/6.0],
+  [-1/6.0,  1/6.0 , -1/6.0],
+  [1/24.0, 1/12.0 , 1/6.0 ],
+  [1/24.0, -1/12.0, 1/6.0 ],
+  [ 0    ,  0     ,  1    ],
+]).astype(np.float32)
+G_T[6] = np.transpose(G[6])
+
+# F(6, 3): m = 6, r = 3; tables keyed by tile size alpha = m + r - 1 = 8
+A_T[8] = np.array([
+  [1, 1, 1 , 1 ,  1 ,  1  ,   1  , 0],
+  [0, 1, -1, 2 , -2 , 1/2. , -1/2. , 0],
+  [0, 1, 1 , 4 ,  4 , 1/4. ,  1/4. , 0],
+  [0, 1, -1, 8 , -8 , 1/8. , -1/8. , 0],
+  [0, 1, 1 , 16, 16 , 1/16., 1/16. , 0],
+  [0, 1, -1, 32, -32, 1/32., -1/32., 1],
+]).astype(np.float32)
+A[8] = np.transpose(A_T[8])
+B_T[8] = np.array([
+  [1,  0  , -21/4.,   0  ,  21/4.,   0  , -1, 0],
+  [0,  1  ,   1  , -17/4., -17/4.,   1  , 1 , 0],
+  [0,  -1 ,   1  , 17/4. , -17/4.,  -1  , 1 , 0],
+  [0, 1/2. ,  1/4. , -5/2. , -5/4.,   2  , 1 , 0],
+  [0, -1/2.,  1/4. ,  5/2. , -5/4.,  -2  , 1 , 0],
+  [0,  2  ,   4  , -5/2. ,  -5  ,  1/2. , 1 , 0],
+  [0,  -2 ,   4  ,  5/2. ,  -5  , -1/2. , 1 , 0],
+  [0,  -1 ,   0  , 21/4. ,   0  , -21/4., 0 , 1],
+]).astype(np.float32)
+B[8] = np.transpose(B_T[8])
+G[8] = np.array([
+ [ 1    ,   0    ,  0  ],
+ [-2/9. , -2/9.  , -2/9.],
+ [-2/9. ,  2/9.  , -2/9.],
+ [1/90. , 1/45.  , 2/45.],
+ [1/90. , -1/45. , 2/45.],
+ [32/45., 16/45. , 8/45.],
+ [32/45., -16/45., 8/45.],
+ [ 0    ,  0     ,  1   ],
+]).astype(np.float32)
+G_T[8] = np.transpose(G[8])
 
 
 def output_shape(input_shape, filter_shape):
@@ -29,55 +96,54 @@ def output_shape(input_shape, filter_shape):
   return out_shape
 
 
-def winog_conv(input, filter):
-  m = 2
-  r = 3
+def winog_conv(m, r, input, filter):
   alpha = m + r - 1
+  print 'Winograd(m = %d, r = %d, tile size=%d' % (m, r, alpha)
+  alpha_square = alpha * alpha
   input_shape = input.shape
   filter_shape = filter.shape
   out_shape = output_shape(input_shape, filter_shape)
 
   K = filter_shape[0]
   C = input_shape[1]
-  U = np.zeros((K * 16, C))
+  U = np.zeros((K * alpha_square, C))
 
   for k in range(K):
     for c in range(C):
-      u = np.dot(np.dot(G, filter[k, c, :, :]), G_T)
-      for i in range(4):
-        for j in range(4) :
-          U[(i * 4 + j) * K + k, c] = u[i, j]
+      u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha])
+      for i in range(alpha):
+        for j in range(alpha) :
+          U[(i * alpha + j) * K + k, c] = u[i, j]
 
   print 'filter out: ', U.shape
-  print U[0, 0]
-  U.astype(np.float32).tofile("filter_out")
 
-  rounded_h = int(math.ceil(out_shape[2] / 2.0))
-  rounded_w = int(math.ceil(out_shape[3] / 2.0))
+  rounded_h = int(math.ceil(out_shape[2] / (m * 1.0)))
+  rounded_w = int(math.ceil(out_shape[3] / (m * 1.0)))
   P = input_shape[0] * rounded_h * rounded_w
-  V = np.zeros((C * 16, P))
+  V = np.zeros((C * alpha_square, P))
   for p in range(P):
     for c in range(C):
       n = p / (rounded_w * rounded_h)
       t = p % (rounded_h * rounded_w)
       h_idx = t / rounded_w
       w_idx = t % rounded_w
-      h_start = h_idx * 2
-      w_start = w_idx * 2
-      h_end = min(h_start+4, input_shape[2])
-      w_end = min(w_start+4, input_shape[3])
-      d = np.zeros((4, 4))
-      d[0:h_end-h_start, 0:w_end-w_start] = input[n, c, h_start:h_end, w_start:w_end]
-      v = np.dot(np.dot(B_T, d), B)
-      for i in range(4):
-        for j in range(4):
-          V[(i*4+j)*C + c, p] = v[i, j]
-
-  tmp = V.reshape(16, C, P, 1)
+      h_start = h_idx * m
+      w_start = w_idx * m
+      h_end = min(h_start+alpha, input_shape[2])
+      w_end = min(w_start+alpha, input_shape[3])
+      d = np.zeros((alpha, alpha))
+      d[0:h_end-h_start, 0:w_end-w_start] = \
+              input[n, c, h_start:h_end, w_start:w_end]
+      v = np.dot(np.dot(B_T[alpha], d), B[alpha])
+      for i in range(alpha):
+        for j in range(alpha):
+          V[(i*alpha+j)*C + c, p] = v[i, j]
+
+  tmp = V.reshape(alpha_square, C, P, 1)
   print 'input out: ', tmp.shape
   tmp.astype(np.float32).tofile("C")
-  M = np.zeros((16 * K, P))
-  for i in range(alpha * alpha):
+  M = np.zeros((alpha_square * K, P))
+  for i in range(alpha_square):
     u = U[i * K : (i+1) * K, :]
     v = V[i * C : (i+1) * C, :]
     M[i * K : (i+1) * K, :] = np.dot(u, v)
@@ -87,17 +153,17 @@ def winog_conv(input, filter):
   res = np.zeros((out_shape[0], out_shape[2], out_shape[3], out_shape[1]))
   for k in range(K):
     for b in range(P):
-      m = np.zeros((4, 4))
-      for i in range(4):
-        for j in range(4):
-          m[i][j] = M[(i*4+j) * K + k, b]
-      y = np.dot(np.dot(A_T, m), A)
-      for i in range(2):
-        for j in range(2):
+      tm = np.zeros((alpha, alpha))
+      for i in range(alpha):
+        for j in range(alpha):
+          tm[i][j] = M[(i*alpha+j) * K + k, b]
+      y = np.dot(np.dot(A_T[alpha], tm), A[alpha])
+      for i in range(m):
+        for j in range(m):
           n = b / (rounded_h * rounded_w)
           t = b % (rounded_h * rounded_w)
-          p = (t / rounded_w) * 2 + i
-          q = (t % rounded_w) * 2 + j
+          p = (t / rounded_w) * m + i
+          q = (t % rounded_w) * m + j
           if p >= out_shape[2] or q >= out_shape[3]:
             continue
           res[n, p, q, k] = y[i, j]
@@ -115,25 +181,27 @@ def tf_conv(input, filter):
 
 
 def main():
-  input = np.random.random([7, 61, 71, 31]).astype(np.float32)
+  input = np.random.random([5, 23, 29, 15]).astype(np.float32)
   # input = np.fromfile(file="A", dtype=np.float32)
   # input = input.reshape(1, 3, 3, 5)
   print 'input shape: ', input.shape
-  input.tofile("A")
-  filter = np.random.random([3, 3, 31, 31]).astype(np.float32)
+  # input.tofile("A")
+  filter = np.random.random([3, 3, 15, 13]).astype(np.float32)
   tf_out = tf_conv(input, filter)
   input = input.transpose((0, 3, 1, 2))
   filter = filter.transpose((3, 2, 0, 1))
   print 'filter shape: ', filter.shape
-  filter.tofile("filter_in")
-  winog_out = winog_conv(input, filter)
-  res = np.allclose(tf_out, winog_out)
-  if res:
-    print "=========Pass========="
-  else:
-    print "=========Failed========="
-    print "TF: ", tf_out
-    print "Winograd: ", winog_out
+  # filter.tofile("filter_in")
+  for i in [2, 4, 6]:
+    print "==========f(%d,3)==========" % i
+    winog_out = winog_conv(i, 3, input, filter)
+    res = np.allclose(tf_out, winog_out)
+    if res:
+      print "=========Pass========="
+    else:
+      print "=========Failed======="
+      print "TF: ", tf_out
+      print "Winograd: ", winog_out
 
 
 if __name__ == '__main__':
-- 
GitLab