import numpy as np import math import tensorflow as tf A_T = {} A = {} B_T = {} B = {} G = {} G_T = {} # f(2, 3) A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32) A[4] = np.transpose(A_T[4]) B_T[4] = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0], [0, 1, 0, -1]]).astype(np.float32) B[4] = np.transpose(B_T[4]) G[4] = np.array([ [1, 0, 0], [0.5, 0.5, 0.5], [0.5, -0.5, 0.5], [0, 0, 1], ]).astype(np.float32) G_T[4] = np.transpose(G[4]) # f(4, 3) A_T[6] = np.array([ [1, 1, 1, 1, 1, 0], [0, 1, -1, 2, -2, 0], [0, 1, 1, 4, 4, 0], [0, 1, -1, 8, -8, 1], ]).astype(np.float32) A[6] = np.transpose(A_T[6]) B_T[6] = np.array([ [4, 0, -5, 0, 1, 0], [0, -4, -4, 1, 1, 0], [0, 4, -4, -1, 1, 0], [0, -2, -1, 2, 1, 0], [0, 2, -1, -2, 1, 0], [0, 4, 0, -5, 0, 1], ]).astype(np.float32) B[6] = np.transpose(B_T[6]) G[6] = np.array([ [1 / 4.0, 0, 0], [-1 / 6.0, -1 / 6.0, -1 / 6.0], [-1 / 6.0, 1 / 6.0, -1 / 6.0], [1 / 24.0, 1 / 12.0, 1 / 6.0], [1 / 24.0, -1 / 12.0, 1 / 6.0], [0, 0, 1], ]).astype(np.float32) G_T[6] = np.transpose(G[6]) # f(6, 3) A_T[8] = np.array([ [1, 1, 1, 1, 1, 1, 1, 0], [0, 1, -1, 2, -2, 1 / 2., -1 / 2., 0], [0, 1, 1, 4, 4, 1 / 4., 1 / 4., 0], [0, 1, -1, 8, -8, 1 / 8., -1 / 8., 0], [0, 1, 1, 16, 16, 1 / 16., 1 / 16., 0], [0, 1, -1, 32, -32, 1 / 32., -1 / 32., 1], ]).astype(np.float32) A[8] = np.transpose(A_T[8]) B_T[8] = np.array([ [1, 0, -21 / 4., 0, 21 / 4., 0, -1, 0], [0, 1, 1, -17 / 4., -17 / 4., 1, 1, 0], [0, -1, 1, 17 / 4., -17 / 4., -1, 1, 0], [0, 1 / 2., 1 / 4., -5 / 2., -5 / 4., 2, 1, 0], [0, -1 / 2., 1 / 4., 5 / 2., -5 / 4., -2, 1, 0], [0, 2, 4, -5 / 2., -5, 1 / 2., 1, 0], [0, -2, 4, 5 / 2., -5, -1 / 2., 1, 0], [0, -1, 0, 21 / 4., 0, -21 / 4., 0, 1], ]).astype(np.float32) B[8] = np.transpose(B_T[8]) G[8] = np.array([ [1, 0, 0], [-2 / 9., -2 / 9., -2 / 9.], [-2 / 9., 2 / 9., -2 / 9.], [1 / 90., 1 / 45., 2 / 45.], [1 / 90., -1 / 45., 2 / 45.], [32 / 45., 16 / 45., 8 / 45.], [32 / 45., -16 / 45., 8 / 45.], [0, 0, 1], ]).astype(np.float32) G_T[8] = np.transpose(G[8]) def output_shape(input_shape, filter_shape): out_shape = np.zeros(4).astype(np.int32) out_shape[0] = input_shape[0] out_shape[1] = filter_shape[0] out_shape[2] = input_shape[2] - 2 out_shape[3] = input_shape[3] - 2 return out_shape def winograd_conv(m, r, input, filter): alpha = m + r - 1 print 'Winograd(m = %d, r = %d, tile size=%d' % (m, r, alpha) alpha_square = alpha * alpha input_shape = input.shape filter_shape = filter.shape out_shape = output_shape(input_shape, filter_shape) K = filter_shape[0] C = input_shape[1] U = np.zeros((K * alpha_square, C)) for k in range(K): for c in range(C): u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha]) for i in range(alpha): for j in range(alpha): U[(i * alpha + j) * K + k, c] = u[i, j] print 'filter out: ', U.shape rounded_h = int(math.ceil(out_shape[2] / (m * 1.0))) rounded_w = int(math.ceil(out_shape[3] / (m * 1.0))) P = input_shape[0] * rounded_h * rounded_w V = np.zeros((C * alpha_square, P)) for p in range(P): for c in range(C): n = p / (rounded_w * rounded_h) t = p % (rounded_h * rounded_w) h_idx = t / rounded_w w_idx = t % rounded_w h_start = h_idx * m w_start = w_idx * m h_end = min(h_start + alpha, input_shape[2]) w_end = min(w_start + alpha, input_shape[3]) d = np.zeros((alpha, alpha)) d[0:h_end-h_start, 0:w_end-w_start] = \ input[n, c, h_start:h_end, w_start:w_end] v = np.dot(np.dot(B_T[alpha], d), B[alpha]) for i in range(alpha): for j in range(alpha): V[(i * alpha + j) * C + c, p] = v[i, j] tmp = V.reshape(alpha_square, C, P, 1) print 'input out: ', tmp.shape tmp.astype(np.float32).tofile("C") M = np.zeros((alpha_square * K, P)) for i in range(alpha_square): u = U[i * K:(i + 1) * K, :] v = V[i * C:(i + 1) * C, :] M[i * K:(i + 1) * K, :] = np.dot(u, v) print 'M shape: ', M.shape M.astype(np.float32).tofile("gemm") res = np.zeros((out_shape[0], out_shape[2], out_shape[3], out_shape[1])) for k in range(K): for b in range(P): tm = np.zeros((alpha, alpha)) for i in range(alpha): for j in range(alpha): tm[i][j] = M[(i * alpha + j) * K + k, b] y = np.dot(np.dot(A_T[alpha], tm), A[alpha]) for i in range(m): for j in range(m): n = b / (rounded_h * rounded_w) t = b % (rounded_h * rounded_w) p = (t / rounded_w) * m + i q = (t % rounded_w) * m + j if p >= out_shape[2] or q >= out_shape[3]: continue res[n, p, q, k] = y[i, j] print 'Res shape: ', res.shape res.astype(np.float32).tofile("res") return res def tf_conv(input, filter): conv_op = tf.nn.conv2d(input, filter, [1, 1, 1, 1], 'VALID') with tf.Session() as sess: res = sess.run(conv_op) return res def main(): input = np.random.random([5, 23, 29, 15]).astype(np.float32) # input = np.fromfile(file="A", dtype=np.float32) # input = input.reshape(1, 3, 3, 5) print 'input shape: ', input.shape # input.tofile("A") filter = np.random.random([3, 3, 15, 13]).astype(np.float32) tf_out = tf_conv(input, filter) input = input.transpose((0, 3, 1, 2)) filter = filter.transpose((3, 2, 0, 1)) print 'filter shape: ', filter.shape # filter.tofile("filter_in") for i in [2, 4, 6]: print "==========f(%d,3)==========" % i winograd_out = winograd_conv(i, 3, input, filter) res = np.allclose(tf_out, winograd_out) if res: print "=========Pass=========" else: print "=========Failed=======" print "TF: ", tf_out print "Winograd: ", winograd_out if __name__ == '__main__': main()