# Copyright 2019 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from akg.utils import kernel_exec as utils from tensorio import compare_tensor import numpy as np from test_op.lstm_rnn_grad import lstmcell_grad_h from test_op.lstm_rnn_grad import lstmcell_grad_c, rnn_tanh_cell_grad, rnn_relu_cell_grad from test_op.lstm_rnn_ad import rnncell_tanh_ad, rnncell_relu_ad, lstmcell_h_ad, lstmcell_c_ad from gen_random import random_gaussian def np_sigmoid(x): return 1.0 / (1.0 + np.exp(-x)) def init_lstmcell_shapes(shape): # shape: batch_size, input_size, hidden_size batch_size, input_size, hidden_size = shape W_ih_shape = (4 * hidden_size, input_size,) W_hh_shape = (4 * hidden_size, hidden_size,) b_shape = (4 * hidden_size,) c_prev_shape = (batch_size, hidden_size,) h_prev_shape = (batch_size, hidden_size,) x_shape = (batch_size, input_size,) gradh_shape = (batch_size, hidden_size,) gradc_shape = (batch_size, hidden_size,) return x_shape, c_prev_shape, h_prev_shape, W_ih_shape, W_hh_shape, b_shape, b_shape, gradh_shape, gradc_shape def init_lstmcell_data(shapes, dtype): x_shape, c_prev_shape, h_prev_shape, W_ih_shape, W_hh_shape, b_ih_shape, b_hh_shape, gradh_shape, gradc_shape = shapes batch_size, input_size = x_shape _, hidden_size = c_prev_shape np_miu = 1.0 / hidden_size np_sigma = np_miu / 4.0 W_ih = random_gaussian(W_ih_shape, miu=np_miu, sigma=np_sigma).astype(dtype) W_hh = random_gaussian(W_hh_shape, miu=np_miu, sigma=np_sigma).astype(dtype) b_ih = random_gaussian(b_ih_shape, miu=np_miu, sigma=np_sigma).astype(dtype) b_hh = random_gaussian(b_hh_shape, miu=np_miu, sigma=np_sigma).astype(dtype) c_prev = random_gaussian(c_prev_shape, miu=np_miu, sigma=np_sigma).astype(dtype) h_prev = random_gaussian(h_prev_shape, miu=np_miu, sigma=np_sigma).astype(dtype) x = random_gaussian(x_shape, miu=np_miu, sigma=np_sigma).astype(dtype) gradh = random_gaussian(gradh_shape, miu=5 * np_miu, sigma=5 * np_sigma).astype(dtype) gradc = random_gaussian(gradc_shape, miu=5 * np_miu, sigma=5 * np_sigma).astype(dtype) dW_ih = np.full(W_ih.shape, np.nan, dtype) dW_hh = np.full(W_hh.shape, np.nan, dtype) db_ih = np.full(b_ih.shape, np.nan, dtype) db_hh = np.full(b_hh.shape, np.nan, dtype) dc_prev = np.full(c_prev.shape, np.nan, dtype) dh_prev = np.full(h_prev.shape, np.nan, dtype) dx = np.full(x.shape, np.nan, dtype) return x, c_prev, h_prev, W_ih, W_hh, b_ih, b_hh, gradh, gradc, dW_ih, dW_hh, db_ih, db_hh, dc_prev, dh_prev, dx def lstm_backward_data_h(np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradh): np_igates = np.dot(np_input, w_ih.transpose(1, 0)) + b_ih np_hgates = np.dot(hx, w_hh.transpose(1, 0)) + b_hh np_gates = np_igates + np_hgates np_ingate, np_forgetgate, np_cellgate, np_outgate = np.split(np_gates, 4, axis=1) np_w_ih_i, np_w_ih_f, np_w_ih_c, np_w_ih_o = np.split(w_ih, 4, axis=0) np_b_ih_i, np_b_ih_f, np_b_ih_c, np_b_ih_o = np.split(b_ih, 4, axis=0) np_w_hh_i, np_w_hh_f, np_w_hh_c, np_w_hh_o = np.split(w_hh, 4, axis=0) np_b_hh_i, np_b_hh_f, np_b_hh_c, np_b_hh_o = np.split(b_hh, 4, axis=0) np_sigm_ingate = np_sigmoid(np_ingate) np_sigm_forgetgate = np_sigmoid(np_forgetgate) np_tanh_cellgate = np.tanh(np_cellgate) np_sigm_outgate = np_sigmoid(np_outgate) np_c_out = np_sigm_forgetgate * cx + np_sigm_ingate * np_tanh_cellgate np_h_out = np_sigm_outgate * np.tanh(np_c_out) expect_dsigm_outgate = gradh * np.tanh(np_c_out) expect_dc_out = np_sigm_outgate * gradh * (1.0 - np.tanh(np_c_out) * np.tanh(np_c_out)) expect_dsigm_forgetgate = expect_dc_out * cx expect_dcx = np_sigm_forgetgate * expect_dc_out expect_dsigm_ingate = expect_dc_out * np_tanh_cellgate expect_dtanh_cellgate = np_sigm_ingate * expect_dc_out expect_dingate = expect_dsigm_ingate * np_sigm_ingate * (1 - np_sigm_ingate) expect_dforgetgate = expect_dsigm_forgetgate * np_sigm_forgetgate * (1 - np_sigm_forgetgate) expect_dcellgate = expect_dtanh_cellgate * (1 - np_tanh_cellgate * np_tanh_cellgate) expect_doutgate = expect_dsigm_outgate * np_sigm_outgate * (1.0 - np_sigm_outgate) expect_dw_ih_i = np.dot(expect_dingate.transpose(1, 0), np_input) expect_dw_ih_f = np.dot(expect_dforgetgate.transpose(1, 0), np_input) expect_dw_ih_c = np.dot(expect_dcellgate.transpose(1, 0), np_input) expect_dw_ih_o = np.dot(expect_doutgate.transpose(1, 0), np_input) expect_dw_ih = np.concatenate((expect_dw_ih_i, expect_dw_ih_f, expect_dw_ih_c, expect_dw_ih_o), axis=0) expect_db_ih_i = np.sum(expect_dingate, axis=0) expect_db_ih_f = np.sum(expect_dforgetgate, axis=0) expect_db_ih_c = np.sum(expect_dcellgate, axis=0) expect_db_ih_o = np.sum(expect_doutgate, axis=0) expect_db_ih = np.concatenate((expect_db_ih_i, expect_db_ih_f, expect_db_ih_c, expect_db_ih_o), axis=0) expect_dinput = np.dot(expect_dingate, np_w_ih_i) + np.dot(expect_dforgetgate, np_w_ih_f) +\ np.dot(expect_dcellgate, np_w_ih_c) + np.dot(expect_doutgate, np_w_ih_o) expect_dw_hh_i = np.dot(expect_dingate.transpose(1, 0), hx) expect_dw_hh_f = np.dot(expect_dforgetgate.transpose(1, 0), hx) expect_dw_hh_c = np.dot(expect_dcellgate.transpose(1, 0), hx) expect_dw_hh_o = np.dot(expect_doutgate.transpose(1, 0), hx) expect_dw_hh = np.concatenate((expect_dw_hh_i, expect_dw_hh_f, expect_dw_hh_c, expect_dw_hh_o), axis=0) expect_db_hh = expect_db_ih expect_dhx = np.dot(expect_dingate, np_w_hh_i) + np.dot(expect_dforgetgate, np_w_hh_f) +\ np.dot(expect_dcellgate, np_w_hh_c) + np.dot(expect_doutgate, np_w_hh_o) return [expect_dinput, expect_dhx, expect_dcx, expect_dw_ih, expect_dw_hh, expect_db_ih, expect_db_hh] def lstm_backward_data_c(np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradc): np_igates = np.dot(np_input, w_ih.transpose(1, 0)) + b_ih np_hgates = np.dot(hx, w_hh.transpose(1, 0)) + b_hh np_gates = np_igates + np_hgates np_ingate, np_forgetgate, np_cellgate, np_outgate = np.split(np_gates, 4, axis=1) np_w_ih_i, np_w_ih_f, np_w_ih_c, np_w_ih_o = np.split(w_ih, 4, axis=0) np_b_ih_i, np_b_ih_f, np_b_ih_c, np_b_ih_o = np.split(b_ih, 4, axis=0) np_w_hh_i, np_w_hh_f, np_w_hh_c, np_w_hh_o = np.split(w_hh, 4, axis=0) np_b_hh_i, np_b_hh_f, np_b_hh_c, np_b_hh_o = np.split(b_hh, 4, axis=0) np_sigm_ingate = np_sigmoid(np_ingate) np_sigm_forgetgate = np_sigmoid(np_forgetgate) np_tanh_cellgate = np.tanh(np_cellgate) np_sigm_outgate = np_sigmoid(np_outgate) np_c_out = np_sigm_forgetgate * cx + np_sigm_ingate * np_tanh_cellgate expect_dc_out = gradc expect_dsigm_outgate = np_sigm_outgate * 0.0 expect_dsigm_forgetgate = expect_dc_out * cx expect_dcx = np_sigm_forgetgate * expect_dc_out expect_dsigm_ingate = expect_dc_out * np_tanh_cellgate expect_dtanh_cellgate = np_sigm_ingate * expect_dc_out expect_dingate = expect_dsigm_ingate * np_sigm_ingate * (1.0 - np_sigm_ingate) expect_dforgetgate = expect_dsigm_forgetgate * np_sigm_forgetgate * (1.0 - np_sigm_forgetgate) expect_dcellgate = expect_dtanh_cellgate * (1.0 - np_tanh_cellgate * np_tanh_cellgate) expect_doutgate = expect_dsigm_outgate * np_sigm_outgate * (1.0 - np_sigm_outgate) expect_dw_ih_i = np.dot(expect_dingate.transpose(1, 0), np_input) expect_dw_ih_f = np.dot(expect_dforgetgate.transpose(1, 0), np_input) expect_dw_ih_c = np.dot(expect_dcellgate.transpose(1, 0), np_input) expect_dw_ih_o = np.dot(expect_doutgate.transpose(1, 0), np_input) expect_dw_ih = np.concatenate((expect_dw_ih_i, expect_dw_ih_f, expect_dw_ih_c, expect_dw_ih_o), axis=0) expect_db_ih_i = np.sum(expect_dingate, axis=0) expect_db_ih_f = np.sum(expect_dforgetgate, axis=0) expect_db_ih_c = np.sum(expect_dcellgate, axis=0) expect_db_ih_o = np.sum(expect_doutgate, axis=0) expect_db_ih = np.concatenate((expect_db_ih_i, expect_db_ih_f, expect_db_ih_c, expect_db_ih_o), axis=0) expect_dinput = np.dot(expect_dingate, np_w_ih_i) + np.dot(expect_dforgetgate, np_w_ih_f) +\ np.dot(expect_dcellgate, np_w_ih_c) + np.dot(expect_doutgate, np_w_ih_o) expect_dw_hh_i = np.dot(expect_dingate.transpose(1, 0), hx) expect_dw_hh_f = np.dot(expect_dforgetgate.transpose(1, 0), hx) expect_dw_hh_c = np.dot(expect_dcellgate.transpose(1, 0), hx) expect_dw_hh_o = np.dot(expect_doutgate.transpose(1, 0), hx) expect_dw_hh = np.concatenate((expect_dw_hh_i, expect_dw_hh_f, expect_dw_hh_c, expect_dw_hh_o), axis=0) expect_db_hh = expect_db_ih expect_dhx = np.dot(expect_dingate, np_w_hh_i) + np.dot(expect_dforgetgate, np_w_hh_f) +\ np.dot(expect_dcellgate, np_w_hh_c) + np.dot(expect_doutgate, np_w_hh_o) return [expect_dinput, expect_dhx, expect_dcx, expect_dw_ih, expect_dw_hh, expect_db_ih, expect_db_hh] def lstmcell_grad_h_run(shape, dtype, kernel_name="lstm_grad_h", attrs={}): shapes = init_lstmcell_shapes(shape) print("lstmcell_grad_h - shapes:", shapes) mod = utils.op_build_test(lstmcell_grad_h, shapes, [dtype, dtype, dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[], kernel_name='lstmcell_grad_h', attrs=attrs) np_input, cx, hx, w_ih, w_hh, b_ih, b_hh, gradh, gradc, dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx =\ init_lstmcell_data(shapes, dtype) dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx = utils.mod_launch(mod, (np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradh, gradc, dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx), outputs=(-7, -6, -5, -4, -3, -2, -1)) # verification code return None, None, None, True def lstmcell_grad_c_run(shape, dtype, kernel_name="lstm_grad_c", attrs={}): shapes = init_lstmcell_shapes(shape) print("lstmcell_grad_c - shapes:", shapes) mod = utils.op_build_test(lstmcell_grad_c, init_lstmcell_shapes(shape), [dtype, dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[], kernel_name='lstmcell_grad_c', attrs=attrs) # print(mod.imported_modules[0].get_source()) np_input, cx, hx, w_ih, w_hh, b_ih, b_hh, gradh, gradc, dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx =\ init_lstmcell_data(shapes, dtype) # dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx = utils.mod_launch(mod, (np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradh, gradc, dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx), # outputs=(-7, -6, -5, -4, -3, -2, -1)) # verification code # tensor_list = [dx, dhx, dcx, dw_ih, dw_hh, db_ih, db_hh] # expected_tensor_list = lstm_backward_data_h(np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradh) # assert_res = True # for input_id in range(0, 8): # act_output = tensor_list[input_id] # print("act_output =\n", act_output) # print("expect_output =\n", expected_tensor_list[input_id]) # assert_res = compare_tensor(act_output, expected_tensor_list[input_id], rtol = 5e-02, atol = 1e-4, equal_nan=True) # print("LSTM_cell_c_grad input_id = ", input_id, "; assert_res = ", assert_res) # print("Max error = " , np.max(np.abs(act_output - expected_tensor_list[input_id]))) # input("Press ENTER...") return None, None, None, True def lstmcell_h_ad_run(shape, dtype, kernel_name="lstmcell_h_ad", attrs={}): batch_size, input_size, hidden_size = shape shapes = init_lstmcell_shapes(shape) print("lstmcell_h_ad - shapes:", shapes) if 'tuning' in attrs.keys(): t = attrs.get("tuning", False) kernel_name = attrs.get("kernel_name", False) for input_id in range(0, 7): mod = utils.op_build_test(lstmcell_h_ad, shapes[0:8], [dtype, dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name=kernel_name, attrs=attrs, tuning=t) if t: b_hh, b_ih, cx, expected_tensor_list, gradh, hx, np_input, tensor_list, w_hh, w_ih = gen_lstmcell_h_ad_data( dtype, shapes) act_output = tensor_list[input_id] return mod, expected_tensor_list, (np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradh, act_output) else: return mod else: assert_res = True for input_id in range(0, 7): mod = utils.op_build_test(lstmcell_h_ad, shapes[0:8], [dtype, dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name='lstmcell_h_ad', attrs=attrs) # print(mod.imported_modules[0].get_source()) b_hh, b_ih, cx, expected_tensor_list, gradh, hx, np_input, tensor_list, w_hh, w_ih = gen_lstmcell_h_ad_data( dtype, shapes) act_output = tensor_list[input_id] act_output = utils.mod_launch(mod, (np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradh, act_output), expect=expected_tensor_list[input_id]) print("act_output =\n", act_output) print("expect_output =\n", expected_tensor_list[input_id]) assert_res = compare_tensor(act_output, expected_tensor_list[input_id], rtol=5e-02, atol=5e-3, equal_nan=True) print("LSTM_cell_h input_id = ", input_id, "; assert_res = ", assert_res) print("Max error = ", np.max(np.abs(act_output - expected_tensor_list[input_id]))) # input("Press ENTER...") return None, None, None, True def gen_lstmcell_h_ad_data(dtype, shapes): np_input, cx, hx, w_ih, w_hh, b_ih, b_hh, gradh, gradc, dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx = \ init_lstmcell_data(shapes, dtype) # lstmcell(input, hx, cx, w_ih, w_hh, b_ih, b_hh) tensor_list = [dx, dhx, dcx, dw_ih, dw_hh, db_ih, db_hh] expected_tensor_list = lstm_backward_data_h(np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradh) return b_hh, b_ih, cx, expected_tensor_list, gradh, hx, np_input, tensor_list, w_hh, w_ih def lstmcell_c_ad_run(shape, dtype, kernel_name="lstmcell_c_ad", attrs={}): batch_size, input_size, hidden_size = shape shapes = init_lstmcell_shapes(shape) print("lstmcell_c_ad - shapes:", shapes) if 'tuning' in attrs.keys(): t = attrs.get("tuning", False) kernel_name = attrs.get("kernel_name", False) for input_id in range(0, 7): mod = utils.op_build_test(lstmcell_c_ad, shapes[0:8], [dtype, dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name=kernel_name, attrs=attrs, tuning=t) if t: b_hh, b_ih, cx, expected_tensor_list, gradc, hx, np_input, tensor_list, w_hh, w_ih = \ gen_lstmcell_c_ad_data(dtype, shapes) act_output = tensor_list[input_id] return mod, expected_tensor_list, (np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradc, act_output) else: return mod else: assert_res = True for input_id in range(0, 7): mod = utils.op_build_test(lstmcell_c_ad, shapes[0:8], [dtype, dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name='lstmcell_c_ad', attrs=attrs) b_hh, b_ih, cx, expected_tensor_list, gradc, hx, np_input, tensor_list, w_hh, w_ih = gen_lstmcell_c_ad_data( dtype, shapes) act_output = tensor_list[input_id] act_output = utils.mod_launch(mod, (np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradc, act_output)) print("act_output =\n", act_output) print("expect_output =\n", expected_tensor_list[input_id]) assert_res &= compare_tensor(act_output, expected_tensor_list[input_id], rtol=1e-02, atol=1e-2, equal_nan=True) print("LSTM_cell_c input_id = ", input_id, "; assert_res = ", assert_res) print("Max error = ", np.max(np.abs(act_output - expected_tensor_list[input_id]))) # input("Press ENTER...") return None, None, None, True def gen_lstmcell_c_ad_data(dtype, shapes): np_input, cx, hx, w_ih, w_hh, b_ih, b_hh, gradh, gradc, dw_ih, dw_hh, db_ih, db_hh, dcx, dhx, dx = \ init_lstmcell_data(shapes, dtype) # lstmcell(input, hx, cx, w_ih, w_hh, b_ih, b_hh) tensor_list = [dx, dhx, dcx, dw_ih, dw_hh, db_ih, db_hh] expected_tensor_list = lstm_backward_data_c(np_input, hx, cx, w_ih, w_hh, b_ih, b_hh, gradc) return b_hh, b_ih, cx, expected_tensor_list, gradc, hx, np_input, tensor_list, w_hh, w_ih def init_rnncell_shapes(shape): # shape: batch_size, input_size, hidden_states_size batch_size, input_size, hidden_states_size = shape input_shape = (batch_size, input_size) hidden_shape = (batch_size, hidden_states_size) w_ih_shape = (hidden_states_size, input_size) w_hh_shape = (hidden_states_size, hidden_states_size) b_ih_shape = (hidden_states_size,) b_hh_shape = (hidden_states_size,) grad_shape = (batch_size, hidden_states_size,) return [input_shape, hidden_shape, w_ih_shape, w_hh_shape, b_ih_shape, b_hh_shape, grad_shape] def init_rnncell_data(shapes, dtype): input = random_gaussian(shapes[0], miu=0.1, sigma=0.1).astype(dtype) hidden = random_gaussian(shapes[1], miu=0.1, sigma=0.1).astype(dtype) w_ih = random_gaussian(shapes[2], miu=0.1, sigma=0.1).astype(dtype) w_hh = random_gaussian(shapes[3], miu=0.1, sigma=0.1).astype(dtype) b_ih = random_gaussian(shapes[4], miu=0.1, sigma=0.1).astype(dtype) b_hh = random_gaussian(shapes[5], miu=0.1, sigma=0.1).astype(dtype) grad = random_gaussian(shapes[6], miu=0.1, sigma=0.1).astype(dtype) dinput = np.full(input.shape, np.nan, dtype) dhidden = np.full(hidden.shape, np.nan, dtype) dw_ih = np.full(w_ih.shape, np.nan, dtype) dw_hh = np.full(w_hh.shape, np.nan, dtype) db_ih = np.full(b_ih.shape, np.nan, dtype) db_hh = np.full(b_hh.shape, np.nan, dtype) return input, hidden, w_ih, w_hh, b_ih, b_hh, grad, dinput, dhidden, dw_ih, dw_hh, db_ih, db_hh def rnn_tanh_cell_ad_run(shape, dtype, kernel_name="rnncell_tanh_ad", attrs={}): shapes = init_rnncell_shapes(shape) print("rnncell_tanh_ad - shapes:", shapes) assert_res = True if 'tuning' in attrs.keys(): t = attrs.get("tuning", False) kernel_name = attrs.get("kernel_name", False) for input_id in range(0, 6): mod = utils.op_build_test(rnncell_tanh_ad, shapes[0:7], [dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name=kernel_name, attrs=attrs, tuning=t) if t: b_hh, b_ih, expected_tensor_list, grad, hidden, np_input, tensor_list, w_hh, w_ih = gen_rnn_tanh_cell_ad_data( dtype, shapes) act_output = tensor_list[input_id] return mod, expected_tensor_list, (np_input, hidden, w_ih, w_hh, b_ih, b_hh, grad, act_output) else: return mod else: for input_id in range(0, 6): mod = utils.op_build_test(rnncell_tanh_ad, shapes[0:7], [dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name='rnn_tanh_cell_ad', attrs=attrs) b_hh, b_ih, expected_tensor_list, grad, hidden, np_input, tensor_list, w_hh, w_ih = gen_rnn_tanh_cell_ad_data( dtype, shapes) act_output = tensor_list[input_id] act_output = utils.mod_launch(mod, (np_input, hidden, w_ih, w_hh, b_ih, b_hh, grad, act_output)) compare_result_tensor = compare_tensor(act_output, expected_tensor_list[input_id], rtol=5e-02, atol=1e-4, equal_nan=True) print("RNN_cell input_id = ", input_id, "; assert_res = ", compare_result_tensor) assert_res = assert_res & compare_result_tensor return None, None, None, assert_res def gen_rnn_tanh_cell_ad_data(dtype, shapes): np_input, hidden, w_ih, w_hh, b_ih, b_hh, grad, dinput, dhidden, dw_ih, dw_hh, db_ih, db_hh = init_rnncell_data( shapes, dtype) tensor_list = [dinput, dhidden, dw_ih, dw_hh, db_ih, db_hh] np_igates = np.dot(np_input, w_ih.transpose(1, 0)) + b_ih np_hgates = np.dot(hidden, w_hh.transpose(1, 0)) + b_hh np_h = np.tanh(np_igates + np_hgates) expect_dgates = grad * (1.0 - np_h * np_h) expect_dw_ih = np.dot(expect_dgates.transpose(1, 0), np_input) expect_db_ih = np.sum(expect_dgates, axis=0) expect_dw_hh = np.dot(expect_dgates.transpose(1, 0), hidden) expect_db_hh = np.sum(expect_dgates, axis=0) expect_dinput = np.dot(expect_dgates, w_ih) expect_dhidden = np.dot(expect_dgates, w_hh) expected_tensor_list = [expect_dinput, expect_dhidden, expect_dw_ih, expect_dw_hh, expect_db_ih, expect_db_hh] return b_hh, b_ih, expected_tensor_list, grad, hidden, np_input, tensor_list, w_hh, w_ih def rnn_tanh_cell_grad_run(shape, dtype, kernel_name="rnn_tanh_cell_grad", attrs={}): shapes = init_rnncell_shapes(shape) print("rnn_tanh_cell_grad - shapes:", shapes) mod = utils.op_build_test(rnn_tanh_cell_grad, shapes, [dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[], kernel_name='rnn_tanh_cell_grad', attrs=attrs) # verification code return None, None, None, True def rnn_relu_cell_grad_run(shape, dtype, kernel_name="rnn_relu_cell_grad", attrs={}): shapes = init_rnncell_shapes(shape) print("rnn_relu_cell_grad - shapes:", shapes) input, hidden, w_ih, w_hh, b_ih, b_hh, grad, dinput, dhidden, dw_ih, dw_hh, db_ih, db_hh = init_rnncell_data(shapes, dtype) mod = utils.op_build_test(rnn_relu_cell_grad, shapes, [dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[], kernel_name='rnn_relu_cell_grad', attrs=attrs) # verification code return None, None, None, True def rnn_relu_cell_ad_run(shape, dtype, kernel_name="rnncell_tanh_ad", attrs={}): shapes = init_rnncell_shapes(shape) print("rnncell_tanh_ad - shapes:", shapes) assert_res = True if 'tuning' in attrs.keys(): t = attrs.get("tuning", False) kernel_name = attrs.get("kernel_name", False) for input_id in range(0, 6): mod = utils.op_build_test(rnncell_relu_ad, shapes[0:7], [dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name=kernel_name, attrs=attrs, tuning=t) if t: b_hh, b_ih, expected_tensor_list, grad, np_hidden, np_input, tensor_list, w_hh, w_ih = gen_rnn_relu_cell_ad_data( dtype, shapes) act_output = tensor_list[input_id] return mod, expected_tensor_list, (np_input, np_hidden, w_ih, w_hh, b_ih, b_hh, grad, act_output) else: return mod else: for input_id in range(0, 6): mod = utils.op_build_test(rnncell_relu_ad, shapes[0:7], [dtype, dtype, dtype, dtype, dtype, dtype, dtype], op_attrs=[input_id], kernel_name='rnncell_relu_ad', attrs=attrs) # print(mod.imported_modules[0].get_source()) b_hh, b_ih, expected_tensor_list, grad, np_hidden, np_input, tensor_list, w_hh, w_ih = gen_rnn_relu_cell_ad_data( dtype, shapes) act_output = tensor_list[input_id] act_output = utils.mod_launch(mod, (np_input, np_hidden, w_ih, w_hh, b_ih, b_hh, grad, act_output)) print("act_output = ", act_output) print("expect_output = ", expected_tensor_list[input_id]) assert_res &= compare_tensor(act_output, expected_tensor_list[input_id], rtol=5e-02, atol=1e-4, equal_nan=True) print("RNN_cell input_id = ", input_id, "; assert_res = ", assert_res) return None, None, None, True def gen_rnn_relu_cell_ad_data(dtype, shapes): np_input, np_hidden, w_ih, w_hh, b_ih, b_hh, grad, dinput, dhidden, dw_ih, dw_hh, db_ih, db_hh = init_rnncell_data( shapes, dtype) tensor_list = [dinput, dhidden, dw_ih, dw_hh, db_ih, db_hh] # igates = dense(input, w_ih, b_ih, use_bias) # hgates = dense(hidden, w_hh, b_hh, use_bias) # h = relu6(igates + hgates) np_igates = np.dot(np_input, w_ih.transpose(1, 0)) + b_ih np_hgates = np.dot(np_hidden, w_hh.transpose(1, 0)) + b_hh np_h = np_igates + np_hgates np_h[np_h < 0.0] = 0.0 np_h[np_h > 6.0] = 6.0 expect_dh = np.ones_like(np_h) expect_dh[expect_dh < 0.0] = 0.0 expect_dh[expect_dh > 6.0] = 0.0 expect_dgates = grad * expect_dh expect_dw_ih = np.dot(expect_dgates.transpose(1, 0), np_input) expect_db_ih = np.sum(expect_dgates, axis=0) expect_dw_hh = np.dot(expect_dgates.transpose(1, 0), np_hidden) expect_db_hh = np.sum(expect_dgates, axis=0) expect_dinput = np.dot(expect_dgates, w_ih) expect_dhidden = np.dot(expect_dgates, w_hh) expected_tensor_list = [expect_dinput, expect_dhidden, expect_dw_ih, expect_dw_hh, expect_db_ih, expect_db_hh] return b_hh, b_ih, expected_tensor_list, grad, np_hidden, np_input, tensor_list, w_hh, w_ih