提交 df98c24f 编写于 作者: X xixiaoyao

fix pred

上级 8a99149a
{'token_ids': [[-1, -1], 'int64'], 'label_ids': [[-1], 'int64']}
<paddlepalm.backbone.ernie.ERNIE object at 0x7fcf583f53d0>
{'token_ids': [[-1, -1], 'int64'], 'label_ids': [[-1], 'int64'], u'input_mask': [[-1, -1, 1], 'float32'], u'position_ids': [[-1, -1], 'int64'], u'task_ids': [[-1, -1], 'int64'], u'segment_ids': [[-1, -1], 'int64']}
[debug] : 0, input_mask
[debug] : 0, position_ids
[debug] : 0, segment_ids
[debug] : 0, task_ids
[debug] : 0, token_ids
[debug] : 0, senti_cls.label_ids
[debug] : 0, print_token_ids_0.tmp_0
[debug] : 0, word_embedding
[debug] : 0, embedding_0.tmp_0
[debug] : 0, pos_embedding
[debug] : 0, embedding_1.tmp_0
[debug] : 0, sent_embedding
[debug] : 0, embedding_2.tmp_0
[debug] : 0, tmp_0
[debug] : 0, tmp_1
[debug] : 0, task_embedding
[debug] : 0, embedding_3.tmp_0
[debug] : 0, tmp_2
[debug] : 0, reduce_mean_0.tmp_0
[debug] : 0, elementwise_sub_0
[debug] : 0, square_0.tmp_0
[debug] : 0, reduce_mean_1.tmp_0
[debug] : 0, tmp_3
[debug] : 0, rsqrt_0.tmp_0
[debug] : 0, elementwise_mul_0
[debug] : 0, pre_encoder_layer_norm_scale
[debug] : 0, pre_encoder_layer_norm_bias
[debug] : 0, elementwise_mul_1
[debug] : 0, elementwise_add_0
[debug] : 0, dropout_0.tmp_0
[debug] : 0, dropout_0.tmp_1
[debug] : 0, matmul_0.tmp_0
[debug] : 0, scale_0.tmp_0
[debug] : 0, stack_0.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_query_fc.w_0
[debug] : 0, fc_0.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_query_fc.b_0
[debug] : 0, fc_0.tmp_1
[debug] : 0, encoder_layer_0_multi_head_att_key_fc.w_0
[debug] : 0, fc_1.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_key_fc.b_0
[debug] : 0, fc_1.tmp_1
[debug] : 0, encoder_layer_0_multi_head_att_value_fc.w_0
[debug] : 0, fc_2.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_value_fc.b_0
[debug] : 0, fc_2.tmp_1
[debug] : 0, reshape2_0.tmp_0
[debug] : 0, transpose_0.tmp_0
[debug] : 0, transpose_0.tmp_1
[debug] : 0, reshape2_1.tmp_0
[debug] : 0, transpose_1.tmp_0
[debug] : 0, transpose_1.tmp_1
[debug] : 0, reshape2_2.tmp_0
[debug] : 0, transpose_2.tmp_0
[debug] : 0, transpose_2.tmp_1
[debug] : 0, scale_1.tmp_0
[debug] : 0, matmul_1.tmp_0
[debug] : 0, tmp_4
[debug] : 0, softmax_0.tmp_0
[debug] : 0, dropout_1.tmp_0
[debug] : 0, dropout_1.tmp_1
[debug] : 0, matmul_2.tmp_0
[debug] : 0, transpose_3.tmp_0
[debug] : 0, transpose_3.tmp_1
[debug] : 0, reshape2_3.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_output_fc.w_0
[debug] : 0, fc_3.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_output_fc.b_0
[debug] : 0, fc_3.tmp_1
[debug] : 0, dropout_2.tmp_0
[debug] : 0, dropout_2.tmp_1
[debug] : 0, tmp_5
[debug] : 0, reduce_mean_2.tmp_0
[debug] : 0, elementwise_sub_1
[debug] : 0, square_1.tmp_0
[debug] : 0, reduce_mean_3.tmp_0
[debug] : 0, tmp_6
[debug] : 0, rsqrt_1.tmp_0
[debug] : 0, elementwise_mul_2
[debug] : 0, encoder_layer_0_post_att_layer_norm_scale
[debug] : 0, encoder_layer_0_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_3
[debug] : 0, elementwise_add_1
[debug] : 0, encoder_layer_0_ffn_fc_0.w_0
[debug] : 0, fc_4.tmp_0
[debug] : 0, encoder_layer_0_ffn_fc_0.b_0
[debug] : 0, fc_4.tmp_1
[debug] : 0, fc_4.tmp_2
[debug] : 0, encoder_layer_0_ffn_fc_1.w_0
[debug] : 0, fc_5.tmp_0
[debug] : 0, encoder_layer_0_ffn_fc_1.b_0
[debug] : 0, fc_5.tmp_1
[debug] : 0, dropout_3.tmp_0
[debug] : 0, dropout_3.tmp_1
[debug] : 0, tmp_7
[debug] : 0, reduce_mean_4.tmp_0
[debug] : 0, elementwise_sub_2
[debug] : 0, square_2.tmp_0
[debug] : 0, reduce_mean_5.tmp_0
[debug] : 0, tmp_8
[debug] : 0, rsqrt_2.tmp_0
[debug] : 0, elementwise_mul_4
[debug] : 0, encoder_layer_0_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_0_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_5
[debug] : 0, elementwise_add_2
[debug] : 0, encoder_layer_1_multi_head_att_query_fc.w_0
[debug] : 0, fc_6.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_query_fc.b_0
[debug] : 0, fc_6.tmp_1
[debug] : 0, encoder_layer_1_multi_head_att_key_fc.w_0
[debug] : 0, fc_7.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_key_fc.b_0
[debug] : 0, fc_7.tmp_1
[debug] : 0, encoder_layer_1_multi_head_att_value_fc.w_0
[debug] : 0, fc_8.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_value_fc.b_0
[debug] : 0, fc_8.tmp_1
[debug] : 0, reshape2_4.tmp_0
[debug] : 0, transpose_4.tmp_0
[debug] : 0, transpose_4.tmp_1
[debug] : 0, reshape2_5.tmp_0
[debug] : 0, transpose_5.tmp_0
[debug] : 0, transpose_5.tmp_1
[debug] : 0, reshape2_6.tmp_0
[debug] : 0, transpose_6.tmp_0
[debug] : 0, transpose_6.tmp_1
[debug] : 0, scale_2.tmp_0
[debug] : 0, matmul_3.tmp_0
[debug] : 0, tmp_9
[debug] : 0, softmax_1.tmp_0
[debug] : 0, dropout_4.tmp_0
[debug] : 0, dropout_4.tmp_1
[debug] : 0, matmul_4.tmp_0
[debug] : 0, transpose_7.tmp_0
[debug] : 0, transpose_7.tmp_1
[debug] : 0, reshape2_7.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_output_fc.w_0
[debug] : 0, fc_9.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_output_fc.b_0
[debug] : 0, fc_9.tmp_1
[debug] : 0, dropout_5.tmp_0
[debug] : 0, dropout_5.tmp_1
[debug] : 0, tmp_10
[debug] : 0, reduce_mean_6.tmp_0
[debug] : 0, elementwise_sub_3
[debug] : 0, square_3.tmp_0
[debug] : 0, reduce_mean_7.tmp_0
[debug] : 0, tmp_11
[debug] : 0, rsqrt_3.tmp_0
[debug] : 0, elementwise_mul_6
[debug] : 0, encoder_layer_1_post_att_layer_norm_scale
[debug] : 0, encoder_layer_1_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_7
[debug] : 0, elementwise_add_3
[debug] : 0, encoder_layer_1_ffn_fc_0.w_0
[debug] : 0, fc_10.tmp_0
[debug] : 0, encoder_layer_1_ffn_fc_0.b_0
[debug] : 0, fc_10.tmp_1
[debug] : 0, fc_10.tmp_2
[debug] : 0, encoder_layer_1_ffn_fc_1.w_0
[debug] : 0, fc_11.tmp_0
[debug] : 0, encoder_layer_1_ffn_fc_1.b_0
[debug] : 0, fc_11.tmp_1
[debug] : 0, dropout_6.tmp_0
[debug] : 0, dropout_6.tmp_1
[debug] : 0, tmp_12
[debug] : 0, reduce_mean_8.tmp_0
[debug] : 0, elementwise_sub_4
[debug] : 0, square_4.tmp_0
[debug] : 0, reduce_mean_9.tmp_0
[debug] : 0, tmp_13
[debug] : 0, rsqrt_4.tmp_0
[debug] : 0, elementwise_mul_8
[debug] : 0, encoder_layer_1_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_1_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_9
[debug] : 0, elementwise_add_4
[debug] : 0, encoder_layer_2_multi_head_att_query_fc.w_0
[debug] : 0, fc_12.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_query_fc.b_0
[debug] : 0, fc_12.tmp_1
[debug] : 0, encoder_layer_2_multi_head_att_key_fc.w_0
[debug] : 0, fc_13.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_key_fc.b_0
[debug] : 0, fc_13.tmp_1
[debug] : 0, encoder_layer_2_multi_head_att_value_fc.w_0
[debug] : 0, fc_14.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_value_fc.b_0
[debug] : 0, fc_14.tmp_1
[debug] : 0, reshape2_8.tmp_0
[debug] : 0, transpose_8.tmp_0
[debug] : 0, transpose_8.tmp_1
[debug] : 0, reshape2_9.tmp_0
[debug] : 0, transpose_9.tmp_0
[debug] : 0, transpose_9.tmp_1
[debug] : 0, reshape2_10.tmp_0
[debug] : 0, transpose_10.tmp_0
[debug] : 0, transpose_10.tmp_1
[debug] : 0, scale_3.tmp_0
[debug] : 0, matmul_5.tmp_0
[debug] : 0, tmp_14
[debug] : 0, softmax_2.tmp_0
[debug] : 0, dropout_7.tmp_0
[debug] : 0, dropout_7.tmp_1
[debug] : 0, matmul_6.tmp_0
[debug] : 0, transpose_11.tmp_0
[debug] : 0, transpose_11.tmp_1
[debug] : 0, reshape2_11.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_output_fc.w_0
[debug] : 0, fc_15.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_output_fc.b_0
[debug] : 0, fc_15.tmp_1
[debug] : 0, dropout_8.tmp_0
[debug] : 0, dropout_8.tmp_1
[debug] : 0, tmp_15
[debug] : 0, reduce_mean_10.tmp_0
[debug] : 0, elementwise_sub_5
[debug] : 0, square_5.tmp_0
[debug] : 0, reduce_mean_11.tmp_0
[debug] : 0, tmp_16
[debug] : 0, rsqrt_5.tmp_0
[debug] : 0, elementwise_mul_10
[debug] : 0, encoder_layer_2_post_att_layer_norm_scale
[debug] : 0, encoder_layer_2_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_11
[debug] : 0, elementwise_add_5
[debug] : 0, encoder_layer_2_ffn_fc_0.w_0
[debug] : 0, fc_16.tmp_0
[debug] : 0, encoder_layer_2_ffn_fc_0.b_0
[debug] : 0, fc_16.tmp_1
[debug] : 0, fc_16.tmp_2
[debug] : 0, encoder_layer_2_ffn_fc_1.w_0
[debug] : 0, fc_17.tmp_0
[debug] : 0, encoder_layer_2_ffn_fc_1.b_0
[debug] : 0, fc_17.tmp_1
[debug] : 0, dropout_9.tmp_0
[debug] : 0, dropout_9.tmp_1
[debug] : 0, tmp_17
[debug] : 0, reduce_mean_12.tmp_0
[debug] : 0, elementwise_sub_6
[debug] : 0, square_6.tmp_0
[debug] : 0, reduce_mean_13.tmp_0
[debug] : 0, tmp_18
[debug] : 0, rsqrt_6.tmp_0
[debug] : 0, elementwise_mul_12
[debug] : 0, encoder_layer_2_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_2_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_13
[debug] : 0, elementwise_add_6
[debug] : 0, encoder_layer_3_multi_head_att_query_fc.w_0
[debug] : 0, fc_18.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_query_fc.b_0
[debug] : 0, fc_18.tmp_1
[debug] : 0, encoder_layer_3_multi_head_att_key_fc.w_0
[debug] : 0, fc_19.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_key_fc.b_0
[debug] : 0, fc_19.tmp_1
[debug] : 0, encoder_layer_3_multi_head_att_value_fc.w_0
[debug] : 0, fc_20.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_value_fc.b_0
[debug] : 0, fc_20.tmp_1
[debug] : 0, reshape2_12.tmp_0
[debug] : 0, transpose_12.tmp_0
[debug] : 0, transpose_12.tmp_1
[debug] : 0, reshape2_13.tmp_0
[debug] : 0, transpose_13.tmp_0
[debug] : 0, transpose_13.tmp_1
[debug] : 0, reshape2_14.tmp_0
[debug] : 0, transpose_14.tmp_0
[debug] : 0, transpose_14.tmp_1
[debug] : 0, scale_4.tmp_0
[debug] : 0, matmul_7.tmp_0
[debug] : 0, tmp_19
[debug] : 0, softmax_3.tmp_0
[debug] : 0, dropout_10.tmp_0
[debug] : 0, dropout_10.tmp_1
[debug] : 0, matmul_8.tmp_0
[debug] : 0, transpose_15.tmp_0
[debug] : 0, transpose_15.tmp_1
[debug] : 0, reshape2_15.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_output_fc.w_0
[debug] : 0, fc_21.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_output_fc.b_0
[debug] : 0, fc_21.tmp_1
[debug] : 0, dropout_11.tmp_0
[debug] : 0, dropout_11.tmp_1
[debug] : 0, tmp_20
[debug] : 0, reduce_mean_14.tmp_0
[debug] : 0, elementwise_sub_7
[debug] : 0, square_7.tmp_0
[debug] : 0, reduce_mean_15.tmp_0
[debug] : 0, tmp_21
[debug] : 0, rsqrt_7.tmp_0
[debug] : 0, elementwise_mul_14
[debug] : 0, encoder_layer_3_post_att_layer_norm_scale
[debug] : 0, encoder_layer_3_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_15
[debug] : 0, elementwise_add_7
[debug] : 0, encoder_layer_3_ffn_fc_0.w_0
[debug] : 0, fc_22.tmp_0
[debug] : 0, encoder_layer_3_ffn_fc_0.b_0
[debug] : 0, fc_22.tmp_1
[debug] : 0, fc_22.tmp_2
[debug] : 0, encoder_layer_3_ffn_fc_1.w_0
[debug] : 0, fc_23.tmp_0
[debug] : 0, encoder_layer_3_ffn_fc_1.b_0
[debug] : 0, fc_23.tmp_1
[debug] : 0, dropout_12.tmp_0
[debug] : 0, dropout_12.tmp_1
[debug] : 0, tmp_22
[debug] : 0, reduce_mean_16.tmp_0
[debug] : 0, elementwise_sub_8
[debug] : 0, square_8.tmp_0
[debug] : 0, reduce_mean_17.tmp_0
[debug] : 0, tmp_23
[debug] : 0, rsqrt_8.tmp_0
[debug] : 0, elementwise_mul_16
[debug] : 0, encoder_layer_3_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_3_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_17
[debug] : 0, elementwise_add_8
[debug] : 0, encoder_layer_4_multi_head_att_query_fc.w_0
[debug] : 0, fc_24.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_query_fc.b_0
[debug] : 0, fc_24.tmp_1
[debug] : 0, encoder_layer_4_multi_head_att_key_fc.w_0
[debug] : 0, fc_25.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_key_fc.b_0
[debug] : 0, fc_25.tmp_1
[debug] : 0, encoder_layer_4_multi_head_att_value_fc.w_0
[debug] : 0, fc_26.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_value_fc.b_0
[debug] : 0, fc_26.tmp_1
[debug] : 0, reshape2_16.tmp_0
[debug] : 0, transpose_16.tmp_0
[debug] : 0, transpose_16.tmp_1
[debug] : 0, reshape2_17.tmp_0
[debug] : 0, transpose_17.tmp_0
[debug] : 0, transpose_17.tmp_1
[debug] : 0, reshape2_18.tmp_0
[debug] : 0, transpose_18.tmp_0
[debug] : 0, transpose_18.tmp_1
[debug] : 0, scale_5.tmp_0
[debug] : 0, matmul_9.tmp_0
[debug] : 0, tmp_24
[debug] : 0, softmax_4.tmp_0
[debug] : 0, dropout_13.tmp_0
[debug] : 0, dropout_13.tmp_1
[debug] : 0, matmul_10.tmp_0
[debug] : 0, transpose_19.tmp_0
[debug] : 0, transpose_19.tmp_1
[debug] : 0, reshape2_19.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_output_fc.w_0
[debug] : 0, fc_27.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_output_fc.b_0
[debug] : 0, fc_27.tmp_1
[debug] : 0, dropout_14.tmp_0
[debug] : 0, dropout_14.tmp_1
[debug] : 0, tmp_25
[debug] : 0, reduce_mean_18.tmp_0
[debug] : 0, elementwise_sub_9
[debug] : 0, square_9.tmp_0
[debug] : 0, reduce_mean_19.tmp_0
[debug] : 0, tmp_26
[debug] : 0, rsqrt_9.tmp_0
[debug] : 0, elementwise_mul_18
[debug] : 0, encoder_layer_4_post_att_layer_norm_scale
[debug] : 0, encoder_layer_4_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_19
[debug] : 0, elementwise_add_9
[debug] : 0, encoder_layer_4_ffn_fc_0.w_0
[debug] : 0, fc_28.tmp_0
[debug] : 0, encoder_layer_4_ffn_fc_0.b_0
[debug] : 0, fc_28.tmp_1
[debug] : 0, fc_28.tmp_2
[debug] : 0, encoder_layer_4_ffn_fc_1.w_0
[debug] : 0, fc_29.tmp_0
[debug] : 0, encoder_layer_4_ffn_fc_1.b_0
[debug] : 0, fc_29.tmp_1
[debug] : 0, dropout_15.tmp_0
[debug] : 0, dropout_15.tmp_1
[debug] : 0, tmp_27
[debug] : 0, reduce_mean_20.tmp_0
[debug] : 0, elementwise_sub_10
[debug] : 0, square_10.tmp_0
[debug] : 0, reduce_mean_21.tmp_0
[debug] : 0, tmp_28
[debug] : 0, rsqrt_10.tmp_0
[debug] : 0, elementwise_mul_20
[debug] : 0, encoder_layer_4_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_4_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_21
[debug] : 0, elementwise_add_10
[debug] : 0, encoder_layer_5_multi_head_att_query_fc.w_0
[debug] : 0, fc_30.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_query_fc.b_0
[debug] : 0, fc_30.tmp_1
[debug] : 0, encoder_layer_5_multi_head_att_key_fc.w_0
[debug] : 0, fc_31.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_key_fc.b_0
[debug] : 0, fc_31.tmp_1
[debug] : 0, encoder_layer_5_multi_head_att_value_fc.w_0
[debug] : 0, fc_32.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_value_fc.b_0
[debug] : 0, fc_32.tmp_1
[debug] : 0, reshape2_20.tmp_0
[debug] : 0, transpose_20.tmp_0
[debug] : 0, transpose_20.tmp_1
[debug] : 0, reshape2_21.tmp_0
[debug] : 0, transpose_21.tmp_0
[debug] : 0, transpose_21.tmp_1
[debug] : 0, reshape2_22.tmp_0
[debug] : 0, transpose_22.tmp_0
[debug] : 0, transpose_22.tmp_1
[debug] : 0, scale_6.tmp_0
[debug] : 0, matmul_11.tmp_0
[debug] : 0, tmp_29
[debug] : 0, softmax_5.tmp_0
[debug] : 0, dropout_16.tmp_0
[debug] : 0, dropout_16.tmp_1
[debug] : 0, matmul_12.tmp_0
[debug] : 0, transpose_23.tmp_0
[debug] : 0, transpose_23.tmp_1
[debug] : 0, reshape2_23.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_output_fc.w_0
[debug] : 0, fc_33.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_output_fc.b_0
[debug] : 0, fc_33.tmp_1
[debug] : 0, dropout_17.tmp_0
[debug] : 0, dropout_17.tmp_1
[debug] : 0, tmp_30
[debug] : 0, reduce_mean_22.tmp_0
[debug] : 0, elementwise_sub_11
[debug] : 0, square_11.tmp_0
[debug] : 0, reduce_mean_23.tmp_0
[debug] : 0, tmp_31
[debug] : 0, rsqrt_11.tmp_0
[debug] : 0, elementwise_mul_22
[debug] : 0, encoder_layer_5_post_att_layer_norm_scale
[debug] : 0, encoder_layer_5_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_23
[debug] : 0, elementwise_add_11
[debug] : 0, encoder_layer_5_ffn_fc_0.w_0
[debug] : 0, fc_34.tmp_0
[debug] : 0, encoder_layer_5_ffn_fc_0.b_0
[debug] : 0, fc_34.tmp_1
[debug] : 0, fc_34.tmp_2
[debug] : 0, encoder_layer_5_ffn_fc_1.w_0
[debug] : 0, fc_35.tmp_0
[debug] : 0, encoder_layer_5_ffn_fc_1.b_0
[debug] : 0, fc_35.tmp_1
[debug] : 0, dropout_18.tmp_0
[debug] : 0, dropout_18.tmp_1
[debug] : 0, tmp_32
[debug] : 0, reduce_mean_24.tmp_0
[debug] : 0, elementwise_sub_12
[debug] : 0, square_12.tmp_0
[debug] : 0, reduce_mean_25.tmp_0
[debug] : 0, tmp_33
[debug] : 0, rsqrt_12.tmp_0
[debug] : 0, elementwise_mul_24
[debug] : 0, encoder_layer_5_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_5_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_25
[debug] : 0, elementwise_add_12
[debug] : 0, encoder_layer_6_multi_head_att_query_fc.w_0
[debug] : 0, fc_36.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_query_fc.b_0
[debug] : 0, fc_36.tmp_1
[debug] : 0, encoder_layer_6_multi_head_att_key_fc.w_0
[debug] : 0, fc_37.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_key_fc.b_0
[debug] : 0, fc_37.tmp_1
[debug] : 0, encoder_layer_6_multi_head_att_value_fc.w_0
[debug] : 0, fc_38.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_value_fc.b_0
[debug] : 0, fc_38.tmp_1
[debug] : 0, reshape2_24.tmp_0
[debug] : 0, transpose_24.tmp_0
[debug] : 0, transpose_24.tmp_1
[debug] : 0, reshape2_25.tmp_0
[debug] : 0, transpose_25.tmp_0
[debug] : 0, transpose_25.tmp_1
[debug] : 0, reshape2_26.tmp_0
[debug] : 0, transpose_26.tmp_0
[debug] : 0, transpose_26.tmp_1
[debug] : 0, scale_7.tmp_0
[debug] : 0, matmul_13.tmp_0
[debug] : 0, tmp_34
[debug] : 0, softmax_6.tmp_0
[debug] : 0, dropout_19.tmp_0
[debug] : 0, dropout_19.tmp_1
[debug] : 0, matmul_14.tmp_0
[debug] : 0, transpose_27.tmp_0
[debug] : 0, transpose_27.tmp_1
[debug] : 0, reshape2_27.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_output_fc.w_0
[debug] : 0, fc_39.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_output_fc.b_0
[debug] : 0, fc_39.tmp_1
[debug] : 0, dropout_20.tmp_0
[debug] : 0, dropout_20.tmp_1
[debug] : 0, tmp_35
[debug] : 0, reduce_mean_26.tmp_0
[debug] : 0, elementwise_sub_13
[debug] : 0, square_13.tmp_0
[debug] : 0, reduce_mean_27.tmp_0
[debug] : 0, tmp_36
[debug] : 0, rsqrt_13.tmp_0
[debug] : 0, elementwise_mul_26
[debug] : 0, encoder_layer_6_post_att_layer_norm_scale
[debug] : 0, encoder_layer_6_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_27
[debug] : 0, elementwise_add_13
[debug] : 0, encoder_layer_6_ffn_fc_0.w_0
[debug] : 0, fc_40.tmp_0
[debug] : 0, encoder_layer_6_ffn_fc_0.b_0
[debug] : 0, fc_40.tmp_1
[debug] : 0, fc_40.tmp_2
[debug] : 0, encoder_layer_6_ffn_fc_1.w_0
[debug] : 0, fc_41.tmp_0
[debug] : 0, encoder_layer_6_ffn_fc_1.b_0
[debug] : 0, fc_41.tmp_1
[debug] : 0, dropout_21.tmp_0
[debug] : 0, dropout_21.tmp_1
[debug] : 0, tmp_37
[debug] : 0, reduce_mean_28.tmp_0
[debug] : 0, elementwise_sub_14
[debug] : 0, square_14.tmp_0
[debug] : 0, reduce_mean_29.tmp_0
[debug] : 0, tmp_38
[debug] : 0, rsqrt_14.tmp_0
[debug] : 0, elementwise_mul_28
[debug] : 0, encoder_layer_6_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_6_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_29
[debug] : 0, elementwise_add_14
[debug] : 0, encoder_layer_7_multi_head_att_query_fc.w_0
[debug] : 0, fc_42.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_query_fc.b_0
[debug] : 0, fc_42.tmp_1
[debug] : 0, encoder_layer_7_multi_head_att_key_fc.w_0
[debug] : 0, fc_43.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_key_fc.b_0
[debug] : 0, fc_43.tmp_1
[debug] : 0, encoder_layer_7_multi_head_att_value_fc.w_0
[debug] : 0, fc_44.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_value_fc.b_0
[debug] : 0, fc_44.tmp_1
[debug] : 0, reshape2_28.tmp_0
[debug] : 0, transpose_28.tmp_0
[debug] : 0, transpose_28.tmp_1
[debug] : 0, reshape2_29.tmp_0
[debug] : 0, transpose_29.tmp_0
[debug] : 0, transpose_29.tmp_1
[debug] : 0, reshape2_30.tmp_0
[debug] : 0, transpose_30.tmp_0
[debug] : 0, transpose_30.tmp_1
[debug] : 0, scale_8.tmp_0
[debug] : 0, matmul_15.tmp_0
[debug] : 0, tmp_39
[debug] : 0, softmax_7.tmp_0
[debug] : 0, dropout_22.tmp_0
[debug] : 0, dropout_22.tmp_1
[debug] : 0, matmul_16.tmp_0
[debug] : 0, transpose_31.tmp_0
[debug] : 0, transpose_31.tmp_1
[debug] : 0, reshape2_31.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_output_fc.w_0
[debug] : 0, fc_45.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_output_fc.b_0
[debug] : 0, fc_45.tmp_1
[debug] : 0, dropout_23.tmp_0
[debug] : 0, dropout_23.tmp_1
[debug] : 0, tmp_40
[debug] : 0, reduce_mean_30.tmp_0
[debug] : 0, elementwise_sub_15
[debug] : 0, square_15.tmp_0
[debug] : 0, reduce_mean_31.tmp_0
[debug] : 0, tmp_41
[debug] : 0, rsqrt_15.tmp_0
[debug] : 0, elementwise_mul_30
[debug] : 0, encoder_layer_7_post_att_layer_norm_scale
[debug] : 0, encoder_layer_7_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_31
[debug] : 0, elementwise_add_15
[debug] : 0, encoder_layer_7_ffn_fc_0.w_0
[debug] : 0, fc_46.tmp_0
[debug] : 0, encoder_layer_7_ffn_fc_0.b_0
[debug] : 0, fc_46.tmp_1
[debug] : 0, fc_46.tmp_2
[debug] : 0, encoder_layer_7_ffn_fc_1.w_0
[debug] : 0, fc_47.tmp_0
[debug] : 0, encoder_layer_7_ffn_fc_1.b_0
[debug] : 0, fc_47.tmp_1
[debug] : 0, dropout_24.tmp_0
[debug] : 0, dropout_24.tmp_1
[debug] : 0, tmp_42
[debug] : 0, reduce_mean_32.tmp_0
[debug] : 0, elementwise_sub_16
[debug] : 0, square_16.tmp_0
[debug] : 0, reduce_mean_33.tmp_0
[debug] : 0, tmp_43
[debug] : 0, rsqrt_16.tmp_0
[debug] : 0, elementwise_mul_32
[debug] : 0, encoder_layer_7_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_7_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_33
[debug] : 0, elementwise_add_16
[debug] : 0, encoder_layer_8_multi_head_att_query_fc.w_0
[debug] : 0, fc_48.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_query_fc.b_0
[debug] : 0, fc_48.tmp_1
[debug] : 0, encoder_layer_8_multi_head_att_key_fc.w_0
[debug] : 0, fc_49.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_key_fc.b_0
[debug] : 0, fc_49.tmp_1
[debug] : 0, encoder_layer_8_multi_head_att_value_fc.w_0
[debug] : 0, fc_50.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_value_fc.b_0
[debug] : 0, fc_50.tmp_1
[debug] : 0, reshape2_32.tmp_0
[debug] : 0, transpose_32.tmp_0
[debug] : 0, transpose_32.tmp_1
[debug] : 0, reshape2_33.tmp_0
[debug] : 0, transpose_33.tmp_0
[debug] : 0, transpose_33.tmp_1
[debug] : 0, reshape2_34.tmp_0
[debug] : 0, transpose_34.tmp_0
[debug] : 0, transpose_34.tmp_1
[debug] : 0, scale_9.tmp_0
[debug] : 0, matmul_17.tmp_0
[debug] : 0, tmp_44
[debug] : 0, softmax_8.tmp_0
[debug] : 0, dropout_25.tmp_0
[debug] : 0, dropout_25.tmp_1
[debug] : 0, matmul_18.tmp_0
[debug] : 0, transpose_35.tmp_0
[debug] : 0, transpose_35.tmp_1
[debug] : 0, reshape2_35.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_output_fc.w_0
[debug] : 0, fc_51.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_output_fc.b_0
[debug] : 0, fc_51.tmp_1
[debug] : 0, dropout_26.tmp_0
[debug] : 0, dropout_26.tmp_1
[debug] : 0, tmp_45
[debug] : 0, reduce_mean_34.tmp_0
[debug] : 0, elementwise_sub_17
[debug] : 0, square_17.tmp_0
[debug] : 0, reduce_mean_35.tmp_0
[debug] : 0, tmp_46
[debug] : 0, rsqrt_17.tmp_0
[debug] : 0, elementwise_mul_34
[debug] : 0, encoder_layer_8_post_att_layer_norm_scale
[debug] : 0, encoder_layer_8_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_35
[debug] : 0, elementwise_add_17
[debug] : 0, encoder_layer_8_ffn_fc_0.w_0
[debug] : 0, fc_52.tmp_0
[debug] : 0, encoder_layer_8_ffn_fc_0.b_0
[debug] : 0, fc_52.tmp_1
[debug] : 0, fc_52.tmp_2
[debug] : 0, encoder_layer_8_ffn_fc_1.w_0
[debug] : 0, fc_53.tmp_0
[debug] : 0, encoder_layer_8_ffn_fc_1.b_0
[debug] : 0, fc_53.tmp_1
[debug] : 0, dropout_27.tmp_0
[debug] : 0, dropout_27.tmp_1
[debug] : 0, tmp_47
[debug] : 0, reduce_mean_36.tmp_0
[debug] : 0, elementwise_sub_18
[debug] : 0, square_18.tmp_0
[debug] : 0, reduce_mean_37.tmp_0
[debug] : 0, tmp_48
[debug] : 0, rsqrt_18.tmp_0
[debug] : 0, elementwise_mul_36
[debug] : 0, encoder_layer_8_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_8_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_37
[debug] : 0, elementwise_add_18
[debug] : 0, encoder_layer_9_multi_head_att_query_fc.w_0
[debug] : 0, fc_54.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_query_fc.b_0
[debug] : 0, fc_54.tmp_1
[debug] : 0, encoder_layer_9_multi_head_att_key_fc.w_0
[debug] : 0, fc_55.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_key_fc.b_0
[debug] : 0, fc_55.tmp_1
[debug] : 0, encoder_layer_9_multi_head_att_value_fc.w_0
[debug] : 0, fc_56.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_value_fc.b_0
[debug] : 0, fc_56.tmp_1
[debug] : 0, reshape2_36.tmp_0
[debug] : 0, transpose_36.tmp_0
[debug] : 0, transpose_36.tmp_1
[debug] : 0, reshape2_37.tmp_0
[debug] : 0, transpose_37.tmp_0
[debug] : 0, transpose_37.tmp_1
[debug] : 0, reshape2_38.tmp_0
[debug] : 0, transpose_38.tmp_0
[debug] : 0, transpose_38.tmp_1
[debug] : 0, scale_10.tmp_0
[debug] : 0, matmul_19.tmp_0
[debug] : 0, tmp_49
[debug] : 0, softmax_9.tmp_0
[debug] : 0, dropout_28.tmp_0
[debug] : 0, dropout_28.tmp_1
[debug] : 0, matmul_20.tmp_0
[debug] : 0, transpose_39.tmp_0
[debug] : 0, transpose_39.tmp_1
[debug] : 0, reshape2_39.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_output_fc.w_0
[debug] : 0, fc_57.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_output_fc.b_0
[debug] : 0, fc_57.tmp_1
[debug] : 0, dropout_29.tmp_0
[debug] : 0, dropout_29.tmp_1
[debug] : 0, tmp_50
[debug] : 0, reduce_mean_38.tmp_0
[debug] : 0, elementwise_sub_19
[debug] : 0, square_19.tmp_0
[debug] : 0, reduce_mean_39.tmp_0
[debug] : 0, tmp_51
[debug] : 0, rsqrt_19.tmp_0
[debug] : 0, elementwise_mul_38
[debug] : 0, encoder_layer_9_post_att_layer_norm_scale
[debug] : 0, encoder_layer_9_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_39
[debug] : 0, elementwise_add_19
[debug] : 0, encoder_layer_9_ffn_fc_0.w_0
[debug] : 0, fc_58.tmp_0
[debug] : 0, encoder_layer_9_ffn_fc_0.b_0
[debug] : 0, fc_58.tmp_1
[debug] : 0, fc_58.tmp_2
[debug] : 0, encoder_layer_9_ffn_fc_1.w_0
[debug] : 0, fc_59.tmp_0
[debug] : 0, encoder_layer_9_ffn_fc_1.b_0
[debug] : 0, fc_59.tmp_1
[debug] : 0, dropout_30.tmp_0
[debug] : 0, dropout_30.tmp_1
[debug] : 0, tmp_52
[debug] : 0, reduce_mean_40.tmp_0
[debug] : 0, elementwise_sub_20
[debug] : 0, square_20.tmp_0
[debug] : 0, reduce_mean_41.tmp_0
[debug] : 0, tmp_53
[debug] : 0, rsqrt_20.tmp_0
[debug] : 0, elementwise_mul_40
[debug] : 0, encoder_layer_9_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_9_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_41
[debug] : 0, elementwise_add_20
[debug] : 0, encoder_layer_10_multi_head_att_query_fc.w_0
[debug] : 0, fc_60.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_query_fc.b_0
[debug] : 0, fc_60.tmp_1
[debug] : 0, encoder_layer_10_multi_head_att_key_fc.w_0
[debug] : 0, fc_61.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_key_fc.b_0
[debug] : 0, fc_61.tmp_1
[debug] : 0, encoder_layer_10_multi_head_att_value_fc.w_0
[debug] : 0, fc_62.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_value_fc.b_0
[debug] : 0, fc_62.tmp_1
[debug] : 0, reshape2_40.tmp_0
[debug] : 0, transpose_40.tmp_0
[debug] : 0, transpose_40.tmp_1
[debug] : 0, reshape2_41.tmp_0
[debug] : 0, transpose_41.tmp_0
[debug] : 0, transpose_41.tmp_1
[debug] : 0, reshape2_42.tmp_0
[debug] : 0, transpose_42.tmp_0
[debug] : 0, transpose_42.tmp_1
[debug] : 0, scale_11.tmp_0
[debug] : 0, matmul_21.tmp_0
[debug] : 0, tmp_54
[debug] : 0, softmax_10.tmp_0
[debug] : 0, dropout_31.tmp_0
[debug] : 0, dropout_31.tmp_1
[debug] : 0, matmul_22.tmp_0
[debug] : 0, transpose_43.tmp_0
[debug] : 0, transpose_43.tmp_1
[debug] : 0, reshape2_43.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_output_fc.w_0
[debug] : 0, fc_63.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_output_fc.b_0
[debug] : 0, fc_63.tmp_1
[debug] : 0, dropout_32.tmp_0
[debug] : 0, dropout_32.tmp_1
[debug] : 0, tmp_55
[debug] : 0, reduce_mean_42.tmp_0
[debug] : 0, elementwise_sub_21
[debug] : 0, square_21.tmp_0
[debug] : 0, reduce_mean_43.tmp_0
[debug] : 0, tmp_56
[debug] : 0, rsqrt_21.tmp_0
[debug] : 0, elementwise_mul_42
[debug] : 0, encoder_layer_10_post_att_layer_norm_scale
[debug] : 0, encoder_layer_10_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_43
[debug] : 0, elementwise_add_21
[debug] : 0, encoder_layer_10_ffn_fc_0.w_0
[debug] : 0, fc_64.tmp_0
[debug] : 0, encoder_layer_10_ffn_fc_0.b_0
[debug] : 0, fc_64.tmp_1
[debug] : 0, fc_64.tmp_2
[debug] : 0, encoder_layer_10_ffn_fc_1.w_0
[debug] : 0, fc_65.tmp_0
[debug] : 0, encoder_layer_10_ffn_fc_1.b_0
[debug] : 0, fc_65.tmp_1
[debug] : 0, dropout_33.tmp_0
[debug] : 0, dropout_33.tmp_1
[debug] : 0, tmp_57
[debug] : 0, reduce_mean_44.tmp_0
[debug] : 0, elementwise_sub_22
[debug] : 0, square_22.tmp_0
[debug] : 0, reduce_mean_45.tmp_0
[debug] : 0, tmp_58
[debug] : 0, rsqrt_22.tmp_0
[debug] : 0, elementwise_mul_44
[debug] : 0, encoder_layer_10_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_10_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_45
[debug] : 0, elementwise_add_22
[debug] : 0, encoder_layer_11_multi_head_att_query_fc.w_0
[debug] : 0, fc_66.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_query_fc.b_0
[debug] : 0, fc_66.tmp_1
[debug] : 0, encoder_layer_11_multi_head_att_key_fc.w_0
[debug] : 0, fc_67.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_key_fc.b_0
[debug] : 0, fc_67.tmp_1
[debug] : 0, encoder_layer_11_multi_head_att_value_fc.w_0
[debug] : 0, fc_68.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_value_fc.b_0
[debug] : 0, fc_68.tmp_1
[debug] : 0, reshape2_44.tmp_0
[debug] : 0, transpose_44.tmp_0
[debug] : 0, transpose_44.tmp_1
[debug] : 0, reshape2_45.tmp_0
[debug] : 0, transpose_45.tmp_0
[debug] : 0, transpose_45.tmp_1
[debug] : 0, reshape2_46.tmp_0
[debug] : 0, transpose_46.tmp_0
[debug] : 0, transpose_46.tmp_1
[debug] : 0, scale_12.tmp_0
[debug] : 0, matmul_23.tmp_0
[debug] : 0, tmp_59
[debug] : 0, softmax_11.tmp_0
[debug] : 0, dropout_34.tmp_0
[debug] : 0, dropout_34.tmp_1
[debug] : 0, matmul_24.tmp_0
[debug] : 0, transpose_47.tmp_0
[debug] : 0, transpose_47.tmp_1
[debug] : 0, reshape2_47.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_output_fc.w_0
[debug] : 0, fc_69.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_output_fc.b_0
[debug] : 0, fc_69.tmp_1
[debug] : 0, dropout_35.tmp_0
[debug] : 0, dropout_35.tmp_1
[debug] : 0, tmp_60
[debug] : 0, reduce_mean_46.tmp_0
[debug] : 0, elementwise_sub_23
[debug] : 0, square_23.tmp_0
[debug] : 0, reduce_mean_47.tmp_0
[debug] : 0, tmp_61
[debug] : 0, rsqrt_23.tmp_0
[debug] : 0, elementwise_mul_46
[debug] : 0, encoder_layer_11_post_att_layer_norm_scale
[debug] : 0, encoder_layer_11_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_47
[debug] : 0, elementwise_add_23
[debug] : 0, encoder_layer_11_ffn_fc_0.w_0
[debug] : 0, fc_70.tmp_0
[debug] : 0, encoder_layer_11_ffn_fc_0.b_0
[debug] : 0, fc_70.tmp_1
[debug] : 0, fc_70.tmp_2
[debug] : 0, encoder_layer_11_ffn_fc_1.w_0
[debug] : 0, fc_71.tmp_0
[debug] : 0, encoder_layer_11_ffn_fc_1.b_0
[debug] : 0, fc_71.tmp_1
[debug] : 0, dropout_36.tmp_0
[debug] : 0, dropout_36.tmp_1
[debug] : 0, tmp_62
[debug] : 0, reduce_mean_48.tmp_0
[debug] : 0, elementwise_sub_24
[debug] : 0, square_24.tmp_0
[debug] : 0, reduce_mean_49.tmp_0
[debug] : 0, tmp_63
[debug] : 0, rsqrt_24.tmp_0
[debug] : 0, elementwise_mul_48
[debug] : 0, encoder_layer_11_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_11_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_49
[debug] : 0, elementwise_add_24
[debug] : 0, encoder_layer_12_multi_head_att_query_fc.w_0
[debug] : 0, fc_72.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_query_fc.b_0
[debug] : 0, fc_72.tmp_1
[debug] : 0, encoder_layer_12_multi_head_att_key_fc.w_0
[debug] : 0, fc_73.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_key_fc.b_0
[debug] : 0, fc_73.tmp_1
[debug] : 0, encoder_layer_12_multi_head_att_value_fc.w_0
[debug] : 0, fc_74.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_value_fc.b_0
[debug] : 0, fc_74.tmp_1
[debug] : 0, reshape2_48.tmp_0
[debug] : 0, transpose_48.tmp_0
[debug] : 0, transpose_48.tmp_1
[debug] : 0, reshape2_49.tmp_0
[debug] : 0, transpose_49.tmp_0
[debug] : 0, transpose_49.tmp_1
[debug] : 0, reshape2_50.tmp_0
[debug] : 0, transpose_50.tmp_0
[debug] : 0, transpose_50.tmp_1
[debug] : 0, scale_13.tmp_0
[debug] : 0, matmul_25.tmp_0
[debug] : 0, tmp_64
[debug] : 0, softmax_12.tmp_0
[debug] : 0, dropout_37.tmp_0
[debug] : 0, dropout_37.tmp_1
[debug] : 0, matmul_26.tmp_0
[debug] : 0, transpose_51.tmp_0
[debug] : 0, transpose_51.tmp_1
[debug] : 0, reshape2_51.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_output_fc.w_0
[debug] : 0, fc_75.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_output_fc.b_0
[debug] : 0, fc_75.tmp_1
[debug] : 0, dropout_38.tmp_0
[debug] : 0, dropout_38.tmp_1
[debug] : 0, tmp_65
[debug] : 0, reduce_mean_50.tmp_0
[debug] : 0, elementwise_sub_25
[debug] : 0, square_25.tmp_0
[debug] : 0, reduce_mean_51.tmp_0
[debug] : 0, tmp_66
[debug] : 0, rsqrt_25.tmp_0
[debug] : 0, elementwise_mul_50
[debug] : 0, encoder_layer_12_post_att_layer_norm_scale
[debug] : 0, encoder_layer_12_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_51
[debug] : 0, elementwise_add_25
[debug] : 0, encoder_layer_12_ffn_fc_0.w_0
[debug] : 0, fc_76.tmp_0
[debug] : 0, encoder_layer_12_ffn_fc_0.b_0
[debug] : 0, fc_76.tmp_1
[debug] : 0, fc_76.tmp_2
[debug] : 0, encoder_layer_12_ffn_fc_1.w_0
[debug] : 0, fc_77.tmp_0
[debug] : 0, encoder_layer_12_ffn_fc_1.b_0
[debug] : 0, fc_77.tmp_1
[debug] : 0, dropout_39.tmp_0
[debug] : 0, dropout_39.tmp_1
[debug] : 0, tmp_67
[debug] : 0, reduce_mean_52.tmp_0
[debug] : 0, elementwise_sub_26
[debug] : 0, square_26.tmp_0
[debug] : 0, reduce_mean_53.tmp_0
[debug] : 0, tmp_68
[debug] : 0, rsqrt_26.tmp_0
[debug] : 0, elementwise_mul_52
[debug] : 0, encoder_layer_12_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_12_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_53
[debug] : 0, elementwise_add_26
[debug] : 0, encoder_layer_13_multi_head_att_query_fc.w_0
[debug] : 0, fc_78.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_query_fc.b_0
[debug] : 0, fc_78.tmp_1
[debug] : 0, encoder_layer_13_multi_head_att_key_fc.w_0
[debug] : 0, fc_79.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_key_fc.b_0
[debug] : 0, fc_79.tmp_1
[debug] : 0, encoder_layer_13_multi_head_att_value_fc.w_0
[debug] : 0, fc_80.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_value_fc.b_0
[debug] : 0, fc_80.tmp_1
[debug] : 0, reshape2_52.tmp_0
[debug] : 0, transpose_52.tmp_0
[debug] : 0, transpose_52.tmp_1
[debug] : 0, reshape2_53.tmp_0
[debug] : 0, transpose_53.tmp_0
[debug] : 0, transpose_53.tmp_1
[debug] : 0, reshape2_54.tmp_0
[debug] : 0, transpose_54.tmp_0
[debug] : 0, transpose_54.tmp_1
[debug] : 0, scale_14.tmp_0
[debug] : 0, matmul_27.tmp_0
[debug] : 0, tmp_69
[debug] : 0, softmax_13.tmp_0
[debug] : 0, dropout_40.tmp_0
[debug] : 0, dropout_40.tmp_1
[debug] : 0, matmul_28.tmp_0
[debug] : 0, transpose_55.tmp_0
[debug] : 0, transpose_55.tmp_1
[debug] : 0, reshape2_55.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_output_fc.w_0
[debug] : 0, fc_81.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_output_fc.b_0
[debug] : 0, fc_81.tmp_1
[debug] : 0, dropout_41.tmp_0
[debug] : 0, dropout_41.tmp_1
[debug] : 0, tmp_70
[debug] : 0, reduce_mean_54.tmp_0
[debug] : 0, elementwise_sub_27
[debug] : 0, square_27.tmp_0
[debug] : 0, reduce_mean_55.tmp_0
[debug] : 0, tmp_71
[debug] : 0, rsqrt_27.tmp_0
[debug] : 0, elementwise_mul_54
[debug] : 0, encoder_layer_13_post_att_layer_norm_scale
[debug] : 0, encoder_layer_13_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_55
[debug] : 0, elementwise_add_27
[debug] : 0, encoder_layer_13_ffn_fc_0.w_0
[debug] : 0, fc_82.tmp_0
[debug] : 0, encoder_layer_13_ffn_fc_0.b_0
[debug] : 0, fc_82.tmp_1
[debug] : 0, fc_82.tmp_2
[debug] : 0, encoder_layer_13_ffn_fc_1.w_0
[debug] : 0, fc_83.tmp_0
[debug] : 0, encoder_layer_13_ffn_fc_1.b_0
[debug] : 0, fc_83.tmp_1
[debug] : 0, dropout_42.tmp_0
[debug] : 0, dropout_42.tmp_1
[debug] : 0, tmp_72
[debug] : 0, reduce_mean_56.tmp_0
[debug] : 0, elementwise_sub_28
[debug] : 0, square_28.tmp_0
[debug] : 0, reduce_mean_57.tmp_0
[debug] : 0, tmp_73
[debug] : 0, rsqrt_28.tmp_0
[debug] : 0, elementwise_mul_56
[debug] : 0, encoder_layer_13_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_13_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_57
[debug] : 0, elementwise_add_28
[debug] : 0, encoder_layer_14_multi_head_att_query_fc.w_0
[debug] : 0, fc_84.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_query_fc.b_0
[debug] : 0, fc_84.tmp_1
[debug] : 0, encoder_layer_14_multi_head_att_key_fc.w_0
[debug] : 0, fc_85.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_key_fc.b_0
[debug] : 0, fc_85.tmp_1
[debug] : 0, encoder_layer_14_multi_head_att_value_fc.w_0
[debug] : 0, fc_86.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_value_fc.b_0
[debug] : 0, fc_86.tmp_1
[debug] : 0, reshape2_56.tmp_0
[debug] : 0, transpose_56.tmp_0
[debug] : 0, transpose_56.tmp_1
[debug] : 0, reshape2_57.tmp_0
[debug] : 0, transpose_57.tmp_0
[debug] : 0, transpose_57.tmp_1
[debug] : 0, reshape2_58.tmp_0
[debug] : 0, transpose_58.tmp_0
[debug] : 0, transpose_58.tmp_1
[debug] : 0, scale_15.tmp_0
[debug] : 0, matmul_29.tmp_0
[debug] : 0, tmp_74
[debug] : 0, softmax_14.tmp_0
[debug] : 0, dropout_43.tmp_0
[debug] : 0, dropout_43.tmp_1
[debug] : 0, matmul_30.tmp_0
[debug] : 0, transpose_59.tmp_0
[debug] : 0, transpose_59.tmp_1
[debug] : 0, reshape2_59.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_output_fc.w_0
[debug] : 0, fc_87.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_output_fc.b_0
[debug] : 0, fc_87.tmp_1
[debug] : 0, dropout_44.tmp_0
[debug] : 0, dropout_44.tmp_1
[debug] : 0, tmp_75
[debug] : 0, reduce_mean_58.tmp_0
[debug] : 0, elementwise_sub_29
[debug] : 0, square_29.tmp_0
[debug] : 0, reduce_mean_59.tmp_0
[debug] : 0, tmp_76
[debug] : 0, rsqrt_29.tmp_0
[debug] : 0, elementwise_mul_58
[debug] : 0, encoder_layer_14_post_att_layer_norm_scale
[debug] : 0, encoder_layer_14_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_59
[debug] : 0, elementwise_add_29
[debug] : 0, encoder_layer_14_ffn_fc_0.w_0
[debug] : 0, fc_88.tmp_0
[debug] : 0, encoder_layer_14_ffn_fc_0.b_0
[debug] : 0, fc_88.tmp_1
[debug] : 0, fc_88.tmp_2
[debug] : 0, encoder_layer_14_ffn_fc_1.w_0
[debug] : 0, fc_89.tmp_0
[debug] : 0, encoder_layer_14_ffn_fc_1.b_0
[debug] : 0, fc_89.tmp_1
[debug] : 0, dropout_45.tmp_0
[debug] : 0, dropout_45.tmp_1
[debug] : 0, tmp_77
[debug] : 0, reduce_mean_60.tmp_0
[debug] : 0, elementwise_sub_30
[debug] : 0, square_30.tmp_0
[debug] : 0, reduce_mean_61.tmp_0
[debug] : 0, tmp_78
[debug] : 0, rsqrt_30.tmp_0
[debug] : 0, elementwise_mul_60
[debug] : 0, encoder_layer_14_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_14_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_61
[debug] : 0, elementwise_add_30
[debug] : 0, encoder_layer_15_multi_head_att_query_fc.w_0
[debug] : 0, fc_90.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_query_fc.b_0
[debug] : 0, fc_90.tmp_1
[debug] : 0, encoder_layer_15_multi_head_att_key_fc.w_0
[debug] : 0, fc_91.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_key_fc.b_0
[debug] : 0, fc_91.tmp_1
[debug] : 0, encoder_layer_15_multi_head_att_value_fc.w_0
[debug] : 0, fc_92.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_value_fc.b_0
[debug] : 0, fc_92.tmp_1
[debug] : 0, reshape2_60.tmp_0
[debug] : 0, transpose_60.tmp_0
[debug] : 0, transpose_60.tmp_1
[debug] : 0, reshape2_61.tmp_0
[debug] : 0, transpose_61.tmp_0
[debug] : 0, transpose_61.tmp_1
[debug] : 0, reshape2_62.tmp_0
[debug] : 0, transpose_62.tmp_0
[debug] : 0, transpose_62.tmp_1
[debug] : 0, scale_16.tmp_0
[debug] : 0, matmul_31.tmp_0
[debug] : 0, tmp_79
[debug] : 0, softmax_15.tmp_0
[debug] : 0, dropout_46.tmp_0
[debug] : 0, dropout_46.tmp_1
[debug] : 0, matmul_32.tmp_0
[debug] : 0, transpose_63.tmp_0
[debug] : 0, transpose_63.tmp_1
[debug] : 0, reshape2_63.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_output_fc.w_0
[debug] : 0, fc_93.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_output_fc.b_0
[debug] : 0, fc_93.tmp_1
[debug] : 0, dropout_47.tmp_0
[debug] : 0, dropout_47.tmp_1
[debug] : 0, tmp_80
[debug] : 0, reduce_mean_62.tmp_0
[debug] : 0, elementwise_sub_31
[debug] : 0, square_31.tmp_0
[debug] : 0, reduce_mean_63.tmp_0
[debug] : 0, tmp_81
[debug] : 0, rsqrt_31.tmp_0
[debug] : 0, elementwise_mul_62
[debug] : 0, encoder_layer_15_post_att_layer_norm_scale
[debug] : 0, encoder_layer_15_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_63
[debug] : 0, elementwise_add_31
[debug] : 0, encoder_layer_15_ffn_fc_0.w_0
[debug] : 0, fc_94.tmp_0
[debug] : 0, encoder_layer_15_ffn_fc_0.b_0
[debug] : 0, fc_94.tmp_1
[debug] : 0, fc_94.tmp_2
[debug] : 0, encoder_layer_15_ffn_fc_1.w_0
[debug] : 0, fc_95.tmp_0
[debug] : 0, encoder_layer_15_ffn_fc_1.b_0
[debug] : 0, fc_95.tmp_1
[debug] : 0, dropout_48.tmp_0
[debug] : 0, dropout_48.tmp_1
[debug] : 0, tmp_82
[debug] : 0, reduce_mean_64.tmp_0
[debug] : 0, elementwise_sub_32
[debug] : 0, square_32.tmp_0
[debug] : 0, reduce_mean_65.tmp_0
[debug] : 0, tmp_83
[debug] : 0, rsqrt_32.tmp_0
[debug] : 0, elementwise_mul_64
[debug] : 0, encoder_layer_15_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_15_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_65
[debug] : 0, elementwise_add_32
[debug] : 0, encoder_layer_16_multi_head_att_query_fc.w_0
[debug] : 0, fc_96.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_query_fc.b_0
[debug] : 0, fc_96.tmp_1
[debug] : 0, encoder_layer_16_multi_head_att_key_fc.w_0
[debug] : 0, fc_97.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_key_fc.b_0
[debug] : 0, fc_97.tmp_1
[debug] : 0, encoder_layer_16_multi_head_att_value_fc.w_0
[debug] : 0, fc_98.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_value_fc.b_0
[debug] : 0, fc_98.tmp_1
[debug] : 0, reshape2_64.tmp_0
[debug] : 0, transpose_64.tmp_0
[debug] : 0, transpose_64.tmp_1
[debug] : 0, reshape2_65.tmp_0
[debug] : 0, transpose_65.tmp_0
[debug] : 0, transpose_65.tmp_1
[debug] : 0, reshape2_66.tmp_0
[debug] : 0, transpose_66.tmp_0
[debug] : 0, transpose_66.tmp_1
[debug] : 0, scale_17.tmp_0
[debug] : 0, matmul_33.tmp_0
[debug] : 0, tmp_84
[debug] : 0, softmax_16.tmp_0
[debug] : 0, dropout_49.tmp_0
[debug] : 0, dropout_49.tmp_1
[debug] : 0, matmul_34.tmp_0
[debug] : 0, transpose_67.tmp_0
[debug] : 0, transpose_67.tmp_1
[debug] : 0, reshape2_67.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_output_fc.w_0
[debug] : 0, fc_99.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_output_fc.b_0
[debug] : 0, fc_99.tmp_1
[debug] : 0, dropout_50.tmp_0
[debug] : 0, dropout_50.tmp_1
[debug] : 0, tmp_85
[debug] : 0, reduce_mean_66.tmp_0
[debug] : 0, elementwise_sub_33
[debug] : 0, square_33.tmp_0
[debug] : 0, reduce_mean_67.tmp_0
[debug] : 0, tmp_86
[debug] : 0, rsqrt_33.tmp_0
[debug] : 0, elementwise_mul_66
[debug] : 0, encoder_layer_16_post_att_layer_norm_scale
[debug] : 0, encoder_layer_16_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_67
[debug] : 0, elementwise_add_33
[debug] : 0, encoder_layer_16_ffn_fc_0.w_0
[debug] : 0, fc_100.tmp_0
[debug] : 0, encoder_layer_16_ffn_fc_0.b_0
[debug] : 0, fc_100.tmp_1
[debug] : 0, fc_100.tmp_2
[debug] : 0, encoder_layer_16_ffn_fc_1.w_0
[debug] : 0, fc_101.tmp_0
[debug] : 0, encoder_layer_16_ffn_fc_1.b_0
[debug] : 0, fc_101.tmp_1
[debug] : 0, dropout_51.tmp_0
[debug] : 0, dropout_51.tmp_1
[debug] : 0, tmp_87
[debug] : 0, reduce_mean_68.tmp_0
[debug] : 0, elementwise_sub_34
[debug] : 0, square_34.tmp_0
[debug] : 0, reduce_mean_69.tmp_0
[debug] : 0, tmp_88
[debug] : 0, rsqrt_34.tmp_0
[debug] : 0, elementwise_mul_68
[debug] : 0, encoder_layer_16_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_16_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_69
[debug] : 0, elementwise_add_34
[debug] : 0, encoder_layer_17_multi_head_att_query_fc.w_0
[debug] : 0, fc_102.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_query_fc.b_0
[debug] : 0, fc_102.tmp_1
[debug] : 0, encoder_layer_17_multi_head_att_key_fc.w_0
[debug] : 0, fc_103.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_key_fc.b_0
[debug] : 0, fc_103.tmp_1
[debug] : 0, encoder_layer_17_multi_head_att_value_fc.w_0
[debug] : 0, fc_104.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_value_fc.b_0
[debug] : 0, fc_104.tmp_1
[debug] : 0, reshape2_68.tmp_0
[debug] : 0, transpose_68.tmp_0
[debug] : 0, transpose_68.tmp_1
[debug] : 0, reshape2_69.tmp_0
[debug] : 0, transpose_69.tmp_0
[debug] : 0, transpose_69.tmp_1
[debug] : 0, reshape2_70.tmp_0
[debug] : 0, transpose_70.tmp_0
[debug] : 0, transpose_70.tmp_1
[debug] : 0, scale_18.tmp_0
[debug] : 0, matmul_35.tmp_0
[debug] : 0, tmp_89
[debug] : 0, softmax_17.tmp_0
[debug] : 0, dropout_52.tmp_0
[debug] : 0, dropout_52.tmp_1
[debug] : 0, matmul_36.tmp_0
[debug] : 0, transpose_71.tmp_0
[debug] : 0, transpose_71.tmp_1
[debug] : 0, reshape2_71.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_output_fc.w_0
[debug] : 0, fc_105.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_output_fc.b_0
[debug] : 0, fc_105.tmp_1
[debug] : 0, dropout_53.tmp_0
[debug] : 0, dropout_53.tmp_1
[debug] : 0, tmp_90
[debug] : 0, reduce_mean_70.tmp_0
[debug] : 0, elementwise_sub_35
[debug] : 0, square_35.tmp_0
[debug] : 0, reduce_mean_71.tmp_0
[debug] : 0, tmp_91
[debug] : 0, rsqrt_35.tmp_0
[debug] : 0, elementwise_mul_70
[debug] : 0, encoder_layer_17_post_att_layer_norm_scale
[debug] : 0, encoder_layer_17_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_71
[debug] : 0, elementwise_add_35
[debug] : 0, encoder_layer_17_ffn_fc_0.w_0
[debug] : 0, fc_106.tmp_0
[debug] : 0, encoder_layer_17_ffn_fc_0.b_0
[debug] : 0, fc_106.tmp_1
[debug] : 0, fc_106.tmp_2
[debug] : 0, encoder_layer_17_ffn_fc_1.w_0
[debug] : 0, fc_107.tmp_0
[debug] : 0, encoder_layer_17_ffn_fc_1.b_0
[debug] : 0, fc_107.tmp_1
[debug] : 0, dropout_54.tmp_0
[debug] : 0, dropout_54.tmp_1
[debug] : 0, tmp_92
[debug] : 0, reduce_mean_72.tmp_0
[debug] : 0, elementwise_sub_36
[debug] : 0, square_36.tmp_0
[debug] : 0, reduce_mean_73.tmp_0
[debug] : 0, tmp_93
[debug] : 0, rsqrt_36.tmp_0
[debug] : 0, elementwise_mul_72
[debug] : 0, encoder_layer_17_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_17_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_73
[debug] : 0, elementwise_add_36
[debug] : 0, encoder_layer_18_multi_head_att_query_fc.w_0
[debug] : 0, fc_108.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_query_fc.b_0
[debug] : 0, fc_108.tmp_1
[debug] : 0, encoder_layer_18_multi_head_att_key_fc.w_0
[debug] : 0, fc_109.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_key_fc.b_0
[debug] : 0, fc_109.tmp_1
[debug] : 0, encoder_layer_18_multi_head_att_value_fc.w_0
[debug] : 0, fc_110.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_value_fc.b_0
[debug] : 0, fc_110.tmp_1
[debug] : 0, reshape2_72.tmp_0
[debug] : 0, transpose_72.tmp_0
[debug] : 0, transpose_72.tmp_1
[debug] : 0, reshape2_73.tmp_0
[debug] : 0, transpose_73.tmp_0
[debug] : 0, transpose_73.tmp_1
[debug] : 0, reshape2_74.tmp_0
[debug] : 0, transpose_74.tmp_0
[debug] : 0, transpose_74.tmp_1
[debug] : 0, scale_19.tmp_0
[debug] : 0, matmul_37.tmp_0
[debug] : 0, tmp_94
[debug] : 0, softmax_18.tmp_0
[debug] : 0, dropout_55.tmp_0
[debug] : 0, dropout_55.tmp_1
[debug] : 0, matmul_38.tmp_0
[debug] : 0, transpose_75.tmp_0
[debug] : 0, transpose_75.tmp_1
[debug] : 0, reshape2_75.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_output_fc.w_0
[debug] : 0, fc_111.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_output_fc.b_0
[debug] : 0, fc_111.tmp_1
[debug] : 0, dropout_56.tmp_0
[debug] : 0, dropout_56.tmp_1
[debug] : 0, tmp_95
[debug] : 0, reduce_mean_74.tmp_0
[debug] : 0, elementwise_sub_37
[debug] : 0, square_37.tmp_0
[debug] : 0, reduce_mean_75.tmp_0
[debug] : 0, tmp_96
[debug] : 0, rsqrt_37.tmp_0
[debug] : 0, elementwise_mul_74
[debug] : 0, encoder_layer_18_post_att_layer_norm_scale
[debug] : 0, encoder_layer_18_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_75
[debug] : 0, elementwise_add_37
[debug] : 0, encoder_layer_18_ffn_fc_0.w_0
[debug] : 0, fc_112.tmp_0
[debug] : 0, encoder_layer_18_ffn_fc_0.b_0
[debug] : 0, fc_112.tmp_1
[debug] : 0, fc_112.tmp_2
[debug] : 0, encoder_layer_18_ffn_fc_1.w_0
[debug] : 0, fc_113.tmp_0
[debug] : 0, encoder_layer_18_ffn_fc_1.b_0
[debug] : 0, fc_113.tmp_1
[debug] : 0, dropout_57.tmp_0
[debug] : 0, dropout_57.tmp_1
[debug] : 0, tmp_97
[debug] : 0, reduce_mean_76.tmp_0
[debug] : 0, elementwise_sub_38
[debug] : 0, square_38.tmp_0
[debug] : 0, reduce_mean_77.tmp_0
[debug] : 0, tmp_98
[debug] : 0, rsqrt_38.tmp_0
[debug] : 0, elementwise_mul_76
[debug] : 0, encoder_layer_18_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_18_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_77
[debug] : 0, elementwise_add_38
[debug] : 0, encoder_layer_19_multi_head_att_query_fc.w_0
[debug] : 0, fc_114.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_query_fc.b_0
[debug] : 0, fc_114.tmp_1
[debug] : 0, encoder_layer_19_multi_head_att_key_fc.w_0
[debug] : 0, fc_115.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_key_fc.b_0
[debug] : 0, fc_115.tmp_1
[debug] : 0, encoder_layer_19_multi_head_att_value_fc.w_0
[debug] : 0, fc_116.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_value_fc.b_0
[debug] : 0, fc_116.tmp_1
[debug] : 0, reshape2_76.tmp_0
[debug] : 0, transpose_76.tmp_0
[debug] : 0, transpose_76.tmp_1
[debug] : 0, reshape2_77.tmp_0
[debug] : 0, transpose_77.tmp_0
[debug] : 0, transpose_77.tmp_1
[debug] : 0, reshape2_78.tmp_0
[debug] : 0, transpose_78.tmp_0
[debug] : 0, transpose_78.tmp_1
[debug] : 0, scale_20.tmp_0
[debug] : 0, matmul_39.tmp_0
[debug] : 0, tmp_99
[debug] : 0, softmax_19.tmp_0
[debug] : 0, dropout_58.tmp_0
[debug] : 0, dropout_58.tmp_1
[debug] : 0, matmul_40.tmp_0
[debug] : 0, transpose_79.tmp_0
[debug] : 0, transpose_79.tmp_1
[debug] : 0, reshape2_79.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_output_fc.w_0
[debug] : 0, fc_117.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_output_fc.b_0
[debug] : 0, fc_117.tmp_1
[debug] : 0, dropout_59.tmp_0
[debug] : 0, dropout_59.tmp_1
[debug] : 0, tmp_100
[debug] : 0, reduce_mean_78.tmp_0
[debug] : 0, elementwise_sub_39
[debug] : 0, square_39.tmp_0
[debug] : 0, reduce_mean_79.tmp_0
[debug] : 0, tmp_101
[debug] : 0, rsqrt_39.tmp_0
[debug] : 0, elementwise_mul_78
[debug] : 0, encoder_layer_19_post_att_layer_norm_scale
[debug] : 0, encoder_layer_19_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_79
[debug] : 0, elementwise_add_39
[debug] : 0, encoder_layer_19_ffn_fc_0.w_0
[debug] : 0, fc_118.tmp_0
[debug] : 0, encoder_layer_19_ffn_fc_0.b_0
[debug] : 0, fc_118.tmp_1
[debug] : 0, fc_118.tmp_2
[debug] : 0, encoder_layer_19_ffn_fc_1.w_0
[debug] : 0, fc_119.tmp_0
[debug] : 0, encoder_layer_19_ffn_fc_1.b_0
[debug] : 0, fc_119.tmp_1
[debug] : 0, dropout_60.tmp_0
[debug] : 0, dropout_60.tmp_1
[debug] : 0, tmp_102
[debug] : 0, reduce_mean_80.tmp_0
[debug] : 0, elementwise_sub_40
[debug] : 0, square_40.tmp_0
[debug] : 0, reduce_mean_81.tmp_0
[debug] : 0, tmp_103
[debug] : 0, rsqrt_40.tmp_0
[debug] : 0, elementwise_mul_80
[debug] : 0, encoder_layer_19_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_19_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_81
[debug] : 0, elementwise_add_40
[debug] : 0, encoder_layer_20_multi_head_att_query_fc.w_0
[debug] : 0, fc_120.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_query_fc.b_0
[debug] : 0, fc_120.tmp_1
[debug] : 0, encoder_layer_20_multi_head_att_key_fc.w_0
[debug] : 0, fc_121.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_key_fc.b_0
[debug] : 0, fc_121.tmp_1
[debug] : 0, encoder_layer_20_multi_head_att_value_fc.w_0
[debug] : 0, fc_122.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_value_fc.b_0
[debug] : 0, fc_122.tmp_1
[debug] : 0, reshape2_80.tmp_0
[debug] : 0, transpose_80.tmp_0
[debug] : 0, transpose_80.tmp_1
[debug] : 0, reshape2_81.tmp_0
[debug] : 0, transpose_81.tmp_0
[debug] : 0, transpose_81.tmp_1
[debug] : 0, reshape2_82.tmp_0
[debug] : 0, transpose_82.tmp_0
[debug] : 0, transpose_82.tmp_1
[debug] : 0, scale_21.tmp_0
[debug] : 0, matmul_41.tmp_0
[debug] : 0, tmp_104
[debug] : 0, softmax_20.tmp_0
[debug] : 0, dropout_61.tmp_0
[debug] : 0, dropout_61.tmp_1
[debug] : 0, matmul_42.tmp_0
[debug] : 0, transpose_83.tmp_0
[debug] : 0, transpose_83.tmp_1
[debug] : 0, reshape2_83.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_output_fc.w_0
[debug] : 0, fc_123.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_output_fc.b_0
[debug] : 0, fc_123.tmp_1
[debug] : 0, dropout_62.tmp_0
[debug] : 0, dropout_62.tmp_1
[debug] : 0, tmp_105
[debug] : 0, reduce_mean_82.tmp_0
[debug] : 0, elementwise_sub_41
[debug] : 0, square_41.tmp_0
[debug] : 0, reduce_mean_83.tmp_0
[debug] : 0, tmp_106
[debug] : 0, rsqrt_41.tmp_0
[debug] : 0, elementwise_mul_82
[debug] : 0, encoder_layer_20_post_att_layer_norm_scale
[debug] : 0, encoder_layer_20_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_83
[debug] : 0, elementwise_add_41
[debug] : 0, encoder_layer_20_ffn_fc_0.w_0
[debug] : 0, fc_124.tmp_0
[debug] : 0, encoder_layer_20_ffn_fc_0.b_0
[debug] : 0, fc_124.tmp_1
[debug] : 0, fc_124.tmp_2
[debug] : 0, encoder_layer_20_ffn_fc_1.w_0
[debug] : 0, fc_125.tmp_0
[debug] : 0, encoder_layer_20_ffn_fc_1.b_0
[debug] : 0, fc_125.tmp_1
[debug] : 0, dropout_63.tmp_0
[debug] : 0, dropout_63.tmp_1
[debug] : 0, tmp_107
[debug] : 0, reduce_mean_84.tmp_0
[debug] : 0, elementwise_sub_42
[debug] : 0, square_42.tmp_0
[debug] : 0, reduce_mean_85.tmp_0
[debug] : 0, tmp_108
[debug] : 0, rsqrt_42.tmp_0
[debug] : 0, elementwise_mul_84
[debug] : 0, encoder_layer_20_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_20_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_85
[debug] : 0, elementwise_add_42
[debug] : 0, encoder_layer_21_multi_head_att_query_fc.w_0
[debug] : 0, fc_126.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_query_fc.b_0
[debug] : 0, fc_126.tmp_1
[debug] : 0, encoder_layer_21_multi_head_att_key_fc.w_0
[debug] : 0, fc_127.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_key_fc.b_0
[debug] : 0, fc_127.tmp_1
[debug] : 0, encoder_layer_21_multi_head_att_value_fc.w_0
[debug] : 0, fc_128.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_value_fc.b_0
[debug] : 0, fc_128.tmp_1
[debug] : 0, reshape2_84.tmp_0
[debug] : 0, transpose_84.tmp_0
[debug] : 0, transpose_84.tmp_1
[debug] : 0, reshape2_85.tmp_0
[debug] : 0, transpose_85.tmp_0
[debug] : 0, transpose_85.tmp_1
[debug] : 0, reshape2_86.tmp_0
[debug] : 0, transpose_86.tmp_0
[debug] : 0, transpose_86.tmp_1
[debug] : 0, scale_22.tmp_0
[debug] : 0, matmul_43.tmp_0
[debug] : 0, tmp_109
[debug] : 0, softmax_21.tmp_0
[debug] : 0, dropout_64.tmp_0
[debug] : 0, dropout_64.tmp_1
[debug] : 0, matmul_44.tmp_0
[debug] : 0, transpose_87.tmp_0
[debug] : 0, transpose_87.tmp_1
[debug] : 0, reshape2_87.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_output_fc.w_0
[debug] : 0, fc_129.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_output_fc.b_0
[debug] : 0, fc_129.tmp_1
[debug] : 0, dropout_65.tmp_0
[debug] : 0, dropout_65.tmp_1
[debug] : 0, tmp_110
[debug] : 0, reduce_mean_86.tmp_0
[debug] : 0, elementwise_sub_43
[debug] : 0, square_43.tmp_0
[debug] : 0, reduce_mean_87.tmp_0
[debug] : 0, tmp_111
[debug] : 0, rsqrt_43.tmp_0
[debug] : 0, elementwise_mul_86
[debug] : 0, encoder_layer_21_post_att_layer_norm_scale
[debug] : 0, encoder_layer_21_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_87
[debug] : 0, elementwise_add_43
[debug] : 0, encoder_layer_21_ffn_fc_0.w_0
[debug] : 0, fc_130.tmp_0
[debug] : 0, encoder_layer_21_ffn_fc_0.b_0
[debug] : 0, fc_130.tmp_1
[debug] : 0, fc_130.tmp_2
[debug] : 0, encoder_layer_21_ffn_fc_1.w_0
[debug] : 0, fc_131.tmp_0
[debug] : 0, encoder_layer_21_ffn_fc_1.b_0
[debug] : 0, fc_131.tmp_1
[debug] : 0, dropout_66.tmp_0
[debug] : 0, dropout_66.tmp_1
[debug] : 0, tmp_112
[debug] : 0, reduce_mean_88.tmp_0
[debug] : 0, elementwise_sub_44
[debug] : 0, square_44.tmp_0
[debug] : 0, reduce_mean_89.tmp_0
[debug] : 0, tmp_113
[debug] : 0, rsqrt_44.tmp_0
[debug] : 0, elementwise_mul_88
[debug] : 0, encoder_layer_21_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_21_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_89
[debug] : 0, elementwise_add_44
[debug] : 0, encoder_layer_22_multi_head_att_query_fc.w_0
[debug] : 0, fc_132.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_query_fc.b_0
[debug] : 0, fc_132.tmp_1
[debug] : 0, encoder_layer_22_multi_head_att_key_fc.w_0
[debug] : 0, fc_133.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_key_fc.b_0
[debug] : 0, fc_133.tmp_1
[debug] : 0, encoder_layer_22_multi_head_att_value_fc.w_0
[debug] : 0, fc_134.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_value_fc.b_0
[debug] : 0, fc_134.tmp_1
[debug] : 0, reshape2_88.tmp_0
[debug] : 0, transpose_88.tmp_0
[debug] : 0, transpose_88.tmp_1
[debug] : 0, reshape2_89.tmp_0
[debug] : 0, transpose_89.tmp_0
[debug] : 0, transpose_89.tmp_1
[debug] : 0, reshape2_90.tmp_0
[debug] : 0, transpose_90.tmp_0
[debug] : 0, transpose_90.tmp_1
[debug] : 0, scale_23.tmp_0
[debug] : 0, matmul_45.tmp_0
[debug] : 0, tmp_114
[debug] : 0, softmax_22.tmp_0
[debug] : 0, dropout_67.tmp_0
[debug] : 0, dropout_67.tmp_1
[debug] : 0, matmul_46.tmp_0
[debug] : 0, transpose_91.tmp_0
[debug] : 0, transpose_91.tmp_1
[debug] : 0, reshape2_91.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_output_fc.w_0
[debug] : 0, fc_135.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_output_fc.b_0
[debug] : 0, fc_135.tmp_1
[debug] : 0, dropout_68.tmp_0
[debug] : 0, dropout_68.tmp_1
[debug] : 0, tmp_115
[debug] : 0, reduce_mean_90.tmp_0
[debug] : 0, elementwise_sub_45
[debug] : 0, square_45.tmp_0
[debug] : 0, reduce_mean_91.tmp_0
[debug] : 0, tmp_116
[debug] : 0, rsqrt_45.tmp_0
[debug] : 0, elementwise_mul_90
[debug] : 0, encoder_layer_22_post_att_layer_norm_scale
[debug] : 0, encoder_layer_22_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_91
[debug] : 0, elementwise_add_45
[debug] : 0, encoder_layer_22_ffn_fc_0.w_0
[debug] : 0, fc_136.tmp_0
[debug] : 0, encoder_layer_22_ffn_fc_0.b_0
[debug] : 0, fc_136.tmp_1
[debug] : 0, fc_136.tmp_2
[debug] : 0, encoder_layer_22_ffn_fc_1.w_0
[debug] : 0, fc_137.tmp_0
[debug] : 0, encoder_layer_22_ffn_fc_1.b_0
[debug] : 0, fc_137.tmp_1
[debug] : 0, dropout_69.tmp_0
[debug] : 0, dropout_69.tmp_1
[debug] : 0, tmp_117
[debug] : 0, reduce_mean_92.tmp_0
[debug] : 0, elementwise_sub_46
[debug] : 0, square_46.tmp_0
[debug] : 0, reduce_mean_93.tmp_0
[debug] : 0, tmp_118
[debug] : 0, rsqrt_46.tmp_0
[debug] : 0, elementwise_mul_92
[debug] : 0, encoder_layer_22_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_22_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_93
[debug] : 0, elementwise_add_46
[debug] : 0, encoder_layer_23_multi_head_att_query_fc.w_0
[debug] : 0, fc_138.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_query_fc.b_0
[debug] : 0, fc_138.tmp_1
[debug] : 0, encoder_layer_23_multi_head_att_key_fc.w_0
[debug] : 0, fc_139.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_key_fc.b_0
[debug] : 0, fc_139.tmp_1
[debug] : 0, encoder_layer_23_multi_head_att_value_fc.w_0
[debug] : 0, fc_140.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_value_fc.b_0
[debug] : 0, fc_140.tmp_1
[debug] : 0, reshape2_92.tmp_0
[debug] : 0, transpose_92.tmp_0
[debug] : 0, transpose_92.tmp_1
[debug] : 0, reshape2_93.tmp_0
[debug] : 0, transpose_93.tmp_0
[debug] : 0, transpose_93.tmp_1
[debug] : 0, reshape2_94.tmp_0
[debug] : 0, transpose_94.tmp_0
[debug] : 0, transpose_94.tmp_1
[debug] : 0, scale_24.tmp_0
[debug] : 0, matmul_47.tmp_0
[debug] : 0, tmp_119
[debug] : 0, softmax_23.tmp_0
[debug] : 0, dropout_70.tmp_0
[debug] : 0, dropout_70.tmp_1
[debug] : 0, matmul_48.tmp_0
[debug] : 0, transpose_95.tmp_0
[debug] : 0, transpose_95.tmp_1
[debug] : 0, reshape2_95.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_output_fc.w_0
[debug] : 0, fc_141.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_output_fc.b_0
[debug] : 0, fc_141.tmp_1
[debug] : 0, dropout_71.tmp_0
[debug] : 0, dropout_71.tmp_1
[debug] : 0, tmp_120
[debug] : 0, reduce_mean_94.tmp_0
[debug] : 0, elementwise_sub_47
[debug] : 0, square_47.tmp_0
[debug] : 0, reduce_mean_95.tmp_0
[debug] : 0, tmp_121
[debug] : 0, rsqrt_47.tmp_0
[debug] : 0, elementwise_mul_94
[debug] : 0, encoder_layer_23_post_att_layer_norm_scale
[debug] : 0, encoder_layer_23_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_95
[debug] : 0, elementwise_add_47
[debug] : 0, encoder_layer_23_ffn_fc_0.w_0
[debug] : 0, fc_142.tmp_0
[debug] : 0, encoder_layer_23_ffn_fc_0.b_0
[debug] : 0, fc_142.tmp_1
[debug] : 0, fc_142.tmp_2
[debug] : 0, encoder_layer_23_ffn_fc_1.w_0
[debug] : 0, fc_143.tmp_0
[debug] : 0, encoder_layer_23_ffn_fc_1.b_0
[debug] : 0, fc_143.tmp_1
[debug] : 0, dropout_72.tmp_0
[debug] : 0, dropout_72.tmp_1
[debug] : 0, tmp_122
[debug] : 0, reduce_mean_96.tmp_0
[debug] : 0, elementwise_sub_48
[debug] : 0, square_48.tmp_0
[debug] : 0, reduce_mean_97.tmp_0
[debug] : 0, tmp_123
[debug] : 0, rsqrt_48.tmp_0
[debug] : 0, elementwise_mul_96
[debug] : 0, encoder_layer_23_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_23_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_97
[debug] : 0, elementwise_add_48
[debug] : 0, slice_0.tmp_0
[debug] : 0, reshape2_96.tmp_0
[debug] : 0, reshape2_96.tmp_1
[debug] : 0, pooled_fc.w_0
[debug] : 0, fc_144.tmp_0
[debug] : 0, pooled_fc.b_0
[debug] : 0, fc_144.tmp_1
[debug] : 0, fc_144.tmp_2
[debug] : 0, senti_cls.senti_cls.dropout_0.tmp_0
[debug] : 0, senti_cls.senti_cls.dropout_0.tmp_1
[debug] : 0, senti_cls.cls_out_w
[debug] : 0, senti_cls.senti_cls.fc_0.tmp_0
[debug] : 0, senti_cls.cls_out_b
[debug] : 0, senti_cls.senti_cls.fc_0.tmp_1
[debug] : 0, senti_cls.senti_cls.softmax_0.tmp_0
[debug] : 0, senti_cls.senti_cls.cross_entropy2_0.tmp_0
[debug] : 0, senti_cls.senti_cls.cross_entropy2_0.tmp_1
[debug] : 0, senti_cls.senti_cls.cross_entropy2_0.tmp_2
[debug] : 0, senti_cls.senti_cls.mean_0.tmp_0
[debug] : 0, reduce_sum_0.tmp_0
0
preparing data...ok!
61
30
name: "reduce_sum_0.tmp_0"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP32
dims: 1
}
}
}
persistable: false
random init params...
Loading pretraining parameters from pretrain/ernie/params...
...@@ -6,7 +6,7 @@ if __name__ == '__main__': ...@@ -6,7 +6,7 @@ if __name__ == '__main__':
max_seqlen = 512 max_seqlen = 512
batch_size = 4 batch_size = 4
num_epochs = 2 num_epochs = 20
lr = 1e-3 lr = 1e-3
vocab_path = './pretrain/ernie/vocab.txt' vocab_path = './pretrain/ernie/vocab.txt'
...@@ -67,7 +67,8 @@ if __name__ == '__main__': ...@@ -67,7 +67,8 @@ if __name__ == '__main__':
cls_pred_head = palm.head.Classify(4, 1024, phase='pred') cls_pred_head = palm.head.Classify(4, 1024, phase='pred')
trainer.build_predict_head(cls_pred_head, pred_ernie) trainer.build_predict_head(cls_pred_head, pred_ernie)
trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict') # trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
trainer.train(iterator_fn, print_steps=1)
# trainer.save() # trainer.save()
......
export CUDA_VISIBLE_DEVICES=3 export CUDA_VISIBLE_DEVICES=4
python run.py python run.py
...@@ -114,6 +114,8 @@ class ERNIE(BaseBackbone): ...@@ -114,6 +114,8 @@ class ERNIE(BaseBackbone):
input_mask = inputs['input_mask'] input_mask = inputs['input_mask']
task_ids = inputs['task_ids'] task_ids = inputs['task_ids']
fluid.layers.Print(src_ids)
# padding id in vocabulary must be set to 0 # padding id in vocabulary must be set to 0
emb_out = fluid.embedding( emb_out = fluid.embedding(
input=src_ids, input=src_ids,
......
...@@ -5,5 +5,5 @@ import multiprocessing ...@@ -5,5 +5,5 @@ import multiprocessing
gpu_dev_count = int(fluid.core.get_cuda_device_count()) gpu_dev_count = int(fluid.core.get_cuda_device_count())
cpu_dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) cpu_dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
from reader import yield_pieces, data_feeder from reader import yield_pieces, data_feeder, decode_fake
...@@ -11,8 +11,8 @@ def yield_pieces(data, distribute_strategy, batch_size): ...@@ -11,8 +11,8 @@ def yield_pieces(data, distribute_strategy, batch_size):
distribute_strategy: support s=split, c=copy, u=unstack, distribute_strategy: support s=split, c=copy, u=unstack,
""" """
assert batch_size % dev_count == 0, "batch_size need to be integer times larger than dev_count." assert batch_size % dev_count == 0, "batch_size need to be integer times larger than dev_count."
print('data in yield pieces') # print('data in yield pieces')
print(len(data)) # print(len(data))
assert type(data) == type(distribute_strategy), [type(data), type(distribute_strategy)] assert type(data) == type(distribute_strategy), [type(data), type(distribute_strategy)]
assert len(data) == len(distribute_strategy), [len(data), len(distribute_strategy)] assert len(data) == len(distribute_strategy), [len(data), len(distribute_strategy)]
...@@ -53,12 +53,11 @@ def yield_pieces(data, distribute_strategy, batch_size): ...@@ -53,12 +53,11 @@ def yield_pieces(data, distribute_strategy, batch_size):
if type(data) == dict: if type(data) == dict:
yield dict(zip(*[keys, temp])) yield dict(zip(*[keys, temp]))
else: else:
print('yielded pieces') # print('yielded pieces')
print(len(temp)) # print(len(temp))
yield temp yield temp
def data_feeder(reader, postprocess_fn=None, prefetch_steps=2): def data_feeder(reader, postprocess_fn=None, prefetch_steps=2, phase='train'):
if postprocess_fn is None: if postprocess_fn is None:
def postprocess_fn(batch): def postprocess_fn(batch):
return batch return batch
...@@ -91,6 +90,7 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2): ...@@ -91,6 +90,7 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
queue.task_done() queue.task_done()
if ret is not None: if ret is not None:
batches, num_pad = ret batches, num_pad = ret
id = batches[0]['__task_id'][0][0] if phase == 'train' else -1
batch_buf = [] batch_buf = []
flag_buf = [] flag_buf = []
for idx, batch in enumerate(batches): for idx, batch in enumerate(batches):
...@@ -98,12 +98,24 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2): ...@@ -98,12 +98,24 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
flag = idx-len(batches) < -num_pad flag = idx-len(batches) < -num_pad
# if num_pad > 0: # if num_pad > 0:
# num_pad -= 1 # num_pad -= 1
batch = postprocess_fn(batch) batch = postprocess_fn(batch, id)
batch_buf.append(batch) batch_buf.append(batch)
flag_buf.append(flag) flag_buf.append(flag)
yield batch_buf, flag_buf yield batch_buf, flag_buf, id
else: else:
break break
queue.join() queue.join()
def decode_fake(nums, mask, bs):
    """Return how many of ``nums`` results are attributed to flagged-off entries.

    ``mask`` is scanned from the front; the leading run of truthy flags is
    counted as ``n_t`` and every remaining entry is counted as ``n_f``.
    All but the last of the truthy entries are taken to contribute ``bs``
    results each; whatever is left of ``nums`` is divided evenly between the
    last truthy entry and the ``n_f`` remaining entries.

    NOTE(review): presumably ``mask`` is the per-device flag list produced by
    ``data_feeder`` (True = real batch, False = padding copy) and the falsy
    entries replicate the last real batch -- confirm against the caller.

    Args:
        nums: total number of results, as an integer count.
        mask: sequence of flags; truthy entries are expected to come first.
        bs: number of results per full entry.

    Returns:
        The integer number of results belonging to the ``n_f`` trailing
        entries (``0`` when every flag is truthy).
    """
    # Count the leading truthy flags; stop at the first falsy one.
    n_t = 0
    for flag in mask:
        if not flag:
            break
        n_t += 1
    n_f = len(mask) - n_t

    # Results left after removing the first (n_t - 1) full entries.
    remainder = nums - (n_t - 1) * bs

    # Floor division keeps the count an int on Python 3 and matches the
    # behaviour of ``/`` on ints under Python 2.
    each_f = remainder // (n_f + 1)
    return each_f * n_f
...@@ -37,6 +37,8 @@ class Adam(BaseOptimizer): ...@@ -37,6 +37,8 @@ class Adam(BaseOptimizer):
if self._lr_schedualer is not None: if self._lr_schedualer is not None:
self._lr = self._lr_schedualer.build(self._lr) self._lr = self._lr_schedualer.build(self._lr)
fluid.layers.Print(self._lr)
optimizer = fluid.optimizer.Adam(learning_rate=self._lr) optimizer = fluid.optimizer.Adam(learning_rate=self._lr)
if grad_clip is not None: if grad_clip is not None:
...@@ -46,6 +48,7 @@ class Adam(BaseOptimizer): ...@@ -46,6 +48,7 @@ class Adam(BaseOptimizer):
fluid.clip.set_gradient_clip( fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres)) clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
print(self._loss)
_, param_grads = optimizer.minimize(self._loss) _, param_grads = optimizer.minimize(self._loss)
return param_grads return param_grads
......
...@@ -8,8 +8,9 @@ class BaseOptimizer(): ...@@ -8,8 +8,9 @@ class BaseOptimizer():
def build(self, grad_clip=None): def build(self, grad_clip=None):
pass pass
def _set_prog(self, prog): def _set_prog(self, prog, init_prog):
self._prog = prog self._prog = prog
self._init_prog = prog
if self._lr_schedualer is not None: if self._lr_schedualer is not None:
self._lr_schedualer._set_prog(prog) self._lr_schedualer._set_prog(prog)
......
...@@ -21,7 +21,7 @@ import time ...@@ -21,7 +21,7 @@ import time
import numpy as np import numpy as np
import paddlepalm.utils.basic_helper as helper import paddlepalm.utils.basic_helper as helper
from paddlepalm.utils import reader_helper, saver from paddlepalm.utils import reader_helper, saver
from paddlepalm.distribute import gpu_dev_count, data_feeder from paddlepalm.distribute import gpu_dev_count, data_feeder, decode_fake
# from paddlepalm.default_settings import * # from paddlepalm.default_settings import *
DEBUG=False DEBUG=False
...@@ -217,12 +217,16 @@ class Trainer(object): ...@@ -217,12 +217,16 @@ class Trainer(object):
with fluid.program_guard(train_prog, train_init_prog): with fluid.program_guard(train_prog, train_init_prog):
loss_var = fluid.layers.reduce_sum(task_output_vars[self.name+'.loss']) loss_var = fluid.layers.reduce_sum(task_output_vars[self.name+'.loss'])
self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name) for _id, block in enumerate(self._train_prog.blocks):
for var in block.vars:
print("[debug] : %d, %s" % (_id, var))
return loss_var return loss_var
def build_backward(self, optimizer, weight_decay=None, use_ema=False, ema_decay=0.9999): def build_backward(self, optimizer, weight_decay=None, use_ema=False, ema_decay=0.9999):
# build optimizer # build optimizer
optimizer._set_prog(self._train_prog) assert self._train_init_prog is not None, "train graph not foung! You should build_forward first."
optimizer._set_prog(self._train_prog, self._train_init_prog)
with fluid.program_guard(self._train_prog, self._train_init_prog): with fluid.program_guard(self._train_prog, self._train_init_prog):
param_grads = optimizer.build() param_grads = optimizer.build()
...@@ -258,6 +262,13 @@ class Trainer(object): ...@@ -258,6 +262,13 @@ class Trainer(object):
ema = fluid.optimizer.ExponentialMovingAverage(ema_decay) ema = fluid.optimizer.ExponentialMovingAverage(ema_decay)
ema.update() ema.update()
# for bid, block in enumerate(self._train_prog.blocks):
# print('block id: '+str(bid))
# for var in block.vars:
# print("%d : %s" % (bid, var))
# print(self._train_prog)
def load_data(self, input_file, file_format, batch_size, num_epochs=None, shuffle_train=True): def load_data(self, input_file, file_format, batch_size, num_epochs=None, shuffle_train=True):
# load data # load data
print("preparing data...", end='') print("preparing data...", end='')
...@@ -287,6 +298,7 @@ class Trainer(object): ...@@ -287,6 +298,7 @@ class Trainer(object):
def random_init_params(self): def random_init_params(self):
assert self._train_init_prog is not None, "train graph not foung! You should build_forward first before you random init parameters." assert self._train_init_prog is not None, "train graph not foung! You should build_forward first before you random init parameters."
self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name)
on_gpu = gpu_dev_count > 0 on_gpu = gpu_dev_count > 0
self._exe = helper.build_executor(on_gpu) self._exe = helper.build_executor(on_gpu)
print('random init params...') print('random init params...')
...@@ -294,7 +306,7 @@ class Trainer(object): ...@@ -294,7 +306,7 @@ class Trainer(object):
def load_ckpt(self, model_path, phase='train'): def load_ckpt(self, model_path, phase='train'):
# load pretrain model (or ckpt) # load pretrain model (or ckpt)
assert self._exe is not None, "You need to random_init_params before load pretrain models." assert self._exe is not None, "You need to random_init_params before load checkpoints."
if phase == 'train': if phase == 'train':
assert self._train_init_prog is not None, "train graph not found! You should build_forward first before load checkpoint." assert self._train_init_prog is not None, "train graph not found! You should build_forward first before load checkpoint."
...@@ -437,12 +449,12 @@ class Trainer(object): ...@@ -437,12 +449,12 @@ class Trainer(object):
def predict_one_batch(self, batch): def predict_one_batch(self, batch):
if gpu_dev_count > 1: if gpu_dev_count > 1:
feed, mask = batch feed, mask = batch
rt_outputs = self.exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list) rt_outputs = self.exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)
while mask.pop() == False: while mask.pop() == False:
rt_outputs.pop() rt_outputs.pop()
else: else:
feed = self._feed_batch_process_fn(batch) feed = self._feed_batch_process_fn(batch)
rt_outputs = self._exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list) rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)
rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)} rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册