Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PALM
提交
df98c24f
P
PALM
项目概览
PaddlePaddle
/
PALM
通知
8
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PALM
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
df98c24f
编写于
1月 10, 2020
作者:
X
xixiaoyao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix pred
上级
8a99149a
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
1854 addition
and
20 deletion
+1854
-20
demo/demo3/log
demo/demo3/log
+1803
-0
demo/demo3/run.py
demo/demo3/run.py
+3
-2
demo/demo3/run.sh
demo/demo3/run.sh
+1
-1
paddlepalm/.trainer.py.swp
paddlepalm/.trainer.py.swp
+0
-0
paddlepalm/backbone/ernie.py
paddlepalm/backbone/ernie.py
+2
-0
paddlepalm/distribute/__init__.py
paddlepalm/distribute/__init__.py
+1
-1
paddlepalm/distribute/reader.py
paddlepalm/distribute/reader.py
+21
-9
paddlepalm/optimizer/adam.py
paddlepalm/optimizer/adam.py
+3
-0
paddlepalm/optimizer/base_optimizer.py
paddlepalm/optimizer/base_optimizer.py
+2
-1
paddlepalm/trainer.py
paddlepalm/trainer.py
+18
-6
未找到文件。
demo/demo3/log
0 → 100644
浏览文件 @
df98c24f
{'token_ids': [[-1, -1], 'int64'], 'label_ids': [[-1], 'int64']}
<paddlepalm.backbone.ernie.ERNIE object at 0x7fcf583f53d0>
{'token_ids': [[-1, -1], 'int64'], 'label_ids': [[-1], 'int64'], u'input_mask': [[-1, -1, 1], 'float32'], u'position_ids': [[-1, -1], 'int64'], u'task_ids': [[-1, -1], 'int64'], u'segment_ids': [[-1, -1], 'int64']}
[debug] : 0, input_mask
[debug] : 0, position_ids
[debug] : 0, segment_ids
[debug] : 0, task_ids
[debug] : 0, token_ids
[debug] : 0, senti_cls.label_ids
[debug] : 0, print_token_ids_0.tmp_0
[debug] : 0, word_embedding
[debug] : 0, embedding_0.tmp_0
[debug] : 0, pos_embedding
[debug] : 0, embedding_1.tmp_0
[debug] : 0, sent_embedding
[debug] : 0, embedding_2.tmp_0
[debug] : 0, tmp_0
[debug] : 0, tmp_1
[debug] : 0, task_embedding
[debug] : 0, embedding_3.tmp_0
[debug] : 0, tmp_2
[debug] : 0, reduce_mean_0.tmp_0
[debug] : 0, elementwise_sub_0
[debug] : 0, square_0.tmp_0
[debug] : 0, reduce_mean_1.tmp_0
[debug] : 0, tmp_3
[debug] : 0, rsqrt_0.tmp_0
[debug] : 0, elementwise_mul_0
[debug] : 0, pre_encoder_layer_norm_scale
[debug] : 0, pre_encoder_layer_norm_bias
[debug] : 0, elementwise_mul_1
[debug] : 0, elementwise_add_0
[debug] : 0, dropout_0.tmp_0
[debug] : 0, dropout_0.tmp_1
[debug] : 0, matmul_0.tmp_0
[debug] : 0, scale_0.tmp_0
[debug] : 0, stack_0.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_query_fc.w_0
[debug] : 0, fc_0.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_query_fc.b_0
[debug] : 0, fc_0.tmp_1
[debug] : 0, encoder_layer_0_multi_head_att_key_fc.w_0
[debug] : 0, fc_1.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_key_fc.b_0
[debug] : 0, fc_1.tmp_1
[debug] : 0, encoder_layer_0_multi_head_att_value_fc.w_0
[debug] : 0, fc_2.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_value_fc.b_0
[debug] : 0, fc_2.tmp_1
[debug] : 0, reshape2_0.tmp_0
[debug] : 0, transpose_0.tmp_0
[debug] : 0, transpose_0.tmp_1
[debug] : 0, reshape2_1.tmp_0
[debug] : 0, transpose_1.tmp_0
[debug] : 0, transpose_1.tmp_1
[debug] : 0, reshape2_2.tmp_0
[debug] : 0, transpose_2.tmp_0
[debug] : 0, transpose_2.tmp_1
[debug] : 0, scale_1.tmp_0
[debug] : 0, matmul_1.tmp_0
[debug] : 0, tmp_4
[debug] : 0, softmax_0.tmp_0
[debug] : 0, dropout_1.tmp_0
[debug] : 0, dropout_1.tmp_1
[debug] : 0, matmul_2.tmp_0
[debug] : 0, transpose_3.tmp_0
[debug] : 0, transpose_3.tmp_1
[debug] : 0, reshape2_3.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_output_fc.w_0
[debug] : 0, fc_3.tmp_0
[debug] : 0, encoder_layer_0_multi_head_att_output_fc.b_0
[debug] : 0, fc_3.tmp_1
[debug] : 0, dropout_2.tmp_0
[debug] : 0, dropout_2.tmp_1
[debug] : 0, tmp_5
[debug] : 0, reduce_mean_2.tmp_0
[debug] : 0, elementwise_sub_1
[debug] : 0, square_1.tmp_0
[debug] : 0, reduce_mean_3.tmp_0
[debug] : 0, tmp_6
[debug] : 0, rsqrt_1.tmp_0
[debug] : 0, elementwise_mul_2
[debug] : 0, encoder_layer_0_post_att_layer_norm_scale
[debug] : 0, encoder_layer_0_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_3
[debug] : 0, elementwise_add_1
[debug] : 0, encoder_layer_0_ffn_fc_0.w_0
[debug] : 0, fc_4.tmp_0
[debug] : 0, encoder_layer_0_ffn_fc_0.b_0
[debug] : 0, fc_4.tmp_1
[debug] : 0, fc_4.tmp_2
[debug] : 0, encoder_layer_0_ffn_fc_1.w_0
[debug] : 0, fc_5.tmp_0
[debug] : 0, encoder_layer_0_ffn_fc_1.b_0
[debug] : 0, fc_5.tmp_1
[debug] : 0, dropout_3.tmp_0
[debug] : 0, dropout_3.tmp_1
[debug] : 0, tmp_7
[debug] : 0, reduce_mean_4.tmp_0
[debug] : 0, elementwise_sub_2
[debug] : 0, square_2.tmp_0
[debug] : 0, reduce_mean_5.tmp_0
[debug] : 0, tmp_8
[debug] : 0, rsqrt_2.tmp_0
[debug] : 0, elementwise_mul_4
[debug] : 0, encoder_layer_0_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_0_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_5
[debug] : 0, elementwise_add_2
[debug] : 0, encoder_layer_1_multi_head_att_query_fc.w_0
[debug] : 0, fc_6.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_query_fc.b_0
[debug] : 0, fc_6.tmp_1
[debug] : 0, encoder_layer_1_multi_head_att_key_fc.w_0
[debug] : 0, fc_7.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_key_fc.b_0
[debug] : 0, fc_7.tmp_1
[debug] : 0, encoder_layer_1_multi_head_att_value_fc.w_0
[debug] : 0, fc_8.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_value_fc.b_0
[debug] : 0, fc_8.tmp_1
[debug] : 0, reshape2_4.tmp_0
[debug] : 0, transpose_4.tmp_0
[debug] : 0, transpose_4.tmp_1
[debug] : 0, reshape2_5.tmp_0
[debug] : 0, transpose_5.tmp_0
[debug] : 0, transpose_5.tmp_1
[debug] : 0, reshape2_6.tmp_0
[debug] : 0, transpose_6.tmp_0
[debug] : 0, transpose_6.tmp_1
[debug] : 0, scale_2.tmp_0
[debug] : 0, matmul_3.tmp_0
[debug] : 0, tmp_9
[debug] : 0, softmax_1.tmp_0
[debug] : 0, dropout_4.tmp_0
[debug] : 0, dropout_4.tmp_1
[debug] : 0, matmul_4.tmp_0
[debug] : 0, transpose_7.tmp_0
[debug] : 0, transpose_7.tmp_1
[debug] : 0, reshape2_7.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_output_fc.w_0
[debug] : 0, fc_9.tmp_0
[debug] : 0, encoder_layer_1_multi_head_att_output_fc.b_0
[debug] : 0, fc_9.tmp_1
[debug] : 0, dropout_5.tmp_0
[debug] : 0, dropout_5.tmp_1
[debug] : 0, tmp_10
[debug] : 0, reduce_mean_6.tmp_0
[debug] : 0, elementwise_sub_3
[debug] : 0, square_3.tmp_0
[debug] : 0, reduce_mean_7.tmp_0
[debug] : 0, tmp_11
[debug] : 0, rsqrt_3.tmp_0
[debug] : 0, elementwise_mul_6
[debug] : 0, encoder_layer_1_post_att_layer_norm_scale
[debug] : 0, encoder_layer_1_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_7
[debug] : 0, elementwise_add_3
[debug] : 0, encoder_layer_1_ffn_fc_0.w_0
[debug] : 0, fc_10.tmp_0
[debug] : 0, encoder_layer_1_ffn_fc_0.b_0
[debug] : 0, fc_10.tmp_1
[debug] : 0, fc_10.tmp_2
[debug] : 0, encoder_layer_1_ffn_fc_1.w_0
[debug] : 0, fc_11.tmp_0
[debug] : 0, encoder_layer_1_ffn_fc_1.b_0
[debug] : 0, fc_11.tmp_1
[debug] : 0, dropout_6.tmp_0
[debug] : 0, dropout_6.tmp_1
[debug] : 0, tmp_12
[debug] : 0, reduce_mean_8.tmp_0
[debug] : 0, elementwise_sub_4
[debug] : 0, square_4.tmp_0
[debug] : 0, reduce_mean_9.tmp_0
[debug] : 0, tmp_13
[debug] : 0, rsqrt_4.tmp_0
[debug] : 0, elementwise_mul_8
[debug] : 0, encoder_layer_1_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_1_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_9
[debug] : 0, elementwise_add_4
[debug] : 0, encoder_layer_2_multi_head_att_query_fc.w_0
[debug] : 0, fc_12.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_query_fc.b_0
[debug] : 0, fc_12.tmp_1
[debug] : 0, encoder_layer_2_multi_head_att_key_fc.w_0
[debug] : 0, fc_13.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_key_fc.b_0
[debug] : 0, fc_13.tmp_1
[debug] : 0, encoder_layer_2_multi_head_att_value_fc.w_0
[debug] : 0, fc_14.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_value_fc.b_0
[debug] : 0, fc_14.tmp_1
[debug] : 0, reshape2_8.tmp_0
[debug] : 0, transpose_8.tmp_0
[debug] : 0, transpose_8.tmp_1
[debug] : 0, reshape2_9.tmp_0
[debug] : 0, transpose_9.tmp_0
[debug] : 0, transpose_9.tmp_1
[debug] : 0, reshape2_10.tmp_0
[debug] : 0, transpose_10.tmp_0
[debug] : 0, transpose_10.tmp_1
[debug] : 0, scale_3.tmp_0
[debug] : 0, matmul_5.tmp_0
[debug] : 0, tmp_14
[debug] : 0, softmax_2.tmp_0
[debug] : 0, dropout_7.tmp_0
[debug] : 0, dropout_7.tmp_1
[debug] : 0, matmul_6.tmp_0
[debug] : 0, transpose_11.tmp_0
[debug] : 0, transpose_11.tmp_1
[debug] : 0, reshape2_11.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_output_fc.w_0
[debug] : 0, fc_15.tmp_0
[debug] : 0, encoder_layer_2_multi_head_att_output_fc.b_0
[debug] : 0, fc_15.tmp_1
[debug] : 0, dropout_8.tmp_0
[debug] : 0, dropout_8.tmp_1
[debug] : 0, tmp_15
[debug] : 0, reduce_mean_10.tmp_0
[debug] : 0, elementwise_sub_5
[debug] : 0, square_5.tmp_0
[debug] : 0, reduce_mean_11.tmp_0
[debug] : 0, tmp_16
[debug] : 0, rsqrt_5.tmp_0
[debug] : 0, elementwise_mul_10
[debug] : 0, encoder_layer_2_post_att_layer_norm_scale
[debug] : 0, encoder_layer_2_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_11
[debug] : 0, elementwise_add_5
[debug] : 0, encoder_layer_2_ffn_fc_0.w_0
[debug] : 0, fc_16.tmp_0
[debug] : 0, encoder_layer_2_ffn_fc_0.b_0
[debug] : 0, fc_16.tmp_1
[debug] : 0, fc_16.tmp_2
[debug] : 0, encoder_layer_2_ffn_fc_1.w_0
[debug] : 0, fc_17.tmp_0
[debug] : 0, encoder_layer_2_ffn_fc_1.b_0
[debug] : 0, fc_17.tmp_1
[debug] : 0, dropout_9.tmp_0
[debug] : 0, dropout_9.tmp_1
[debug] : 0, tmp_17
[debug] : 0, reduce_mean_12.tmp_0
[debug] : 0, elementwise_sub_6
[debug] : 0, square_6.tmp_0
[debug] : 0, reduce_mean_13.tmp_0
[debug] : 0, tmp_18
[debug] : 0, rsqrt_6.tmp_0
[debug] : 0, elementwise_mul_12
[debug] : 0, encoder_layer_2_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_2_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_13
[debug] : 0, elementwise_add_6
[debug] : 0, encoder_layer_3_multi_head_att_query_fc.w_0
[debug] : 0, fc_18.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_query_fc.b_0
[debug] : 0, fc_18.tmp_1
[debug] : 0, encoder_layer_3_multi_head_att_key_fc.w_0
[debug] : 0, fc_19.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_key_fc.b_0
[debug] : 0, fc_19.tmp_1
[debug] : 0, encoder_layer_3_multi_head_att_value_fc.w_0
[debug] : 0, fc_20.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_value_fc.b_0
[debug] : 0, fc_20.tmp_1
[debug] : 0, reshape2_12.tmp_0
[debug] : 0, transpose_12.tmp_0
[debug] : 0, transpose_12.tmp_1
[debug] : 0, reshape2_13.tmp_0
[debug] : 0, transpose_13.tmp_0
[debug] : 0, transpose_13.tmp_1
[debug] : 0, reshape2_14.tmp_0
[debug] : 0, transpose_14.tmp_0
[debug] : 0, transpose_14.tmp_1
[debug] : 0, scale_4.tmp_0
[debug] : 0, matmul_7.tmp_0
[debug] : 0, tmp_19
[debug] : 0, softmax_3.tmp_0
[debug] : 0, dropout_10.tmp_0
[debug] : 0, dropout_10.tmp_1
[debug] : 0, matmul_8.tmp_0
[debug] : 0, transpose_15.tmp_0
[debug] : 0, transpose_15.tmp_1
[debug] : 0, reshape2_15.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_output_fc.w_0
[debug] : 0, fc_21.tmp_0
[debug] : 0, encoder_layer_3_multi_head_att_output_fc.b_0
[debug] : 0, fc_21.tmp_1
[debug] : 0, dropout_11.tmp_0
[debug] : 0, dropout_11.tmp_1
[debug] : 0, tmp_20
[debug] : 0, reduce_mean_14.tmp_0
[debug] : 0, elementwise_sub_7
[debug] : 0, square_7.tmp_0
[debug] : 0, reduce_mean_15.tmp_0
[debug] : 0, tmp_21
[debug] : 0, rsqrt_7.tmp_0
[debug] : 0, elementwise_mul_14
[debug] : 0, encoder_layer_3_post_att_layer_norm_scale
[debug] : 0, encoder_layer_3_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_15
[debug] : 0, elementwise_add_7
[debug] : 0, encoder_layer_3_ffn_fc_0.w_0
[debug] : 0, fc_22.tmp_0
[debug] : 0, encoder_layer_3_ffn_fc_0.b_0
[debug] : 0, fc_22.tmp_1
[debug] : 0, fc_22.tmp_2
[debug] : 0, encoder_layer_3_ffn_fc_1.w_0
[debug] : 0, fc_23.tmp_0
[debug] : 0, encoder_layer_3_ffn_fc_1.b_0
[debug] : 0, fc_23.tmp_1
[debug] : 0, dropout_12.tmp_0
[debug] : 0, dropout_12.tmp_1
[debug] : 0, tmp_22
[debug] : 0, reduce_mean_16.tmp_0
[debug] : 0, elementwise_sub_8
[debug] : 0, square_8.tmp_0
[debug] : 0, reduce_mean_17.tmp_0
[debug] : 0, tmp_23
[debug] : 0, rsqrt_8.tmp_0
[debug] : 0, elementwise_mul_16
[debug] : 0, encoder_layer_3_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_3_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_17
[debug] : 0, elementwise_add_8
[debug] : 0, encoder_layer_4_multi_head_att_query_fc.w_0
[debug] : 0, fc_24.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_query_fc.b_0
[debug] : 0, fc_24.tmp_1
[debug] : 0, encoder_layer_4_multi_head_att_key_fc.w_0
[debug] : 0, fc_25.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_key_fc.b_0
[debug] : 0, fc_25.tmp_1
[debug] : 0, encoder_layer_4_multi_head_att_value_fc.w_0
[debug] : 0, fc_26.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_value_fc.b_0
[debug] : 0, fc_26.tmp_1
[debug] : 0, reshape2_16.tmp_0
[debug] : 0, transpose_16.tmp_0
[debug] : 0, transpose_16.tmp_1
[debug] : 0, reshape2_17.tmp_0
[debug] : 0, transpose_17.tmp_0
[debug] : 0, transpose_17.tmp_1
[debug] : 0, reshape2_18.tmp_0
[debug] : 0, transpose_18.tmp_0
[debug] : 0, transpose_18.tmp_1
[debug] : 0, scale_5.tmp_0
[debug] : 0, matmul_9.tmp_0
[debug] : 0, tmp_24
[debug] : 0, softmax_4.tmp_0
[debug] : 0, dropout_13.tmp_0
[debug] : 0, dropout_13.tmp_1
[debug] : 0, matmul_10.tmp_0
[debug] : 0, transpose_19.tmp_0
[debug] : 0, transpose_19.tmp_1
[debug] : 0, reshape2_19.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_output_fc.w_0
[debug] : 0, fc_27.tmp_0
[debug] : 0, encoder_layer_4_multi_head_att_output_fc.b_0
[debug] : 0, fc_27.tmp_1
[debug] : 0, dropout_14.tmp_0
[debug] : 0, dropout_14.tmp_1
[debug] : 0, tmp_25
[debug] : 0, reduce_mean_18.tmp_0
[debug] : 0, elementwise_sub_9
[debug] : 0, square_9.tmp_0
[debug] : 0, reduce_mean_19.tmp_0
[debug] : 0, tmp_26
[debug] : 0, rsqrt_9.tmp_0
[debug] : 0, elementwise_mul_18
[debug] : 0, encoder_layer_4_post_att_layer_norm_scale
[debug] : 0, encoder_layer_4_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_19
[debug] : 0, elementwise_add_9
[debug] : 0, encoder_layer_4_ffn_fc_0.w_0
[debug] : 0, fc_28.tmp_0
[debug] : 0, encoder_layer_4_ffn_fc_0.b_0
[debug] : 0, fc_28.tmp_1
[debug] : 0, fc_28.tmp_2
[debug] : 0, encoder_layer_4_ffn_fc_1.w_0
[debug] : 0, fc_29.tmp_0
[debug] : 0, encoder_layer_4_ffn_fc_1.b_0
[debug] : 0, fc_29.tmp_1
[debug] : 0, dropout_15.tmp_0
[debug] : 0, dropout_15.tmp_1
[debug] : 0, tmp_27
[debug] : 0, reduce_mean_20.tmp_0
[debug] : 0, elementwise_sub_10
[debug] : 0, square_10.tmp_0
[debug] : 0, reduce_mean_21.tmp_0
[debug] : 0, tmp_28
[debug] : 0, rsqrt_10.tmp_0
[debug] : 0, elementwise_mul_20
[debug] : 0, encoder_layer_4_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_4_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_21
[debug] : 0, elementwise_add_10
[debug] : 0, encoder_layer_5_multi_head_att_query_fc.w_0
[debug] : 0, fc_30.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_query_fc.b_0
[debug] : 0, fc_30.tmp_1
[debug] : 0, encoder_layer_5_multi_head_att_key_fc.w_0
[debug] : 0, fc_31.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_key_fc.b_0
[debug] : 0, fc_31.tmp_1
[debug] : 0, encoder_layer_5_multi_head_att_value_fc.w_0
[debug] : 0, fc_32.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_value_fc.b_0
[debug] : 0, fc_32.tmp_1
[debug] : 0, reshape2_20.tmp_0
[debug] : 0, transpose_20.tmp_0
[debug] : 0, transpose_20.tmp_1
[debug] : 0, reshape2_21.tmp_0
[debug] : 0, transpose_21.tmp_0
[debug] : 0, transpose_21.tmp_1
[debug] : 0, reshape2_22.tmp_0
[debug] : 0, transpose_22.tmp_0
[debug] : 0, transpose_22.tmp_1
[debug] : 0, scale_6.tmp_0
[debug] : 0, matmul_11.tmp_0
[debug] : 0, tmp_29
[debug] : 0, softmax_5.tmp_0
[debug] : 0, dropout_16.tmp_0
[debug] : 0, dropout_16.tmp_1
[debug] : 0, matmul_12.tmp_0
[debug] : 0, transpose_23.tmp_0
[debug] : 0, transpose_23.tmp_1
[debug] : 0, reshape2_23.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_output_fc.w_0
[debug] : 0, fc_33.tmp_0
[debug] : 0, encoder_layer_5_multi_head_att_output_fc.b_0
[debug] : 0, fc_33.tmp_1
[debug] : 0, dropout_17.tmp_0
[debug] : 0, dropout_17.tmp_1
[debug] : 0, tmp_30
[debug] : 0, reduce_mean_22.tmp_0
[debug] : 0, elementwise_sub_11
[debug] : 0, square_11.tmp_0
[debug] : 0, reduce_mean_23.tmp_0
[debug] : 0, tmp_31
[debug] : 0, rsqrt_11.tmp_0
[debug] : 0, elementwise_mul_22
[debug] : 0, encoder_layer_5_post_att_layer_norm_scale
[debug] : 0, encoder_layer_5_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_23
[debug] : 0, elementwise_add_11
[debug] : 0, encoder_layer_5_ffn_fc_0.w_0
[debug] : 0, fc_34.tmp_0
[debug] : 0, encoder_layer_5_ffn_fc_0.b_0
[debug] : 0, fc_34.tmp_1
[debug] : 0, fc_34.tmp_2
[debug] : 0, encoder_layer_5_ffn_fc_1.w_0
[debug] : 0, fc_35.tmp_0
[debug] : 0, encoder_layer_5_ffn_fc_1.b_0
[debug] : 0, fc_35.tmp_1
[debug] : 0, dropout_18.tmp_0
[debug] : 0, dropout_18.tmp_1
[debug] : 0, tmp_32
[debug] : 0, reduce_mean_24.tmp_0
[debug] : 0, elementwise_sub_12
[debug] : 0, square_12.tmp_0
[debug] : 0, reduce_mean_25.tmp_0
[debug] : 0, tmp_33
[debug] : 0, rsqrt_12.tmp_0
[debug] : 0, elementwise_mul_24
[debug] : 0, encoder_layer_5_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_5_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_25
[debug] : 0, elementwise_add_12
[debug] : 0, encoder_layer_6_multi_head_att_query_fc.w_0
[debug] : 0, fc_36.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_query_fc.b_0
[debug] : 0, fc_36.tmp_1
[debug] : 0, encoder_layer_6_multi_head_att_key_fc.w_0
[debug] : 0, fc_37.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_key_fc.b_0
[debug] : 0, fc_37.tmp_1
[debug] : 0, encoder_layer_6_multi_head_att_value_fc.w_0
[debug] : 0, fc_38.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_value_fc.b_0
[debug] : 0, fc_38.tmp_1
[debug] : 0, reshape2_24.tmp_0
[debug] : 0, transpose_24.tmp_0
[debug] : 0, transpose_24.tmp_1
[debug] : 0, reshape2_25.tmp_0
[debug] : 0, transpose_25.tmp_0
[debug] : 0, transpose_25.tmp_1
[debug] : 0, reshape2_26.tmp_0
[debug] : 0, transpose_26.tmp_0
[debug] : 0, transpose_26.tmp_1
[debug] : 0, scale_7.tmp_0
[debug] : 0, matmul_13.tmp_0
[debug] : 0, tmp_34
[debug] : 0, softmax_6.tmp_0
[debug] : 0, dropout_19.tmp_0
[debug] : 0, dropout_19.tmp_1
[debug] : 0, matmul_14.tmp_0
[debug] : 0, transpose_27.tmp_0
[debug] : 0, transpose_27.tmp_1
[debug] : 0, reshape2_27.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_output_fc.w_0
[debug] : 0, fc_39.tmp_0
[debug] : 0, encoder_layer_6_multi_head_att_output_fc.b_0
[debug] : 0, fc_39.tmp_1
[debug] : 0, dropout_20.tmp_0
[debug] : 0, dropout_20.tmp_1
[debug] : 0, tmp_35
[debug] : 0, reduce_mean_26.tmp_0
[debug] : 0, elementwise_sub_13
[debug] : 0, square_13.tmp_0
[debug] : 0, reduce_mean_27.tmp_0
[debug] : 0, tmp_36
[debug] : 0, rsqrt_13.tmp_0
[debug] : 0, elementwise_mul_26
[debug] : 0, encoder_layer_6_post_att_layer_norm_scale
[debug] : 0, encoder_layer_6_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_27
[debug] : 0, elementwise_add_13
[debug] : 0, encoder_layer_6_ffn_fc_0.w_0
[debug] : 0, fc_40.tmp_0
[debug] : 0, encoder_layer_6_ffn_fc_0.b_0
[debug] : 0, fc_40.tmp_1
[debug] : 0, fc_40.tmp_2
[debug] : 0, encoder_layer_6_ffn_fc_1.w_0
[debug] : 0, fc_41.tmp_0
[debug] : 0, encoder_layer_6_ffn_fc_1.b_0
[debug] : 0, fc_41.tmp_1
[debug] : 0, dropout_21.tmp_0
[debug] : 0, dropout_21.tmp_1
[debug] : 0, tmp_37
[debug] : 0, reduce_mean_28.tmp_0
[debug] : 0, elementwise_sub_14
[debug] : 0, square_14.tmp_0
[debug] : 0, reduce_mean_29.tmp_0
[debug] : 0, tmp_38
[debug] : 0, rsqrt_14.tmp_0
[debug] : 0, elementwise_mul_28
[debug] : 0, encoder_layer_6_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_6_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_29
[debug] : 0, elementwise_add_14
[debug] : 0, encoder_layer_7_multi_head_att_query_fc.w_0
[debug] : 0, fc_42.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_query_fc.b_0
[debug] : 0, fc_42.tmp_1
[debug] : 0, encoder_layer_7_multi_head_att_key_fc.w_0
[debug] : 0, fc_43.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_key_fc.b_0
[debug] : 0, fc_43.tmp_1
[debug] : 0, encoder_layer_7_multi_head_att_value_fc.w_0
[debug] : 0, fc_44.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_value_fc.b_0
[debug] : 0, fc_44.tmp_1
[debug] : 0, reshape2_28.tmp_0
[debug] : 0, transpose_28.tmp_0
[debug] : 0, transpose_28.tmp_1
[debug] : 0, reshape2_29.tmp_0
[debug] : 0, transpose_29.tmp_0
[debug] : 0, transpose_29.tmp_1
[debug] : 0, reshape2_30.tmp_0
[debug] : 0, transpose_30.tmp_0
[debug] : 0, transpose_30.tmp_1
[debug] : 0, scale_8.tmp_0
[debug] : 0, matmul_15.tmp_0
[debug] : 0, tmp_39
[debug] : 0, softmax_7.tmp_0
[debug] : 0, dropout_22.tmp_0
[debug] : 0, dropout_22.tmp_1
[debug] : 0, matmul_16.tmp_0
[debug] : 0, transpose_31.tmp_0
[debug] : 0, transpose_31.tmp_1
[debug] : 0, reshape2_31.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_output_fc.w_0
[debug] : 0, fc_45.tmp_0
[debug] : 0, encoder_layer_7_multi_head_att_output_fc.b_0
[debug] : 0, fc_45.tmp_1
[debug] : 0, dropout_23.tmp_0
[debug] : 0, dropout_23.tmp_1
[debug] : 0, tmp_40
[debug] : 0, reduce_mean_30.tmp_0
[debug] : 0, elementwise_sub_15
[debug] : 0, square_15.tmp_0
[debug] : 0, reduce_mean_31.tmp_0
[debug] : 0, tmp_41
[debug] : 0, rsqrt_15.tmp_0
[debug] : 0, elementwise_mul_30
[debug] : 0, encoder_layer_7_post_att_layer_norm_scale
[debug] : 0, encoder_layer_7_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_31
[debug] : 0, elementwise_add_15
[debug] : 0, encoder_layer_7_ffn_fc_0.w_0
[debug] : 0, fc_46.tmp_0
[debug] : 0, encoder_layer_7_ffn_fc_0.b_0
[debug] : 0, fc_46.tmp_1
[debug] : 0, fc_46.tmp_2
[debug] : 0, encoder_layer_7_ffn_fc_1.w_0
[debug] : 0, fc_47.tmp_0
[debug] : 0, encoder_layer_7_ffn_fc_1.b_0
[debug] : 0, fc_47.tmp_1
[debug] : 0, dropout_24.tmp_0
[debug] : 0, dropout_24.tmp_1
[debug] : 0, tmp_42
[debug] : 0, reduce_mean_32.tmp_0
[debug] : 0, elementwise_sub_16
[debug] : 0, square_16.tmp_0
[debug] : 0, reduce_mean_33.tmp_0
[debug] : 0, tmp_43
[debug] : 0, rsqrt_16.tmp_0
[debug] : 0, elementwise_mul_32
[debug] : 0, encoder_layer_7_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_7_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_33
[debug] : 0, elementwise_add_16
[debug] : 0, encoder_layer_8_multi_head_att_query_fc.w_0
[debug] : 0, fc_48.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_query_fc.b_0
[debug] : 0, fc_48.tmp_1
[debug] : 0, encoder_layer_8_multi_head_att_key_fc.w_0
[debug] : 0, fc_49.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_key_fc.b_0
[debug] : 0, fc_49.tmp_1
[debug] : 0, encoder_layer_8_multi_head_att_value_fc.w_0
[debug] : 0, fc_50.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_value_fc.b_0
[debug] : 0, fc_50.tmp_1
[debug] : 0, reshape2_32.tmp_0
[debug] : 0, transpose_32.tmp_0
[debug] : 0, transpose_32.tmp_1
[debug] : 0, reshape2_33.tmp_0
[debug] : 0, transpose_33.tmp_0
[debug] : 0, transpose_33.tmp_1
[debug] : 0, reshape2_34.tmp_0
[debug] : 0, transpose_34.tmp_0
[debug] : 0, transpose_34.tmp_1
[debug] : 0, scale_9.tmp_0
[debug] : 0, matmul_17.tmp_0
[debug] : 0, tmp_44
[debug] : 0, softmax_8.tmp_0
[debug] : 0, dropout_25.tmp_0
[debug] : 0, dropout_25.tmp_1
[debug] : 0, matmul_18.tmp_0
[debug] : 0, transpose_35.tmp_0
[debug] : 0, transpose_35.tmp_1
[debug] : 0, reshape2_35.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_output_fc.w_0
[debug] : 0, fc_51.tmp_0
[debug] : 0, encoder_layer_8_multi_head_att_output_fc.b_0
[debug] : 0, fc_51.tmp_1
[debug] : 0, dropout_26.tmp_0
[debug] : 0, dropout_26.tmp_1
[debug] : 0, tmp_45
[debug] : 0, reduce_mean_34.tmp_0
[debug] : 0, elementwise_sub_17
[debug] : 0, square_17.tmp_0
[debug] : 0, reduce_mean_35.tmp_0
[debug] : 0, tmp_46
[debug] : 0, rsqrt_17.tmp_0
[debug] : 0, elementwise_mul_34
[debug] : 0, encoder_layer_8_post_att_layer_norm_scale
[debug] : 0, encoder_layer_8_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_35
[debug] : 0, elementwise_add_17
[debug] : 0, encoder_layer_8_ffn_fc_0.w_0
[debug] : 0, fc_52.tmp_0
[debug] : 0, encoder_layer_8_ffn_fc_0.b_0
[debug] : 0, fc_52.tmp_1
[debug] : 0, fc_52.tmp_2
[debug] : 0, encoder_layer_8_ffn_fc_1.w_0
[debug] : 0, fc_53.tmp_0
[debug] : 0, encoder_layer_8_ffn_fc_1.b_0
[debug] : 0, fc_53.tmp_1
[debug] : 0, dropout_27.tmp_0
[debug] : 0, dropout_27.tmp_1
[debug] : 0, tmp_47
[debug] : 0, reduce_mean_36.tmp_0
[debug] : 0, elementwise_sub_18
[debug] : 0, square_18.tmp_0
[debug] : 0, reduce_mean_37.tmp_0
[debug] : 0, tmp_48
[debug] : 0, rsqrt_18.tmp_0
[debug] : 0, elementwise_mul_36
[debug] : 0, encoder_layer_8_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_8_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_37
[debug] : 0, elementwise_add_18
[debug] : 0, encoder_layer_9_multi_head_att_query_fc.w_0
[debug] : 0, fc_54.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_query_fc.b_0
[debug] : 0, fc_54.tmp_1
[debug] : 0, encoder_layer_9_multi_head_att_key_fc.w_0
[debug] : 0, fc_55.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_key_fc.b_0
[debug] : 0, fc_55.tmp_1
[debug] : 0, encoder_layer_9_multi_head_att_value_fc.w_0
[debug] : 0, fc_56.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_value_fc.b_0
[debug] : 0, fc_56.tmp_1
[debug] : 0, reshape2_36.tmp_0
[debug] : 0, transpose_36.tmp_0
[debug] : 0, transpose_36.tmp_1
[debug] : 0, reshape2_37.tmp_0
[debug] : 0, transpose_37.tmp_0
[debug] : 0, transpose_37.tmp_1
[debug] : 0, reshape2_38.tmp_0
[debug] : 0, transpose_38.tmp_0
[debug] : 0, transpose_38.tmp_1
[debug] : 0, scale_10.tmp_0
[debug] : 0, matmul_19.tmp_0
[debug] : 0, tmp_49
[debug] : 0, softmax_9.tmp_0
[debug] : 0, dropout_28.tmp_0
[debug] : 0, dropout_28.tmp_1
[debug] : 0, matmul_20.tmp_0
[debug] : 0, transpose_39.tmp_0
[debug] : 0, transpose_39.tmp_1
[debug] : 0, reshape2_39.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_output_fc.w_0
[debug] : 0, fc_57.tmp_0
[debug] : 0, encoder_layer_9_multi_head_att_output_fc.b_0
[debug] : 0, fc_57.tmp_1
[debug] : 0, dropout_29.tmp_0
[debug] : 0, dropout_29.tmp_1
[debug] : 0, tmp_50
[debug] : 0, reduce_mean_38.tmp_0
[debug] : 0, elementwise_sub_19
[debug] : 0, square_19.tmp_0
[debug] : 0, reduce_mean_39.tmp_0
[debug] : 0, tmp_51
[debug] : 0, rsqrt_19.tmp_0
[debug] : 0, elementwise_mul_38
[debug] : 0, encoder_layer_9_post_att_layer_norm_scale
[debug] : 0, encoder_layer_9_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_39
[debug] : 0, elementwise_add_19
[debug] : 0, encoder_layer_9_ffn_fc_0.w_0
[debug] : 0, fc_58.tmp_0
[debug] : 0, encoder_layer_9_ffn_fc_0.b_0
[debug] : 0, fc_58.tmp_1
[debug] : 0, fc_58.tmp_2
[debug] : 0, encoder_layer_9_ffn_fc_1.w_0
[debug] : 0, fc_59.tmp_0
[debug] : 0, encoder_layer_9_ffn_fc_1.b_0
[debug] : 0, fc_59.tmp_1
[debug] : 0, dropout_30.tmp_0
[debug] : 0, dropout_30.tmp_1
[debug] : 0, tmp_52
[debug] : 0, reduce_mean_40.tmp_0
[debug] : 0, elementwise_sub_20
[debug] : 0, square_20.tmp_0
[debug] : 0, reduce_mean_41.tmp_0
[debug] : 0, tmp_53
[debug] : 0, rsqrt_20.tmp_0
[debug] : 0, elementwise_mul_40
[debug] : 0, encoder_layer_9_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_9_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_41
[debug] : 0, elementwise_add_20
[debug] : 0, encoder_layer_10_multi_head_att_query_fc.w_0
[debug] : 0, fc_60.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_query_fc.b_0
[debug] : 0, fc_60.tmp_1
[debug] : 0, encoder_layer_10_multi_head_att_key_fc.w_0
[debug] : 0, fc_61.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_key_fc.b_0
[debug] : 0, fc_61.tmp_1
[debug] : 0, encoder_layer_10_multi_head_att_value_fc.w_0
[debug] : 0, fc_62.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_value_fc.b_0
[debug] : 0, fc_62.tmp_1
[debug] : 0, reshape2_40.tmp_0
[debug] : 0, transpose_40.tmp_0
[debug] : 0, transpose_40.tmp_1
[debug] : 0, reshape2_41.tmp_0
[debug] : 0, transpose_41.tmp_0
[debug] : 0, transpose_41.tmp_1
[debug] : 0, reshape2_42.tmp_0
[debug] : 0, transpose_42.tmp_0
[debug] : 0, transpose_42.tmp_1
[debug] : 0, scale_11.tmp_0
[debug] : 0, matmul_21.tmp_0
[debug] : 0, tmp_54
[debug] : 0, softmax_10.tmp_0
[debug] : 0, dropout_31.tmp_0
[debug] : 0, dropout_31.tmp_1
[debug] : 0, matmul_22.tmp_0
[debug] : 0, transpose_43.tmp_0
[debug] : 0, transpose_43.tmp_1
[debug] : 0, reshape2_43.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_output_fc.w_0
[debug] : 0, fc_63.tmp_0
[debug] : 0, encoder_layer_10_multi_head_att_output_fc.b_0
[debug] : 0, fc_63.tmp_1
[debug] : 0, dropout_32.tmp_0
[debug] : 0, dropout_32.tmp_1
[debug] : 0, tmp_55
[debug] : 0, reduce_mean_42.tmp_0
[debug] : 0, elementwise_sub_21
[debug] : 0, square_21.tmp_0
[debug] : 0, reduce_mean_43.tmp_0
[debug] : 0, tmp_56
[debug] : 0, rsqrt_21.tmp_0
[debug] : 0, elementwise_mul_42
[debug] : 0, encoder_layer_10_post_att_layer_norm_scale
[debug] : 0, encoder_layer_10_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_43
[debug] : 0, elementwise_add_21
[debug] : 0, encoder_layer_10_ffn_fc_0.w_0
[debug] : 0, fc_64.tmp_0
[debug] : 0, encoder_layer_10_ffn_fc_0.b_0
[debug] : 0, fc_64.tmp_1
[debug] : 0, fc_64.tmp_2
[debug] : 0, encoder_layer_10_ffn_fc_1.w_0
[debug] : 0, fc_65.tmp_0
[debug] : 0, encoder_layer_10_ffn_fc_1.b_0
[debug] : 0, fc_65.tmp_1
[debug] : 0, dropout_33.tmp_0
[debug] : 0, dropout_33.tmp_1
[debug] : 0, tmp_57
[debug] : 0, reduce_mean_44.tmp_0
[debug] : 0, elementwise_sub_22
[debug] : 0, square_22.tmp_0
[debug] : 0, reduce_mean_45.tmp_0
[debug] : 0, tmp_58
[debug] : 0, rsqrt_22.tmp_0
[debug] : 0, elementwise_mul_44
[debug] : 0, encoder_layer_10_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_10_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_45
[debug] : 0, elementwise_add_22
[debug] : 0, encoder_layer_11_multi_head_att_query_fc.w_0
[debug] : 0, fc_66.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_query_fc.b_0
[debug] : 0, fc_66.tmp_1
[debug] : 0, encoder_layer_11_multi_head_att_key_fc.w_0
[debug] : 0, fc_67.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_key_fc.b_0
[debug] : 0, fc_67.tmp_1
[debug] : 0, encoder_layer_11_multi_head_att_value_fc.w_0
[debug] : 0, fc_68.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_value_fc.b_0
[debug] : 0, fc_68.tmp_1
[debug] : 0, reshape2_44.tmp_0
[debug] : 0, transpose_44.tmp_0
[debug] : 0, transpose_44.tmp_1
[debug] : 0, reshape2_45.tmp_0
[debug] : 0, transpose_45.tmp_0
[debug] : 0, transpose_45.tmp_1
[debug] : 0, reshape2_46.tmp_0
[debug] : 0, transpose_46.tmp_0
[debug] : 0, transpose_46.tmp_1
[debug] : 0, scale_12.tmp_0
[debug] : 0, matmul_23.tmp_0
[debug] : 0, tmp_59
[debug] : 0, softmax_11.tmp_0
[debug] : 0, dropout_34.tmp_0
[debug] : 0, dropout_34.tmp_1
[debug] : 0, matmul_24.tmp_0
[debug] : 0, transpose_47.tmp_0
[debug] : 0, transpose_47.tmp_1
[debug] : 0, reshape2_47.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_output_fc.w_0
[debug] : 0, fc_69.tmp_0
[debug] : 0, encoder_layer_11_multi_head_att_output_fc.b_0
[debug] : 0, fc_69.tmp_1
[debug] : 0, dropout_35.tmp_0
[debug] : 0, dropout_35.tmp_1
[debug] : 0, tmp_60
[debug] : 0, reduce_mean_46.tmp_0
[debug] : 0, elementwise_sub_23
[debug] : 0, square_23.tmp_0
[debug] : 0, reduce_mean_47.tmp_0
[debug] : 0, tmp_61
[debug] : 0, rsqrt_23.tmp_0
[debug] : 0, elementwise_mul_46
[debug] : 0, encoder_layer_11_post_att_layer_norm_scale
[debug] : 0, encoder_layer_11_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_47
[debug] : 0, elementwise_add_23
[debug] : 0, encoder_layer_11_ffn_fc_0.w_0
[debug] : 0, fc_70.tmp_0
[debug] : 0, encoder_layer_11_ffn_fc_0.b_0
[debug] : 0, fc_70.tmp_1
[debug] : 0, fc_70.tmp_2
[debug] : 0, encoder_layer_11_ffn_fc_1.w_0
[debug] : 0, fc_71.tmp_0
[debug] : 0, encoder_layer_11_ffn_fc_1.b_0
[debug] : 0, fc_71.tmp_1
[debug] : 0, dropout_36.tmp_0
[debug] : 0, dropout_36.tmp_1
[debug] : 0, tmp_62
[debug] : 0, reduce_mean_48.tmp_0
[debug] : 0, elementwise_sub_24
[debug] : 0, square_24.tmp_0
[debug] : 0, reduce_mean_49.tmp_0
[debug] : 0, tmp_63
[debug] : 0, rsqrt_24.tmp_0
[debug] : 0, elementwise_mul_48
[debug] : 0, encoder_layer_11_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_11_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_49
[debug] : 0, elementwise_add_24
[debug] : 0, encoder_layer_12_multi_head_att_query_fc.w_0
[debug] : 0, fc_72.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_query_fc.b_0
[debug] : 0, fc_72.tmp_1
[debug] : 0, encoder_layer_12_multi_head_att_key_fc.w_0
[debug] : 0, fc_73.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_key_fc.b_0
[debug] : 0, fc_73.tmp_1
[debug] : 0, encoder_layer_12_multi_head_att_value_fc.w_0
[debug] : 0, fc_74.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_value_fc.b_0
[debug] : 0, fc_74.tmp_1
[debug] : 0, reshape2_48.tmp_0
[debug] : 0, transpose_48.tmp_0
[debug] : 0, transpose_48.tmp_1
[debug] : 0, reshape2_49.tmp_0
[debug] : 0, transpose_49.tmp_0
[debug] : 0, transpose_49.tmp_1
[debug] : 0, reshape2_50.tmp_0
[debug] : 0, transpose_50.tmp_0
[debug] : 0, transpose_50.tmp_1
[debug] : 0, scale_13.tmp_0
[debug] : 0, matmul_25.tmp_0
[debug] : 0, tmp_64
[debug] : 0, softmax_12.tmp_0
[debug] : 0, dropout_37.tmp_0
[debug] : 0, dropout_37.tmp_1
[debug] : 0, matmul_26.tmp_0
[debug] : 0, transpose_51.tmp_0
[debug] : 0, transpose_51.tmp_1
[debug] : 0, reshape2_51.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_output_fc.w_0
[debug] : 0, fc_75.tmp_0
[debug] : 0, encoder_layer_12_multi_head_att_output_fc.b_0
[debug] : 0, fc_75.tmp_1
[debug] : 0, dropout_38.tmp_0
[debug] : 0, dropout_38.tmp_1
[debug] : 0, tmp_65
[debug] : 0, reduce_mean_50.tmp_0
[debug] : 0, elementwise_sub_25
[debug] : 0, square_25.tmp_0
[debug] : 0, reduce_mean_51.tmp_0
[debug] : 0, tmp_66
[debug] : 0, rsqrt_25.tmp_0
[debug] : 0, elementwise_mul_50
[debug] : 0, encoder_layer_12_post_att_layer_norm_scale
[debug] : 0, encoder_layer_12_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_51
[debug] : 0, elementwise_add_25
[debug] : 0, encoder_layer_12_ffn_fc_0.w_0
[debug] : 0, fc_76.tmp_0
[debug] : 0, encoder_layer_12_ffn_fc_0.b_0
[debug] : 0, fc_76.tmp_1
[debug] : 0, fc_76.tmp_2
[debug] : 0, encoder_layer_12_ffn_fc_1.w_0
[debug] : 0, fc_77.tmp_0
[debug] : 0, encoder_layer_12_ffn_fc_1.b_0
[debug] : 0, fc_77.tmp_1
[debug] : 0, dropout_39.tmp_0
[debug] : 0, dropout_39.tmp_1
[debug] : 0, tmp_67
[debug] : 0, reduce_mean_52.tmp_0
[debug] : 0, elementwise_sub_26
[debug] : 0, square_26.tmp_0
[debug] : 0, reduce_mean_53.tmp_0
[debug] : 0, tmp_68
[debug] : 0, rsqrt_26.tmp_0
[debug] : 0, elementwise_mul_52
[debug] : 0, encoder_layer_12_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_12_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_53
[debug] : 0, elementwise_add_26
[debug] : 0, encoder_layer_13_multi_head_att_query_fc.w_0
[debug] : 0, fc_78.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_query_fc.b_0
[debug] : 0, fc_78.tmp_1
[debug] : 0, encoder_layer_13_multi_head_att_key_fc.w_0
[debug] : 0, fc_79.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_key_fc.b_0
[debug] : 0, fc_79.tmp_1
[debug] : 0, encoder_layer_13_multi_head_att_value_fc.w_0
[debug] : 0, fc_80.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_value_fc.b_0
[debug] : 0, fc_80.tmp_1
[debug] : 0, reshape2_52.tmp_0
[debug] : 0, transpose_52.tmp_0
[debug] : 0, transpose_52.tmp_1
[debug] : 0, reshape2_53.tmp_0
[debug] : 0, transpose_53.tmp_0
[debug] : 0, transpose_53.tmp_1
[debug] : 0, reshape2_54.tmp_0
[debug] : 0, transpose_54.tmp_0
[debug] : 0, transpose_54.tmp_1
[debug] : 0, scale_14.tmp_0
[debug] : 0, matmul_27.tmp_0
[debug] : 0, tmp_69
[debug] : 0, softmax_13.tmp_0
[debug] : 0, dropout_40.tmp_0
[debug] : 0, dropout_40.tmp_1
[debug] : 0, matmul_28.tmp_0
[debug] : 0, transpose_55.tmp_0
[debug] : 0, transpose_55.tmp_1
[debug] : 0, reshape2_55.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_output_fc.w_0
[debug] : 0, fc_81.tmp_0
[debug] : 0, encoder_layer_13_multi_head_att_output_fc.b_0
[debug] : 0, fc_81.tmp_1
[debug] : 0, dropout_41.tmp_0
[debug] : 0, dropout_41.tmp_1
[debug] : 0, tmp_70
[debug] : 0, reduce_mean_54.tmp_0
[debug] : 0, elementwise_sub_27
[debug] : 0, square_27.tmp_0
[debug] : 0, reduce_mean_55.tmp_0
[debug] : 0, tmp_71
[debug] : 0, rsqrt_27.tmp_0
[debug] : 0, elementwise_mul_54
[debug] : 0, encoder_layer_13_post_att_layer_norm_scale
[debug] : 0, encoder_layer_13_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_55
[debug] : 0, elementwise_add_27
[debug] : 0, encoder_layer_13_ffn_fc_0.w_0
[debug] : 0, fc_82.tmp_0
[debug] : 0, encoder_layer_13_ffn_fc_0.b_0
[debug] : 0, fc_82.tmp_1
[debug] : 0, fc_82.tmp_2
[debug] : 0, encoder_layer_13_ffn_fc_1.w_0
[debug] : 0, fc_83.tmp_0
[debug] : 0, encoder_layer_13_ffn_fc_1.b_0
[debug] : 0, fc_83.tmp_1
[debug] : 0, dropout_42.tmp_0
[debug] : 0, dropout_42.tmp_1
[debug] : 0, tmp_72
[debug] : 0, reduce_mean_56.tmp_0
[debug] : 0, elementwise_sub_28
[debug] : 0, square_28.tmp_0
[debug] : 0, reduce_mean_57.tmp_0
[debug] : 0, tmp_73
[debug] : 0, rsqrt_28.tmp_0
[debug] : 0, elementwise_mul_56
[debug] : 0, encoder_layer_13_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_13_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_57
[debug] : 0, elementwise_add_28
[debug] : 0, encoder_layer_14_multi_head_att_query_fc.w_0
[debug] : 0, fc_84.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_query_fc.b_0
[debug] : 0, fc_84.tmp_1
[debug] : 0, encoder_layer_14_multi_head_att_key_fc.w_0
[debug] : 0, fc_85.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_key_fc.b_0
[debug] : 0, fc_85.tmp_1
[debug] : 0, encoder_layer_14_multi_head_att_value_fc.w_0
[debug] : 0, fc_86.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_value_fc.b_0
[debug] : 0, fc_86.tmp_1
[debug] : 0, reshape2_56.tmp_0
[debug] : 0, transpose_56.tmp_0
[debug] : 0, transpose_56.tmp_1
[debug] : 0, reshape2_57.tmp_0
[debug] : 0, transpose_57.tmp_0
[debug] : 0, transpose_57.tmp_1
[debug] : 0, reshape2_58.tmp_0
[debug] : 0, transpose_58.tmp_0
[debug] : 0, transpose_58.tmp_1
[debug] : 0, scale_15.tmp_0
[debug] : 0, matmul_29.tmp_0
[debug] : 0, tmp_74
[debug] : 0, softmax_14.tmp_0
[debug] : 0, dropout_43.tmp_0
[debug] : 0, dropout_43.tmp_1
[debug] : 0, matmul_30.tmp_0
[debug] : 0, transpose_59.tmp_0
[debug] : 0, transpose_59.tmp_1
[debug] : 0, reshape2_59.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_output_fc.w_0
[debug] : 0, fc_87.tmp_0
[debug] : 0, encoder_layer_14_multi_head_att_output_fc.b_0
[debug] : 0, fc_87.tmp_1
[debug] : 0, dropout_44.tmp_0
[debug] : 0, dropout_44.tmp_1
[debug] : 0, tmp_75
[debug] : 0, reduce_mean_58.tmp_0
[debug] : 0, elementwise_sub_29
[debug] : 0, square_29.tmp_0
[debug] : 0, reduce_mean_59.tmp_0
[debug] : 0, tmp_76
[debug] : 0, rsqrt_29.tmp_0
[debug] : 0, elementwise_mul_58
[debug] : 0, encoder_layer_14_post_att_layer_norm_scale
[debug] : 0, encoder_layer_14_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_59
[debug] : 0, elementwise_add_29
[debug] : 0, encoder_layer_14_ffn_fc_0.w_0
[debug] : 0, fc_88.tmp_0
[debug] : 0, encoder_layer_14_ffn_fc_0.b_0
[debug] : 0, fc_88.tmp_1
[debug] : 0, fc_88.tmp_2
[debug] : 0, encoder_layer_14_ffn_fc_1.w_0
[debug] : 0, fc_89.tmp_0
[debug] : 0, encoder_layer_14_ffn_fc_1.b_0
[debug] : 0, fc_89.tmp_1
[debug] : 0, dropout_45.tmp_0
[debug] : 0, dropout_45.tmp_1
[debug] : 0, tmp_77
[debug] : 0, reduce_mean_60.tmp_0
[debug] : 0, elementwise_sub_30
[debug] : 0, square_30.tmp_0
[debug] : 0, reduce_mean_61.tmp_0
[debug] : 0, tmp_78
[debug] : 0, rsqrt_30.tmp_0
[debug] : 0, elementwise_mul_60
[debug] : 0, encoder_layer_14_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_14_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_61
[debug] : 0, elementwise_add_30
[debug] : 0, encoder_layer_15_multi_head_att_query_fc.w_0
[debug] : 0, fc_90.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_query_fc.b_0
[debug] : 0, fc_90.tmp_1
[debug] : 0, encoder_layer_15_multi_head_att_key_fc.w_0
[debug] : 0, fc_91.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_key_fc.b_0
[debug] : 0, fc_91.tmp_1
[debug] : 0, encoder_layer_15_multi_head_att_value_fc.w_0
[debug] : 0, fc_92.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_value_fc.b_0
[debug] : 0, fc_92.tmp_1
[debug] : 0, reshape2_60.tmp_0
[debug] : 0, transpose_60.tmp_0
[debug] : 0, transpose_60.tmp_1
[debug] : 0, reshape2_61.tmp_0
[debug] : 0, transpose_61.tmp_0
[debug] : 0, transpose_61.tmp_1
[debug] : 0, reshape2_62.tmp_0
[debug] : 0, transpose_62.tmp_0
[debug] : 0, transpose_62.tmp_1
[debug] : 0, scale_16.tmp_0
[debug] : 0, matmul_31.tmp_0
[debug] : 0, tmp_79
[debug] : 0, softmax_15.tmp_0
[debug] : 0, dropout_46.tmp_0
[debug] : 0, dropout_46.tmp_1
[debug] : 0, matmul_32.tmp_0
[debug] : 0, transpose_63.tmp_0
[debug] : 0, transpose_63.tmp_1
[debug] : 0, reshape2_63.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_output_fc.w_0
[debug] : 0, fc_93.tmp_0
[debug] : 0, encoder_layer_15_multi_head_att_output_fc.b_0
[debug] : 0, fc_93.tmp_1
[debug] : 0, dropout_47.tmp_0
[debug] : 0, dropout_47.tmp_1
[debug] : 0, tmp_80
[debug] : 0, reduce_mean_62.tmp_0
[debug] : 0, elementwise_sub_31
[debug] : 0, square_31.tmp_0
[debug] : 0, reduce_mean_63.tmp_0
[debug] : 0, tmp_81
[debug] : 0, rsqrt_31.tmp_0
[debug] : 0, elementwise_mul_62
[debug] : 0, encoder_layer_15_post_att_layer_norm_scale
[debug] : 0, encoder_layer_15_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_63
[debug] : 0, elementwise_add_31
[debug] : 0, encoder_layer_15_ffn_fc_0.w_0
[debug] : 0, fc_94.tmp_0
[debug] : 0, encoder_layer_15_ffn_fc_0.b_0
[debug] : 0, fc_94.tmp_1
[debug] : 0, fc_94.tmp_2
[debug] : 0, encoder_layer_15_ffn_fc_1.w_0
[debug] : 0, fc_95.tmp_0
[debug] : 0, encoder_layer_15_ffn_fc_1.b_0
[debug] : 0, fc_95.tmp_1
[debug] : 0, dropout_48.tmp_0
[debug] : 0, dropout_48.tmp_1
[debug] : 0, tmp_82
[debug] : 0, reduce_mean_64.tmp_0
[debug] : 0, elementwise_sub_32
[debug] : 0, square_32.tmp_0
[debug] : 0, reduce_mean_65.tmp_0
[debug] : 0, tmp_83
[debug] : 0, rsqrt_32.tmp_0
[debug] : 0, elementwise_mul_64
[debug] : 0, encoder_layer_15_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_15_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_65
[debug] : 0, elementwise_add_32
[debug] : 0, encoder_layer_16_multi_head_att_query_fc.w_0
[debug] : 0, fc_96.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_query_fc.b_0
[debug] : 0, fc_96.tmp_1
[debug] : 0, encoder_layer_16_multi_head_att_key_fc.w_0
[debug] : 0, fc_97.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_key_fc.b_0
[debug] : 0, fc_97.tmp_1
[debug] : 0, encoder_layer_16_multi_head_att_value_fc.w_0
[debug] : 0, fc_98.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_value_fc.b_0
[debug] : 0, fc_98.tmp_1
[debug] : 0, reshape2_64.tmp_0
[debug] : 0, transpose_64.tmp_0
[debug] : 0, transpose_64.tmp_1
[debug] : 0, reshape2_65.tmp_0
[debug] : 0, transpose_65.tmp_0
[debug] : 0, transpose_65.tmp_1
[debug] : 0, reshape2_66.tmp_0
[debug] : 0, transpose_66.tmp_0
[debug] : 0, transpose_66.tmp_1
[debug] : 0, scale_17.tmp_0
[debug] : 0, matmul_33.tmp_0
[debug] : 0, tmp_84
[debug] : 0, softmax_16.tmp_0
[debug] : 0, dropout_49.tmp_0
[debug] : 0, dropout_49.tmp_1
[debug] : 0, matmul_34.tmp_0
[debug] : 0, transpose_67.tmp_0
[debug] : 0, transpose_67.tmp_1
[debug] : 0, reshape2_67.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_output_fc.w_0
[debug] : 0, fc_99.tmp_0
[debug] : 0, encoder_layer_16_multi_head_att_output_fc.b_0
[debug] : 0, fc_99.tmp_1
[debug] : 0, dropout_50.tmp_0
[debug] : 0, dropout_50.tmp_1
[debug] : 0, tmp_85
[debug] : 0, reduce_mean_66.tmp_0
[debug] : 0, elementwise_sub_33
[debug] : 0, square_33.tmp_0
[debug] : 0, reduce_mean_67.tmp_0
[debug] : 0, tmp_86
[debug] : 0, rsqrt_33.tmp_0
[debug] : 0, elementwise_mul_66
[debug] : 0, encoder_layer_16_post_att_layer_norm_scale
[debug] : 0, encoder_layer_16_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_67
[debug] : 0, elementwise_add_33
[debug] : 0, encoder_layer_16_ffn_fc_0.w_0
[debug] : 0, fc_100.tmp_0
[debug] : 0, encoder_layer_16_ffn_fc_0.b_0
[debug] : 0, fc_100.tmp_1
[debug] : 0, fc_100.tmp_2
[debug] : 0, encoder_layer_16_ffn_fc_1.w_0
[debug] : 0, fc_101.tmp_0
[debug] : 0, encoder_layer_16_ffn_fc_1.b_0
[debug] : 0, fc_101.tmp_1
[debug] : 0, dropout_51.tmp_0
[debug] : 0, dropout_51.tmp_1
[debug] : 0, tmp_87
[debug] : 0, reduce_mean_68.tmp_0
[debug] : 0, elementwise_sub_34
[debug] : 0, square_34.tmp_0
[debug] : 0, reduce_mean_69.tmp_0
[debug] : 0, tmp_88
[debug] : 0, rsqrt_34.tmp_0
[debug] : 0, elementwise_mul_68
[debug] : 0, encoder_layer_16_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_16_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_69
[debug] : 0, elementwise_add_34
[debug] : 0, encoder_layer_17_multi_head_att_query_fc.w_0
[debug] : 0, fc_102.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_query_fc.b_0
[debug] : 0, fc_102.tmp_1
[debug] : 0, encoder_layer_17_multi_head_att_key_fc.w_0
[debug] : 0, fc_103.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_key_fc.b_0
[debug] : 0, fc_103.tmp_1
[debug] : 0, encoder_layer_17_multi_head_att_value_fc.w_0
[debug] : 0, fc_104.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_value_fc.b_0
[debug] : 0, fc_104.tmp_1
[debug] : 0, reshape2_68.tmp_0
[debug] : 0, transpose_68.tmp_0
[debug] : 0, transpose_68.tmp_1
[debug] : 0, reshape2_69.tmp_0
[debug] : 0, transpose_69.tmp_0
[debug] : 0, transpose_69.tmp_1
[debug] : 0, reshape2_70.tmp_0
[debug] : 0, transpose_70.tmp_0
[debug] : 0, transpose_70.tmp_1
[debug] : 0, scale_18.tmp_0
[debug] : 0, matmul_35.tmp_0
[debug] : 0, tmp_89
[debug] : 0, softmax_17.tmp_0
[debug] : 0, dropout_52.tmp_0
[debug] : 0, dropout_52.tmp_1
[debug] : 0, matmul_36.tmp_0
[debug] : 0, transpose_71.tmp_0
[debug] : 0, transpose_71.tmp_1
[debug] : 0, reshape2_71.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_output_fc.w_0
[debug] : 0, fc_105.tmp_0
[debug] : 0, encoder_layer_17_multi_head_att_output_fc.b_0
[debug] : 0, fc_105.tmp_1
[debug] : 0, dropout_53.tmp_0
[debug] : 0, dropout_53.tmp_1
[debug] : 0, tmp_90
[debug] : 0, reduce_mean_70.tmp_0
[debug] : 0, elementwise_sub_35
[debug] : 0, square_35.tmp_0
[debug] : 0, reduce_mean_71.tmp_0
[debug] : 0, tmp_91
[debug] : 0, rsqrt_35.tmp_0
[debug] : 0, elementwise_mul_70
[debug] : 0, encoder_layer_17_post_att_layer_norm_scale
[debug] : 0, encoder_layer_17_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_71
[debug] : 0, elementwise_add_35
[debug] : 0, encoder_layer_17_ffn_fc_0.w_0
[debug] : 0, fc_106.tmp_0
[debug] : 0, encoder_layer_17_ffn_fc_0.b_0
[debug] : 0, fc_106.tmp_1
[debug] : 0, fc_106.tmp_2
[debug] : 0, encoder_layer_17_ffn_fc_1.w_0
[debug] : 0, fc_107.tmp_0
[debug] : 0, encoder_layer_17_ffn_fc_1.b_0
[debug] : 0, fc_107.tmp_1
[debug] : 0, dropout_54.tmp_0
[debug] : 0, dropout_54.tmp_1
[debug] : 0, tmp_92
[debug] : 0, reduce_mean_72.tmp_0
[debug] : 0, elementwise_sub_36
[debug] : 0, square_36.tmp_0
[debug] : 0, reduce_mean_73.tmp_0
[debug] : 0, tmp_93
[debug] : 0, rsqrt_36.tmp_0
[debug] : 0, elementwise_mul_72
[debug] : 0, encoder_layer_17_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_17_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_73
[debug] : 0, elementwise_add_36
[debug] : 0, encoder_layer_18_multi_head_att_query_fc.w_0
[debug] : 0, fc_108.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_query_fc.b_0
[debug] : 0, fc_108.tmp_1
[debug] : 0, encoder_layer_18_multi_head_att_key_fc.w_0
[debug] : 0, fc_109.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_key_fc.b_0
[debug] : 0, fc_109.tmp_1
[debug] : 0, encoder_layer_18_multi_head_att_value_fc.w_0
[debug] : 0, fc_110.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_value_fc.b_0
[debug] : 0, fc_110.tmp_1
[debug] : 0, reshape2_72.tmp_0
[debug] : 0, transpose_72.tmp_0
[debug] : 0, transpose_72.tmp_1
[debug] : 0, reshape2_73.tmp_0
[debug] : 0, transpose_73.tmp_0
[debug] : 0, transpose_73.tmp_1
[debug] : 0, reshape2_74.tmp_0
[debug] : 0, transpose_74.tmp_0
[debug] : 0, transpose_74.tmp_1
[debug] : 0, scale_19.tmp_0
[debug] : 0, matmul_37.tmp_0
[debug] : 0, tmp_94
[debug] : 0, softmax_18.tmp_0
[debug] : 0, dropout_55.tmp_0
[debug] : 0, dropout_55.tmp_1
[debug] : 0, matmul_38.tmp_0
[debug] : 0, transpose_75.tmp_0
[debug] : 0, transpose_75.tmp_1
[debug] : 0, reshape2_75.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_output_fc.w_0
[debug] : 0, fc_111.tmp_0
[debug] : 0, encoder_layer_18_multi_head_att_output_fc.b_0
[debug] : 0, fc_111.tmp_1
[debug] : 0, dropout_56.tmp_0
[debug] : 0, dropout_56.tmp_1
[debug] : 0, tmp_95
[debug] : 0, reduce_mean_74.tmp_0
[debug] : 0, elementwise_sub_37
[debug] : 0, square_37.tmp_0
[debug] : 0, reduce_mean_75.tmp_0
[debug] : 0, tmp_96
[debug] : 0, rsqrt_37.tmp_0
[debug] : 0, elementwise_mul_74
[debug] : 0, encoder_layer_18_post_att_layer_norm_scale
[debug] : 0, encoder_layer_18_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_75
[debug] : 0, elementwise_add_37
[debug] : 0, encoder_layer_18_ffn_fc_0.w_0
[debug] : 0, fc_112.tmp_0
[debug] : 0, encoder_layer_18_ffn_fc_0.b_0
[debug] : 0, fc_112.tmp_1
[debug] : 0, fc_112.tmp_2
[debug] : 0, encoder_layer_18_ffn_fc_1.w_0
[debug] : 0, fc_113.tmp_0
[debug] : 0, encoder_layer_18_ffn_fc_1.b_0
[debug] : 0, fc_113.tmp_1
[debug] : 0, dropout_57.tmp_0
[debug] : 0, dropout_57.tmp_1
[debug] : 0, tmp_97
[debug] : 0, reduce_mean_76.tmp_0
[debug] : 0, elementwise_sub_38
[debug] : 0, square_38.tmp_0
[debug] : 0, reduce_mean_77.tmp_0
[debug] : 0, tmp_98
[debug] : 0, rsqrt_38.tmp_0
[debug] : 0, elementwise_mul_76
[debug] : 0, encoder_layer_18_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_18_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_77
[debug] : 0, elementwise_add_38
[debug] : 0, encoder_layer_19_multi_head_att_query_fc.w_0
[debug] : 0, fc_114.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_query_fc.b_0
[debug] : 0, fc_114.tmp_1
[debug] : 0, encoder_layer_19_multi_head_att_key_fc.w_0
[debug] : 0, fc_115.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_key_fc.b_0
[debug] : 0, fc_115.tmp_1
[debug] : 0, encoder_layer_19_multi_head_att_value_fc.w_0
[debug] : 0, fc_116.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_value_fc.b_0
[debug] : 0, fc_116.tmp_1
[debug] : 0, reshape2_76.tmp_0
[debug] : 0, transpose_76.tmp_0
[debug] : 0, transpose_76.tmp_1
[debug] : 0, reshape2_77.tmp_0
[debug] : 0, transpose_77.tmp_0
[debug] : 0, transpose_77.tmp_1
[debug] : 0, reshape2_78.tmp_0
[debug] : 0, transpose_78.tmp_0
[debug] : 0, transpose_78.tmp_1
[debug] : 0, scale_20.tmp_0
[debug] : 0, matmul_39.tmp_0
[debug] : 0, tmp_99
[debug] : 0, softmax_19.tmp_0
[debug] : 0, dropout_58.tmp_0
[debug] : 0, dropout_58.tmp_1
[debug] : 0, matmul_40.tmp_0
[debug] : 0, transpose_79.tmp_0
[debug] : 0, transpose_79.tmp_1
[debug] : 0, reshape2_79.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_output_fc.w_0
[debug] : 0, fc_117.tmp_0
[debug] : 0, encoder_layer_19_multi_head_att_output_fc.b_0
[debug] : 0, fc_117.tmp_1
[debug] : 0, dropout_59.tmp_0
[debug] : 0, dropout_59.tmp_1
[debug] : 0, tmp_100
[debug] : 0, reduce_mean_78.tmp_0
[debug] : 0, elementwise_sub_39
[debug] : 0, square_39.tmp_0
[debug] : 0, reduce_mean_79.tmp_0
[debug] : 0, tmp_101
[debug] : 0, rsqrt_39.tmp_0
[debug] : 0, elementwise_mul_78
[debug] : 0, encoder_layer_19_post_att_layer_norm_scale
[debug] : 0, encoder_layer_19_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_79
[debug] : 0, elementwise_add_39
[debug] : 0, encoder_layer_19_ffn_fc_0.w_0
[debug] : 0, fc_118.tmp_0
[debug] : 0, encoder_layer_19_ffn_fc_0.b_0
[debug] : 0, fc_118.tmp_1
[debug] : 0, fc_118.tmp_2
[debug] : 0, encoder_layer_19_ffn_fc_1.w_0
[debug] : 0, fc_119.tmp_0
[debug] : 0, encoder_layer_19_ffn_fc_1.b_0
[debug] : 0, fc_119.tmp_1
[debug] : 0, dropout_60.tmp_0
[debug] : 0, dropout_60.tmp_1
[debug] : 0, tmp_102
[debug] : 0, reduce_mean_80.tmp_0
[debug] : 0, elementwise_sub_40
[debug] : 0, square_40.tmp_0
[debug] : 0, reduce_mean_81.tmp_0
[debug] : 0, tmp_103
[debug] : 0, rsqrt_40.tmp_0
[debug] : 0, elementwise_mul_80
[debug] : 0, encoder_layer_19_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_19_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_81
[debug] : 0, elementwise_add_40
[debug] : 0, encoder_layer_20_multi_head_att_query_fc.w_0
[debug] : 0, fc_120.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_query_fc.b_0
[debug] : 0, fc_120.tmp_1
[debug] : 0, encoder_layer_20_multi_head_att_key_fc.w_0
[debug] : 0, fc_121.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_key_fc.b_0
[debug] : 0, fc_121.tmp_1
[debug] : 0, encoder_layer_20_multi_head_att_value_fc.w_0
[debug] : 0, fc_122.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_value_fc.b_0
[debug] : 0, fc_122.tmp_1
[debug] : 0, reshape2_80.tmp_0
[debug] : 0, transpose_80.tmp_0
[debug] : 0, transpose_80.tmp_1
[debug] : 0, reshape2_81.tmp_0
[debug] : 0, transpose_81.tmp_0
[debug] : 0, transpose_81.tmp_1
[debug] : 0, reshape2_82.tmp_0
[debug] : 0, transpose_82.tmp_0
[debug] : 0, transpose_82.tmp_1
[debug] : 0, scale_21.tmp_0
[debug] : 0, matmul_41.tmp_0
[debug] : 0, tmp_104
[debug] : 0, softmax_20.tmp_0
[debug] : 0, dropout_61.tmp_0
[debug] : 0, dropout_61.tmp_1
[debug] : 0, matmul_42.tmp_0
[debug] : 0, transpose_83.tmp_0
[debug] : 0, transpose_83.tmp_1
[debug] : 0, reshape2_83.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_output_fc.w_0
[debug] : 0, fc_123.tmp_0
[debug] : 0, encoder_layer_20_multi_head_att_output_fc.b_0
[debug] : 0, fc_123.tmp_1
[debug] : 0, dropout_62.tmp_0
[debug] : 0, dropout_62.tmp_1
[debug] : 0, tmp_105
[debug] : 0, reduce_mean_82.tmp_0
[debug] : 0, elementwise_sub_41
[debug] : 0, square_41.tmp_0
[debug] : 0, reduce_mean_83.tmp_0
[debug] : 0, tmp_106
[debug] : 0, rsqrt_41.tmp_0
[debug] : 0, elementwise_mul_82
[debug] : 0, encoder_layer_20_post_att_layer_norm_scale
[debug] : 0, encoder_layer_20_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_83
[debug] : 0, elementwise_add_41
[debug] : 0, encoder_layer_20_ffn_fc_0.w_0
[debug] : 0, fc_124.tmp_0
[debug] : 0, encoder_layer_20_ffn_fc_0.b_0
[debug] : 0, fc_124.tmp_1
[debug] : 0, fc_124.tmp_2
[debug] : 0, encoder_layer_20_ffn_fc_1.w_0
[debug] : 0, fc_125.tmp_0
[debug] : 0, encoder_layer_20_ffn_fc_1.b_0
[debug] : 0, fc_125.tmp_1
[debug] : 0, dropout_63.tmp_0
[debug] : 0, dropout_63.tmp_1
[debug] : 0, tmp_107
[debug] : 0, reduce_mean_84.tmp_0
[debug] : 0, elementwise_sub_42
[debug] : 0, square_42.tmp_0
[debug] : 0, reduce_mean_85.tmp_0
[debug] : 0, tmp_108
[debug] : 0, rsqrt_42.tmp_0
[debug] : 0, elementwise_mul_84
[debug] : 0, encoder_layer_20_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_20_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_85
[debug] : 0, elementwise_add_42
[debug] : 0, encoder_layer_21_multi_head_att_query_fc.w_0
[debug] : 0, fc_126.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_query_fc.b_0
[debug] : 0, fc_126.tmp_1
[debug] : 0, encoder_layer_21_multi_head_att_key_fc.w_0
[debug] : 0, fc_127.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_key_fc.b_0
[debug] : 0, fc_127.tmp_1
[debug] : 0, encoder_layer_21_multi_head_att_value_fc.w_0
[debug] : 0, fc_128.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_value_fc.b_0
[debug] : 0, fc_128.tmp_1
[debug] : 0, reshape2_84.tmp_0
[debug] : 0, transpose_84.tmp_0
[debug] : 0, transpose_84.tmp_1
[debug] : 0, reshape2_85.tmp_0
[debug] : 0, transpose_85.tmp_0
[debug] : 0, transpose_85.tmp_1
[debug] : 0, reshape2_86.tmp_0
[debug] : 0, transpose_86.tmp_0
[debug] : 0, transpose_86.tmp_1
[debug] : 0, scale_22.tmp_0
[debug] : 0, matmul_43.tmp_0
[debug] : 0, tmp_109
[debug] : 0, softmax_21.tmp_0
[debug] : 0, dropout_64.tmp_0
[debug] : 0, dropout_64.tmp_1
[debug] : 0, matmul_44.tmp_0
[debug] : 0, transpose_87.tmp_0
[debug] : 0, transpose_87.tmp_1
[debug] : 0, reshape2_87.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_output_fc.w_0
[debug] : 0, fc_129.tmp_0
[debug] : 0, encoder_layer_21_multi_head_att_output_fc.b_0
[debug] : 0, fc_129.tmp_1
[debug] : 0, dropout_65.tmp_0
[debug] : 0, dropout_65.tmp_1
[debug] : 0, tmp_110
[debug] : 0, reduce_mean_86.tmp_0
[debug] : 0, elementwise_sub_43
[debug] : 0, square_43.tmp_0
[debug] : 0, reduce_mean_87.tmp_0
[debug] : 0, tmp_111
[debug] : 0, rsqrt_43.tmp_0
[debug] : 0, elementwise_mul_86
[debug] : 0, encoder_layer_21_post_att_layer_norm_scale
[debug] : 0, encoder_layer_21_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_87
[debug] : 0, elementwise_add_43
[debug] : 0, encoder_layer_21_ffn_fc_0.w_0
[debug] : 0, fc_130.tmp_0
[debug] : 0, encoder_layer_21_ffn_fc_0.b_0
[debug] : 0, fc_130.tmp_1
[debug] : 0, fc_130.tmp_2
[debug] : 0, encoder_layer_21_ffn_fc_1.w_0
[debug] : 0, fc_131.tmp_0
[debug] : 0, encoder_layer_21_ffn_fc_1.b_0
[debug] : 0, fc_131.tmp_1
[debug] : 0, dropout_66.tmp_0
[debug] : 0, dropout_66.tmp_1
[debug] : 0, tmp_112
[debug] : 0, reduce_mean_88.tmp_0
[debug] : 0, elementwise_sub_44
[debug] : 0, square_44.tmp_0
[debug] : 0, reduce_mean_89.tmp_0
[debug] : 0, tmp_113
[debug] : 0, rsqrt_44.tmp_0
[debug] : 0, elementwise_mul_88
[debug] : 0, encoder_layer_21_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_21_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_89
[debug] : 0, elementwise_add_44
[debug] : 0, encoder_layer_22_multi_head_att_query_fc.w_0
[debug] : 0, fc_132.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_query_fc.b_0
[debug] : 0, fc_132.tmp_1
[debug] : 0, encoder_layer_22_multi_head_att_key_fc.w_0
[debug] : 0, fc_133.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_key_fc.b_0
[debug] : 0, fc_133.tmp_1
[debug] : 0, encoder_layer_22_multi_head_att_value_fc.w_0
[debug] : 0, fc_134.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_value_fc.b_0
[debug] : 0, fc_134.tmp_1
[debug] : 0, reshape2_88.tmp_0
[debug] : 0, transpose_88.tmp_0
[debug] : 0, transpose_88.tmp_1
[debug] : 0, reshape2_89.tmp_0
[debug] : 0, transpose_89.tmp_0
[debug] : 0, transpose_89.tmp_1
[debug] : 0, reshape2_90.tmp_0
[debug] : 0, transpose_90.tmp_0
[debug] : 0, transpose_90.tmp_1
[debug] : 0, scale_23.tmp_0
[debug] : 0, matmul_45.tmp_0
[debug] : 0, tmp_114
[debug] : 0, softmax_22.tmp_0
[debug] : 0, dropout_67.tmp_0
[debug] : 0, dropout_67.tmp_1
[debug] : 0, matmul_46.tmp_0
[debug] : 0, transpose_91.tmp_0
[debug] : 0, transpose_91.tmp_1
[debug] : 0, reshape2_91.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_output_fc.w_0
[debug] : 0, fc_135.tmp_0
[debug] : 0, encoder_layer_22_multi_head_att_output_fc.b_0
[debug] : 0, fc_135.tmp_1
[debug] : 0, dropout_68.tmp_0
[debug] : 0, dropout_68.tmp_1
[debug] : 0, tmp_115
[debug] : 0, reduce_mean_90.tmp_0
[debug] : 0, elementwise_sub_45
[debug] : 0, square_45.tmp_0
[debug] : 0, reduce_mean_91.tmp_0
[debug] : 0, tmp_116
[debug] : 0, rsqrt_45.tmp_0
[debug] : 0, elementwise_mul_90
[debug] : 0, encoder_layer_22_post_att_layer_norm_scale
[debug] : 0, encoder_layer_22_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_91
[debug] : 0, elementwise_add_45
[debug] : 0, encoder_layer_22_ffn_fc_0.w_0
[debug] : 0, fc_136.tmp_0
[debug] : 0, encoder_layer_22_ffn_fc_0.b_0
[debug] : 0, fc_136.tmp_1
[debug] : 0, fc_136.tmp_2
[debug] : 0, encoder_layer_22_ffn_fc_1.w_0
[debug] : 0, fc_137.tmp_0
[debug] : 0, encoder_layer_22_ffn_fc_1.b_0
[debug] : 0, fc_137.tmp_1
[debug] : 0, dropout_69.tmp_0
[debug] : 0, dropout_69.tmp_1
[debug] : 0, tmp_117
[debug] : 0, reduce_mean_92.tmp_0
[debug] : 0, elementwise_sub_46
[debug] : 0, square_46.tmp_0
[debug] : 0, reduce_mean_93.tmp_0
[debug] : 0, tmp_118
[debug] : 0, rsqrt_46.tmp_0
[debug] : 0, elementwise_mul_92
[debug] : 0, encoder_layer_22_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_22_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_93
[debug] : 0, elementwise_add_46
[debug] : 0, encoder_layer_23_multi_head_att_query_fc.w_0
[debug] : 0, fc_138.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_query_fc.b_0
[debug] : 0, fc_138.tmp_1
[debug] : 0, encoder_layer_23_multi_head_att_key_fc.w_0
[debug] : 0, fc_139.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_key_fc.b_0
[debug] : 0, fc_139.tmp_1
[debug] : 0, encoder_layer_23_multi_head_att_value_fc.w_0
[debug] : 0, fc_140.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_value_fc.b_0
[debug] : 0, fc_140.tmp_1
[debug] : 0, reshape2_92.tmp_0
[debug] : 0, transpose_92.tmp_0
[debug] : 0, transpose_92.tmp_1
[debug] : 0, reshape2_93.tmp_0
[debug] : 0, transpose_93.tmp_0
[debug] : 0, transpose_93.tmp_1
[debug] : 0, reshape2_94.tmp_0
[debug] : 0, transpose_94.tmp_0
[debug] : 0, transpose_94.tmp_1
[debug] : 0, scale_24.tmp_0
[debug] : 0, matmul_47.tmp_0
[debug] : 0, tmp_119
[debug] : 0, softmax_23.tmp_0
[debug] : 0, dropout_70.tmp_0
[debug] : 0, dropout_70.tmp_1
[debug] : 0, matmul_48.tmp_0
[debug] : 0, transpose_95.tmp_0
[debug] : 0, transpose_95.tmp_1
[debug] : 0, reshape2_95.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_output_fc.w_0
[debug] : 0, fc_141.tmp_0
[debug] : 0, encoder_layer_23_multi_head_att_output_fc.b_0
[debug] : 0, fc_141.tmp_1
[debug] : 0, dropout_71.tmp_0
[debug] : 0, dropout_71.tmp_1
[debug] : 0, tmp_120
[debug] : 0, reduce_mean_94.tmp_0
[debug] : 0, elementwise_sub_47
[debug] : 0, square_47.tmp_0
[debug] : 0, reduce_mean_95.tmp_0
[debug] : 0, tmp_121
[debug] : 0, rsqrt_47.tmp_0
[debug] : 0, elementwise_mul_94
[debug] : 0, encoder_layer_23_post_att_layer_norm_scale
[debug] : 0, encoder_layer_23_post_att_layer_norm_bias
[debug] : 0, elementwise_mul_95
[debug] : 0, elementwise_add_47
[debug] : 0, encoder_layer_23_ffn_fc_0.w_0
[debug] : 0, fc_142.tmp_0
[debug] : 0, encoder_layer_23_ffn_fc_0.b_0
[debug] : 0, fc_142.tmp_1
[debug] : 0, fc_142.tmp_2
[debug] : 0, encoder_layer_23_ffn_fc_1.w_0
[debug] : 0, fc_143.tmp_0
[debug] : 0, encoder_layer_23_ffn_fc_1.b_0
[debug] : 0, fc_143.tmp_1
[debug] : 0, dropout_72.tmp_0
[debug] : 0, dropout_72.tmp_1
[debug] : 0, tmp_122
[debug] : 0, reduce_mean_96.tmp_0
[debug] : 0, elementwise_sub_48
[debug] : 0, square_48.tmp_0
[debug] : 0, reduce_mean_97.tmp_0
[debug] : 0, tmp_123
[debug] : 0, rsqrt_48.tmp_0
[debug] : 0, elementwise_mul_96
[debug] : 0, encoder_layer_23_post_ffn_layer_norm_scale
[debug] : 0, encoder_layer_23_post_ffn_layer_norm_bias
[debug] : 0, elementwise_mul_97
[debug] : 0, elementwise_add_48
[debug] : 0, slice_0.tmp_0
[debug] : 0, reshape2_96.tmp_0
[debug] : 0, reshape2_96.tmp_1
[debug] : 0, pooled_fc.w_0
[debug] : 0, fc_144.tmp_0
[debug] : 0, pooled_fc.b_0
[debug] : 0, fc_144.tmp_1
[debug] : 0, fc_144.tmp_2
[debug] : 0, senti_cls.senti_cls.dropout_0.tmp_0
[debug] : 0, senti_cls.senti_cls.dropout_0.tmp_1
[debug] : 0, senti_cls.cls_out_w
[debug] : 0, senti_cls.senti_cls.fc_0.tmp_0
[debug] : 0, senti_cls.cls_out_b
[debug] : 0, senti_cls.senti_cls.fc_0.tmp_1
[debug] : 0, senti_cls.senti_cls.softmax_0.tmp_0
[debug] : 0, senti_cls.senti_cls.cross_entropy2_0.tmp_0
[debug] : 0, senti_cls.senti_cls.cross_entropy2_0.tmp_1
[debug] : 0, senti_cls.senti_cls.cross_entropy2_0.tmp_2
[debug] : 0, senti_cls.senti_cls.mean_0.tmp_0
[debug] : 0, reduce_sum_0.tmp_0
0
preparing data...ok!
61
30
name: "reduce_sum_0.tmp_0"
type {
type: LOD_TENSOR
lod_tensor {
tensor {
data_type: FP32
dims: 1
}
}
}
persistable: false
random init params...
Loading pretraining parameters from pretrain/ernie/params...
demo/demo3/run.py
浏览文件 @
df98c24f
...
@@ -6,7 +6,7 @@ if __name__ == '__main__':
...
@@ -6,7 +6,7 @@ if __name__ == '__main__':
max_seqlen
=
512
max_seqlen
=
512
batch_size
=
4
batch_size
=
4
num_epochs
=
2
num_epochs
=
2
0
lr
=
1e-3
lr
=
1e-3
vocab_path
=
'./pretrain/ernie/vocab.txt'
vocab_path
=
'./pretrain/ernie/vocab.txt'
...
@@ -67,7 +67,8 @@ if __name__ == '__main__':
...
@@ -67,7 +67,8 @@ if __name__ == '__main__':
cls_pred_head
=
palm
.
head
.
Classify
(
4
,
1024
,
phase
=
'pred'
)
cls_pred_head
=
palm
.
head
.
Classify
(
4
,
1024
,
phase
=
'pred'
)
trainer
.
build_predict_head
(
cls_pred_head
,
pred_ernie
)
trainer
.
build_predict_head
(
cls_pred_head
,
pred_ernie
)
trainer
.
train
(
iterator_fn
,
print_steps
=
1
,
save_steps
=
5
,
save_path
=
'outputs'
,
save_type
=
'ckpt,predict'
)
# trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
trainer
.
train
(
iterator_fn
,
print_steps
=
1
)
# trainer.save()
# trainer.save()
...
...
demo/demo3/run.sh
浏览文件 @
df98c24f
export
CUDA_VISIBLE_DEVICES
=
3
export
CUDA_VISIBLE_DEVICES
=
4
python run.py
python run.py
paddlepalm/.trainer.py.swp
0 → 100644
浏览文件 @
df98c24f
文件已添加
paddlepalm/backbone/ernie.py
浏览文件 @
df98c24f
...
@@ -114,6 +114,8 @@ class ERNIE(BaseBackbone):
...
@@ -114,6 +114,8 @@ class ERNIE(BaseBackbone):
input_mask
=
inputs
[
'input_mask'
]
input_mask
=
inputs
[
'input_mask'
]
task_ids
=
inputs
[
'task_ids'
]
task_ids
=
inputs
[
'task_ids'
]
fluid
.
layers
.
Print
(
src_ids
)
# padding id in vocabulary must be set to 0
# padding id in vocabulary must be set to 0
emb_out
=
fluid
.
embedding
(
emb_out
=
fluid
.
embedding
(
input
=
src_ids
,
input
=
src_ids
,
...
...
paddlepalm/distribute/__init__.py
浏览文件 @
df98c24f
...
@@ -5,5 +5,5 @@ import multiprocessing
...
@@ -5,5 +5,5 @@ import multiprocessing
gpu_dev_count
=
int
(
fluid
.
core
.
get_cuda_device_count
())
gpu_dev_count
=
int
(
fluid
.
core
.
get_cuda_device_count
())
cpu_dev_count
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
cpu_dev_count
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
from
reader
import
yield_pieces
,
data_feeder
from
reader
import
yield_pieces
,
data_feeder
,
decode_fake
paddlepalm/distribute/reader.py
浏览文件 @
df98c24f
...
@@ -11,8 +11,8 @@ def yield_pieces(data, distribute_strategy, batch_size):
...
@@ -11,8 +11,8 @@ def yield_pieces(data, distribute_strategy, batch_size):
distribute_strategy: support s=split, c=copy, u=unstack,
distribute_strategy: support s=split, c=copy, u=unstack,
"""
"""
assert
batch_size
%
dev_count
==
0
,
"batch_size need to be integer times larger than dev_count."
assert
batch_size
%
dev_count
==
0
,
"batch_size need to be integer times larger than dev_count."
print
(
'data in yield pieces'
)
#
print('data in yield pieces')
print
(
len
(
data
))
#
print(len(data))
assert
type
(
data
)
==
type
(
distribute_strategy
),
[
type
(
data
),
type
(
distribute_strategy
)]
assert
type
(
data
)
==
type
(
distribute_strategy
),
[
type
(
data
),
type
(
distribute_strategy
)]
assert
len
(
data
)
==
len
(
distribute_strategy
),
[
len
(
data
),
len
(
distribute_strategy
)]
assert
len
(
data
)
==
len
(
distribute_strategy
),
[
len
(
data
),
len
(
distribute_strategy
)]
...
@@ -53,12 +53,11 @@ def yield_pieces(data, distribute_strategy, batch_size):
...
@@ -53,12 +53,11 @@ def yield_pieces(data, distribute_strategy, batch_size):
if
type
(
data
)
==
dict
:
if
type
(
data
)
==
dict
:
yield
dict
(
zip
(
*
[
keys
,
temp
]))
yield
dict
(
zip
(
*
[
keys
,
temp
]))
else
:
else
:
print
(
'yielded pieces'
)
#
print('yielded pieces')
print
(
len
(
temp
))
#
print(len(temp))
yield
temp
yield
temp
def
data_feeder
(
reader
,
postprocess_fn
=
None
,
prefetch_steps
=
2
):
def
data_feeder
(
reader
,
postprocess_fn
=
None
,
prefetch_steps
=
2
,
phase
=
'train'
):
if
postprocess_fn
is
None
:
if
postprocess_fn
is
None
:
def
postprocess_fn
(
batch
):
def
postprocess_fn
(
batch
):
return
batch
return
batch
...
@@ -91,6 +90,7 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
...
@@ -91,6 +90,7 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
queue
.
task_done
()
queue
.
task_done
()
if
ret
is
not
None
:
if
ret
is
not
None
:
batches
,
num_pad
=
ret
batches
,
num_pad
=
ret
id
=
batches
[
0
][
'__task_id'
][
0
][
0
]
if
phase
==
'train'
else
-
1
batch_buf
=
[]
batch_buf
=
[]
flag_buf
=
[]
flag_buf
=
[]
for
idx
,
batch
in
enumerate
(
batches
):
for
idx
,
batch
in
enumerate
(
batches
):
...
@@ -98,12 +98,24 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
...
@@ -98,12 +98,24 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
flag
=
idx
-
len
(
batches
)
<
-
num_pad
flag
=
idx
-
len
(
batches
)
<
-
num_pad
# if num_pad > 0:
# if num_pad > 0:
# num_pad -= 1
# num_pad -= 1
batch
=
postprocess_fn
(
batch
)
batch
=
postprocess_fn
(
batch
,
id
)
batch_buf
.
append
(
batch
)
batch_buf
.
append
(
batch
)
flag_buf
.
append
(
flag
)
flag_buf
.
append
(
flag
)
yield
batch_buf
,
flag_buf
yield
batch_buf
,
flag_buf
,
id
else
:
else
:
break
break
queue
.
join
()
queue
.
join
()
def
decode_fake
(
nums
,
mask
,
bs
):
n_t
=
0
for
flag
in
mask
:
if
not
flag
:
break
n_t
=
n_t
+
1
n_f
=
len
(
mask
)
-
n_t
p1
=
nums
-
(
n_t
-
1
)
*
bs
each_f
=
p1
/
(
n_f
+
1
)
return
each_f
*
n_f
paddlepalm/optimizer/adam.py
浏览文件 @
df98c24f
...
@@ -37,6 +37,8 @@ class Adam(BaseOptimizer):
...
@@ -37,6 +37,8 @@ class Adam(BaseOptimizer):
if
self
.
_lr_schedualer
is
not
None
:
if
self
.
_lr_schedualer
is
not
None
:
self
.
_lr
=
self
.
_lr_schedualer
.
build
(
self
.
_lr
)
self
.
_lr
=
self
.
_lr_schedualer
.
build
(
self
.
_lr
)
fluid
.
layers
.
Print
(
self
.
_lr
)
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
self
.
_lr
)
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
self
.
_lr
)
if
grad_clip
is
not
None
:
if
grad_clip
is
not
None
:
...
@@ -46,6 +48,7 @@ class Adam(BaseOptimizer):
...
@@ -46,6 +48,7 @@ class Adam(BaseOptimizer):
fluid
.
clip
.
set_gradient_clip
(
fluid
.
clip
.
set_gradient_clip
(
clip
=
fluid
.
clip
.
GradientClipByGlobalNorm
(
clip_norm
=
clip_norm_thres
))
clip
=
fluid
.
clip
.
GradientClipByGlobalNorm
(
clip_norm
=
clip_norm_thres
))
print
(
self
.
_loss
)
_
,
param_grads
=
optimizer
.
minimize
(
self
.
_loss
)
_
,
param_grads
=
optimizer
.
minimize
(
self
.
_loss
)
return
param_grads
return
param_grads
...
...
paddlepalm/optimizer/base_optimizer.py
浏览文件 @
df98c24f
...
@@ -8,8 +8,9 @@ class BaseOptimizer():
...
@@ -8,8 +8,9 @@ class BaseOptimizer():
def
build
(
self
,
grad_clip
=
None
):
def
build
(
self
,
grad_clip
=
None
):
pass
pass
def
_set_prog
(
self
,
prog
):
def
_set_prog
(
self
,
prog
,
init_prog
):
self
.
_prog
=
prog
self
.
_prog
=
prog
self
.
_init_prog
=
prog
if
self
.
_lr_schedualer
is
not
None
:
if
self
.
_lr_schedualer
is
not
None
:
self
.
_lr_schedualer
.
_set_prog
(
prog
)
self
.
_lr_schedualer
.
_set_prog
(
prog
)
...
...
paddlepalm/trainer.py
浏览文件 @
df98c24f
...
@@ -21,7 +21,7 @@ import time
...
@@ -21,7 +21,7 @@ import time
import
numpy
as
np
import
numpy
as
np
import
paddlepalm.utils.basic_helper
as
helper
import
paddlepalm.utils.basic_helper
as
helper
from
paddlepalm.utils
import
reader_helper
,
saver
from
paddlepalm.utils
import
reader_helper
,
saver
from
paddlepalm.distribute
import
gpu_dev_count
,
data_feeder
from
paddlepalm.distribute
import
gpu_dev_count
,
data_feeder
,
decode_fake
# from paddlepalm.default_settings import *
# from paddlepalm.default_settings import *
DEBUG
=
False
DEBUG
=
False
...
@@ -217,12 +217,16 @@ class Trainer(object):
...
@@ -217,12 +217,16 @@ class Trainer(object):
with
fluid
.
program_guard
(
train_prog
,
train_init_prog
):
with
fluid
.
program_guard
(
train_prog
,
train_init_prog
):
loss_var
=
fluid
.
layers
.
reduce_sum
(
task_output_vars
[
self
.
name
+
'.loss'
])
loss_var
=
fluid
.
layers
.
reduce_sum
(
task_output_vars
[
self
.
name
+
'.loss'
])
self
.
_distribute_train_prog
=
fluid
.
CompiledProgram
(
self
.
_train_prog
).
with_data_parallel
(
loss_name
=
loss_var
.
name
)
for
_id
,
block
in
enumerate
(
self
.
_train_prog
.
blocks
):
for
var
in
block
.
vars
:
print
(
"[debug] : %d, %s"
%
(
_id
,
var
))
return
loss_var
return
loss_var
def
build_backward
(
self
,
optimizer
,
weight_decay
=
None
,
use_ema
=
False
,
ema_decay
=
0.9999
):
def
build_backward
(
self
,
optimizer
,
weight_decay
=
None
,
use_ema
=
False
,
ema_decay
=
0.9999
):
# build optimizer
# build optimizer
optimizer
.
_set_prog
(
self
.
_train_prog
)
assert
self
.
_train_init_prog
is
not
None
,
"train graph not foung! You should build_forward first."
optimizer
.
_set_prog
(
self
.
_train_prog
,
self
.
_train_init_prog
)
with
fluid
.
program_guard
(
self
.
_train_prog
,
self
.
_train_init_prog
):
with
fluid
.
program_guard
(
self
.
_train_prog
,
self
.
_train_init_prog
):
param_grads
=
optimizer
.
build
()
param_grads
=
optimizer
.
build
()
...
@@ -258,6 +262,13 @@ class Trainer(object):
...
@@ -258,6 +262,13 @@ class Trainer(object):
ema
=
fluid
.
optimizer
.
ExponentialMovingAverage
(
ema_decay
)
ema
=
fluid
.
optimizer
.
ExponentialMovingAverage
(
ema_decay
)
ema
.
update
()
ema
.
update
()
# for bid, block in enumerate(self._train_prog.blocks):
# print('block id: '+str(bid))
# for var in block.vars:
# print("%d : %s" % (bid, var))
# print(self._train_prog)
def
load_data
(
self
,
input_file
,
file_format
,
batch_size
,
num_epochs
=
None
,
shuffle_train
=
True
):
def
load_data
(
self
,
input_file
,
file_format
,
batch_size
,
num_epochs
=
None
,
shuffle_train
=
True
):
# load data
# load data
print
(
"preparing data..."
,
end
=
''
)
print
(
"preparing data..."
,
end
=
''
)
...
@@ -287,6 +298,7 @@ class Trainer(object):
...
@@ -287,6 +298,7 @@ class Trainer(object):
def
random_init_params
(
self
):
def
random_init_params
(
self
):
assert
self
.
_train_init_prog
is
not
None
,
"train graph not foung! You should build_forward first before you random init parameters."
assert
self
.
_train_init_prog
is
not
None
,
"train graph not foung! You should build_forward first before you random init parameters."
self
.
_distribute_train_prog
=
fluid
.
CompiledProgram
(
self
.
_train_prog
).
with_data_parallel
(
loss_name
=
loss_var
.
name
)
on_gpu
=
gpu_dev_count
>
0
on_gpu
=
gpu_dev_count
>
0
self
.
_exe
=
helper
.
build_executor
(
on_gpu
)
self
.
_exe
=
helper
.
build_executor
(
on_gpu
)
print
(
'random init params...'
)
print
(
'random init params...'
)
...
@@ -294,7 +306,7 @@ class Trainer(object):
...
@@ -294,7 +306,7 @@ class Trainer(object):
def
load_ckpt
(
self
,
model_path
,
phase
=
'train'
):
def
load_ckpt
(
self
,
model_path
,
phase
=
'train'
):
# load pretrain model (or ckpt)
# load pretrain model (or ckpt)
assert
self
.
_exe
is
not
None
,
"You need to random_init_params before load
pretrain model
s."
assert
self
.
_exe
is
not
None
,
"You need to random_init_params before load
checkpoint
s."
if
phase
==
'train'
:
if
phase
==
'train'
:
assert
self
.
_train_init_prog
is
not
None
,
"train graph not found! You should build_forward first before load checkpoint."
assert
self
.
_train_init_prog
is
not
None
,
"train graph not found! You should build_forward first before load checkpoint."
...
@@ -437,12 +449,12 @@ class Trainer(object):
...
@@ -437,12 +449,12 @@ class Trainer(object):
def
predict_one_batch
(
self
,
batch
):
def
predict_one_batch
(
self
,
batch
):
if
gpu_dev_count
>
1
:
if
gpu_dev_count
>
1
:
feed
,
mask
=
batch
feed
,
mask
=
batch
rt_outputs
=
self
.
exe
.
run
(
self
.
_distribute_
train
_prog
,
feed
=
feed
,
fetch_list
=
self
.
_fetch_list
)
rt_outputs
=
self
.
exe
.
run
(
self
.
_distribute_
pred
_prog
,
feed
=
feed
,
fetch_list
=
self
.
_fetch_list
)
while
mask
.
pop
()
==
False
:
while
mask
.
pop
()
==
False
:
rt_outputs
.
pop
()
rt_outputs
.
pop
()
else
:
else
:
feed
=
self
.
_feed_batch_process_fn
(
batch
)
feed
=
self
.
_feed_batch_process_fn
(
batch
)
rt_outputs
=
self
.
_exe
.
run
(
self
.
_distribute_
train
_prog
,
feed
=
feed
,
fetch_list
=
self
.
_fetch_list
)
rt_outputs
=
self
.
_exe
.
run
(
self
.
_distribute_
pred
_prog
,
feed
=
feed
,
fetch_list
=
self
.
_fetch_list
)
rt_outputs
=
{
k
:
v
for
k
,
v
in
zip
(
self
.
_fetch_names
,
rt_outputs
)}
rt_outputs
=
{
k
:
v
for
k
,
v
in
zip
(
self
.
_fetch_names
,
rt_outputs
)}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录