基于ERNIE 1.0构建计算图出core,无明显错误信息反馈(疑似构建反向网络出错)
Created by: moberq
-
版本、环境信息: 1)PaddlePaddle版本:1.5.1 2)GPU:V100 16GB显存,CUDA 9.0,CUDNN 7.4 3)系统环境:CentOS Linux release 7.5.1804,Python 2.7.15
-
训练信息: 1)单机,多卡 2)显存信息:单卡16GB 3)Operator信息: 大部分同ERNIE 1.0。除此之外,为提升解码端性能而DIY向量内积计算,用到了gather/unsqueeze/expand/pointwise乘法、加法等操作。
-
复现信息: 环境配置如上,运行主训练程序至根据loss计算反向梯度阶段出core(数据pipelines能单独跑通,此时并未进入到实际灌训练数据阶段),无其他错误信息。 optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr) -> optimizer.minimize(loss)
-
问题描述:
1)问题概括:根据具体任务对ERNIE 1.0的解码端进行了修改。对每个mask位置输出向量的解码方式,由“与输入word_embeddings做矩阵乘法、计算整个词表上的概率分布”,改为“在负采样生成的受限词表上,通过DIY向量内积(向量pointwise乘法后reduce加和)计算概率分布”;同时删掉了next_sentence预测的相关loss。
2)代码片段 ERNIE 1.0的相关原始代码:
def get_pretraining_output(self, mask_label, mask_pos, labels):
    """Build the two pretraining heads and return their metrics.

    A masked-LM head (scored over the whole vocabulary) and a two-way
    next-sentence classifier are stacked on top of the encoder output.

    Args:
        mask_label: labels of the masked tokens, fed to the masked-LM
            softmax cross entropy.
        mask_pos: row indices of the masked tokens inside the encoder
            output flattened to ``[-1, emb_size]``; cast to int32 here.
        labels: next-sentence labels.

    Returns:
        A ``(next_sent_acc, mean_mask_lm_loss, loss)`` tuple, where
        ``loss`` is the sum of the mean next-sentence loss and the mean
        masked-LM loss.
    """
    layers = fluid.layers

    masked_rows = layers.cast(x=mask_pos, dtype='int32')

    # Feature of the first token of every sentence (next-sentence head).
    pooled_feat = self.get_pooled_output()

    # Flatten the encoder output so each row is one token, then pick the
    # rows that belong to masked tokens.
    flat_enc_out = layers.reshape(
        x=self._enc_out, shape=[-1, self._emb_size])
    masked_feat = layers.gather(input=flat_enc_out, index=masked_rows)
    if self._dtype == "float16":
        masked_feat = layers.cast(x=masked_feat, dtype=self._emb_dtype)

    # Transform step 1: fully connected layer.
    trans_fc_w = fluid.ParamAttr(
        name='mask_lm_trans_fc.w_0',
        initializer=self._param_initializer)
    trans_fc_b = fluid.ParamAttr(name='mask_lm_trans_fc.b_0')
    transformed = layers.fc(
        input=masked_feat,
        size=self._emb_size,
        act=self._hidden_act,
        param_attr=trans_fc_w,
        bias_attr=trans_fc_b)

    # Transform step 2: layer norm over the last axis.
    ln_scale = fluid.ParamAttr(
        name='mask_lm_trans_layer_norm_scale',
        initializer=fluid.initializer.Constant(1.))
    ln_bias = fluid.ParamAttr(
        name='mask_lm_trans_layer_norm_bias',
        initializer=fluid.initializer.Constant(1.))
    last_axis = len(transformed.shape) - 1
    transformed = layers.layer_norm(
        transformed,
        begin_norm_axis=last_axis,
        param_attr=ln_scale,
        bias_attr=ln_bias)

    out_bias_attr = fluid.ParamAttr(
        name="mask_lm_out_fc.b_0",
        initializer=fluid.initializer.Constant(value=0.0))

    if self._weight_sharing:
        # Reuse the input word embedding table as the output projection.
        word_emb = fluid.default_main_program().global_block().var(
            self._word_emb_name)
        vocab_logits = layers.matmul(
            x=transformed, y=word_emb, transpose_y=True)
        vocab_bias = layers.create_parameter(
            shape=[self._voc_size],
            dtype=self._emb_dtype,
            attr=out_bias_attr,
            is_bias=True)
        vocab_logits = vocab_logits + vocab_bias
    else:
        out_fc_w = fluid.ParamAttr(
            name="mask_lm_out_fc.w_0",
            initializer=self._param_initializer)
        vocab_logits = layers.fc(
            input=transformed,
            size=self._voc_size,
            param_attr=out_fc_w,
            bias_attr=out_bias_attr)

    per_token_lm_loss = layers.softmax_with_cross_entropy(
        logits=vocab_logits, label=mask_label)
    mean_mask_lm_loss = layers.mean(per_token_lm_loss)

    # Next-sentence head: two-way classifier on the pooled feature.
    nsp_fc_w = fluid.ParamAttr(
        name="next_sent_fc.w_0", initializer=self._param_initializer)
    nsp_logits = layers.fc(
        input=pooled_feat,
        size=2,
        param_attr=nsp_fc_w,
        bias_attr="next_sent_fc.b_0")
    nsp_loss, nsp_probs = layers.softmax_with_cross_entropy(
        logits=nsp_logits, label=labels, return_softmax=True)
    next_sent_acc = layers.accuracy(input=nsp_probs, label=labels)
    mean_nsp_loss = layers.mean(nsp_loss)

    loss = mean_nsp_loss + mean_mask_lm_loss
    return next_sent_acc, mean_mask_lm_loss, loss
基于ERNIE 1.0修改后的代码如下。P.S. 除了参数中新增decode_candidates_ids、mlm_labels两个tensor之外,其余保留的tensors与原ERNIE 1.0完全一致。参数中各tensor的shape说明如下(其中batch_mask_len为一个batch中被mask掉的位置数之和):mask_pos:[batch_mask_len];decode_candidates_ids:[batch_mask_len * decode_size, 1];mlm_labels:[batch_mask_len, 1]。
程序实际设定的shape大小均为 [-1, 1]。
def get_restricted_masked_lm_output(self, mask_pos, decode_candidates_ids, mlm_labels, decode_size=10):
    """Get the loss & accuracy for restricted masked lm.

    Instead of scoring every masked position against the whole
    vocabulary, only ``decode_size`` candidate tokens per position are
    scored: the transformed feature of a masked position is dotted with
    the shared word embedding of each candidate, and that candidate's
    output bias is added.

    Args:
        mask_pos: row indices of the masked tokens inside the encoder
            output flattened to ``[-1, emb_size]``; cast to int32 here.
        decode_candidates_ids: candidate token ids. Reshaped here to
            ``[-1, decode_size, 1]`` for the embedding lookup and to
            ``[-1]`` for the bias lookup.
        mlm_labels: labels passed to softmax cross entropy and accuracy
            together with the ``[-1, decode_size]`` logits.
            NOTE(review): presumably the index of the gold token within
            its candidate list rather than a vocabulary id -- confirm
            against the data pipeline.
        decode_size: number of candidates per masked position.

    Returns:
        A ``(mask_lm_acc, mask_lm_loss)`` tuple; the loss is the mean
        over masked positions.

    Raises:
        NotImplementedError: if weight sharing is disabled.
    """
    if not self._weight_sharing:
        # Fail before any op or parameter is appended to the program, so
        # the unsupported path does not leave a half-built graph behind.
        raise NotImplementedError()

    mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')

    # Flat int32 copy of the candidate ids, used to gather the bias.
    decode_bias_pos = fluid.layers.reshape(
        x=decode_candidates_ids, shape=[-1])
    decode_bias_pos = fluid.layers.cast(x=decode_bias_pos, dtype='int32')

    reshaped_emb_out = fluid.layers.reshape(
        x=self._enc_out, shape=[-1, self._emb_size])

    # Group the candidates per masked position for the embedding lookup.
    decode_candidates_ids = fluid.layers.reshape(
        x=decode_candidates_ids, shape=[-1, decode_size, 1])

    # extract masked tokens' feature
    mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
    if self._dtype == "float16":
        mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype)

    # transform: fc
    mask_trans_feat = fluid.layers.fc(
        input=mask_feat,
        size=self._emb_size,
        act=self._hidden_act,
        param_attr=fluid.ParamAttr(
            name='mask_lm_trans_fc.w_0',
            initializer=self._param_initializer),
        bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))

    # transform: layer norm
    mask_trans_feat = fluid.layers.layer_norm(
        mask_trans_feat,
        begin_norm_axis=len(mask_trans_feat.shape) - 1,
        param_attr=fluid.ParamAttr(
            name='mask_lm_trans_layer_norm_scale',
            initializer=fluid.initializer.Constant(1.)),
        bias_attr=fluid.ParamAttr(
            name='mask_lm_trans_layer_norm_bias',
            initializer=fluid.initializer.Constant(1.)))

    mask_lm_out_bias_attr = fluid.ParamAttr(
        name="mask_lm_out_fc.b_0",
        initializer=fluid.initializer.Constant(value=0.0))
    output_bias = fluid.layers.create_parameter(
        shape=[self._voc_size],
        dtype=self._emb_dtype,
        attr=mask_lm_out_bias_attr,
        is_bias=True)

    # Look the candidates up in the shared word embedding table. The
    # lookup uses self._emb_dtype (not self._dtype) so that it matches
    # mask_trans_feat and output_bias, which are both in self._emb_dtype;
    # otherwise the element-wise product below would mix dtypes whenever
    # self._dtype is "float16".
    decode_weights = fluid.layers.embedding(
        input=decode_candidates_ids,
        size=[self._voc_size, self._emb_size],
        dtype=self._emb_dtype,
        param_attr=fluid.ParamAttr(
            name=self._word_emb_name, initializer=self._param_initializer),
        is_sparse=False)

    # Per-candidate bias, regrouped as one row per masked position.
    decode_bias = fluid.layers.gather(input=output_bias, index=decode_bias_pos)
    decode_bias = fluid.layers.reshape(x=decode_bias, shape=[-1, decode_size])

    # Repeat each masked feature decode_size times along a new axis 1 so
    # it lines up with the candidate embeddings, then take the inner
    # product as element-wise multiply followed by a sum over the last axis.
    mask_trans_feat = fluid.layers.unsqueeze(input=mask_trans_feat, axes=[1])
    mask_trans_feat = fluid.layers.expand(
        x=mask_trans_feat, expand_times=[1, decode_size, 1])
    fc_out = fluid.layers.reduce_sum(mask_trans_feat * decode_weights, dim=-1)
    fc_out += decode_bias

    mask_lm_loss, mask_lm_softmax = fluid.layers.softmax_with_cross_entropy(
        logits=fc_out, label=mlm_labels, return_softmax=True)
    mask_lm_acc = fluid.layers.accuracy(
        input=mask_lm_softmax, label=mlm_labels)
    mask_lm_loss = fluid.layers.mean(mask_lm_loss)
    return mask_lm_acc, mask_lm_loss
3)相关日志信息
+ output_dir=./paddle-ckpts/ernie_restricted_lm/
+ max_step=500000
+ batch_size=64
+ lr=2.0e-5
+ echo 'Max_step 500000 batch_size 64 lr 2.0e-5'
Max_step 500000 batch_size 64 lr 2.0e-5
+ alias 'python=/usr/lib64/ld-linux-x86-64.so.2 --library-path /mnt/fzh/tools/cuda-9.0/lib64/:/mnt/fzh/tools/cuda/lib64 /mnt/fzh/tools/anaconda2/bin/python'
+ export FLAGS_enable_parallel_graph=1
+ FLAGS_enable_parallel_graph=1
+ export FLAGS_sync_nccl_allreduce=1
+ FLAGS_sync_nccl_allreduce=1
+ export GLOG_v=4
+ GLOG_v=4
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ echo 'cuda env 0,1,2,3,4,5,6,7'
cuda env 0,1,2,3,4,5,6,7
+ export PYTHONPATH=./paddle-estimator/:
+ PYTHONPATH=./paddle-estimator/:
+ /usr/lib64/ld-linux-x86-64.so.2 --library-path /mnt/fzh/tools/cuda-9.0/lib64/:/mnt/fzh/tools/cuda/lib64 /mnt/fzh/tools/anaconda2/bin/python -u ./pretrain.py --run_config '{
"model_dir": "./paddle-ckpts/ernie_restricted_lm/",
"max_steps": 500000,
"save_steps": 10000,
"log_steps": 100,
"skip_steps": 0, # comment
"eval_steps": 10000,
"max_ckpt": 5,
"shit": 0
}' --hparam '{
"batch_size": 64,
"attention_probs_dropout_prob": 0.1,
"directionality": "bidi",
"hidden_act": "relu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"max_position_embeddings": 513,
"num_attention_heads": 12,
"num_hidden_layers": 3,
"type_vocab_size": 2,
"vocab_size": 18000,
"warmup_proportion": 0.1,
"weight_decay": 0.01,
"warmup_steps": 10000,
"use_fp16": 0,
"learning_rate": 2.0e-5,
"shit": 0
}'
WARNING: Logging before InitGoogleLogging() is written to STDERR
I0919 11:50:48.924957 49317 init.cc:67] Init commandline: dummy ./pretrain.py --tryfromenv=check_nan_inf,benchmark,eager_delete_scope,initial_cpu_memory_in_mb,init_allocated_mem,free_idle_memory,paddle_num_threads,dist_threadpool_size,eager_delete_tensor_gb,fast_eager_deletion_mode,memory_fraction_of_eager_deletion,allocator_strategy,reader_queue_speed_test_mode,print_sub_graph_dir,pe_profile_fname,inner_op_parallelism,enable_parallel_graph,fuse_parameter_groups_size,multiple_of_cupti_buffer_size,fuse_parameter_memory_size,tracer_profile_fname,dygraph_debug,use_pinned_memory,cpu_deterministic,use_mkldnn,rpc_deadline,rpc_server_profile_path,enable_rpc_profiler,rpc_send_thread_num,rpc_get_thread_num,rpc_prefetch_thread_num,rpc_disable_reuse_port,communicator_independent_recv_thread,communicator_send_queue_size,communicator_min_send_grad_num_before_recv,communicator_thread_pool_size,communicator_max_merge_var_num,communicator_fake_rpc,communicator_send_wait_times,fraction_of_gpu_memory_to_use,initial_gpu_memory_in_mb,reallocate_gpu_memory_in_mb,cudnn_deterministic,enable_cublas_tensor_op_math,conv_workspace_size_limit,cudnn_exhaustive_search,selected_gpus,sync_nccl_allreduce,limit_of_tmp_allocation,times_excess_than_required_tmp_allocation,enable_inplace_whitelist,cudnn_batchnorm_spatial_persistent
[INFO] 2019-09-19 11:50:53,468 [ pretrain.py: 97]: read from ./data/train_gz/part-00088.pb
./data/train_gz/part-00056.pb
./data/train_gz/part-00090.pb
./data/train_gz/part-00040.pb
./data/train_gz/part-00086.pb
./data/train_gz/part-00025.pb
./data/train_gz/part-00006.pb
./data/train_gz/part-00024.pb
./data/train_gz/part-00034.pb
./data/train_gz/part-00051.pb
./data/train_gz/part-00054.pb
./data/train_gz/part-00058.pb
./data/train_gz/part-00073.pb
./data/train_gz/part-00094.pb
./data/train_gz/part-00085.pb
./data/train_gz/part-00030.pb
./data/train_gz/part-00096.pb
./data/train_gz/part-00063.pb
./data/train_gz/part-00093.pb
./data/train_gz/part-00082.pb
./data/train_gz/part-00075.pb
./data/train_gz/part-00049.pb
./data/train_gz/part-00036.pb
./data/train_gz/part-00062.pb
./data/train_gz/part-00028.pb
./data/train_gz/part-00057.pb
./data/train_gz/part-00000.pb
./data/train_gz/part-00011.pb
./data/train_gz/part-00098.pb
./data/train_gz/part-00017.pb
./data/train_gz/part-00077.pb
./data/train_gz/part-00080.pb
./data/train_gz/part-00007.pb
./data/train_gz/part-00031.pb
./data/train_gz/part-00059.pb
./data/train_gz/part-00037.pb
./data/train_gz/part-00053.pb
./data/train_gz/part-00089.pb
./data/train_gz/part-00072.pb
./data/train_gz/part-00078.pb
./data/train_gz/part-00061.pb
./data/train_gz/part-00003.pb
./data/train_gz/part-00004.pb
./data/train_gz/part-00092.pb
./data/train_gz/part-00087.pb
./data/train_gz/part-00023.pb
./data/train_gz/part-00001.pb
./data/train_gz/part-00045.pb
./data/train_gz/part-00027.pb
./data/train_gz/part-00055.pb
./data/train_gz/part-00020.pb
./data/train_gz/part-00018.pb
./data/train_gz/part-00069.pb
./data/train_gz/part-00065.pb
./data/train_gz/part-00021.pb
./data/train_gz/part-00022.pb
./data/train_gz/part-00070.pb
./data/train_gz/part-00042.pb
./data/train_gz/part-00002.pb
./data/train_gz/part-00099.pb
./data/train_gz/part-00014.pb
./data/train_gz/part-00013.pb
./data/train_gz/part-00019.pb
./data/train_gz/part-00060.pb
./data/train_gz/part-00008.pb
./data/train_gz/part-00033.pb
./data/train_gz/part-00005.pb
./data/train_gz/part-00038.pb
./data/train_gz/part-00029.pb
./data/train_gz/part-00010.pb
./data/train_gz/part-00064.pb
./data/train_gz/part-00084.pb
./data/train_gz/part-00066.pb
./data/train_gz/part-00015.pb
./data/train_gz/part-00067.pb
./data/train_gz/part-00074.pb
./data/train_gz/part-00043.pb
./data/train_gz/part-00097.pb
./data/train_gz/part-00095.pb
./data/train_gz/part-00012.pb
./data/train_gz/part-00050.pb
./data/train_gz/part-00071.pb
./data/train_gz/part-00047.pb
./data/train_gz/part-00079.pb
./data/train_gz/part-00076.pb
./data/train_gz/part-00046.pb
./data/train_gz/part-00009.pb
./data/train_gz/part-00039.pb
./data/train_gz/part-00083.pb
./data/train_gz/part-00016.pb
./data/train_gz/part-00052.pb
./data/train_gz/part-00091.pb
./data/train_gz/part-00044.pb
./data/train_gz/part-00081.pb
./data/train_gz/part-00068.pb
./data/train_gz/part-00026.pb
./data/train_gz/part-00035.pb
./data/train_gz/part-00032.pb
./data/train_gz/part-00048.pb
./data/train_gz/part-00041.pb
[INFO] 2019-09-19 11:50:53,469 [ trainer.py: 263]: Building Train Graph...
I0919 11:50:53.474010 49317 op_desc.cc:682] CompileTime infer shape on reduce_sum
I0919 11:50:53.476068 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.476398 49317 op_desc.cc:682] CompileTime infer shape on fill_zeros_like
I0919 11:50:53.476651 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.476991 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.477275 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.477566 49317 op_desc.cc:682] CompileTime infer shape on reduce_sum
I0919 11:50:53.477798 49317 op_desc.cc:682] CompileTime infer shape on elementwise_sub
I0919 11:50:53.478021 49317 op_desc.cc:682] CompileTime infer shape on elementwise_div
I0919 11:50:53.478271 49317 op_desc.cc:682] CompileTime infer shape on fill_zeros_like
I0919 11:50:53.478525 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.478751 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.478962 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.479254 49317 op_desc.cc:682] CompileTime infer shape on reduce_sum
I0919 11:50:53.479497 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.479714 49317 op_desc.cc:682] CompileTime infer shape on elementwise_div
I0919 11:50:53.479971 49317 op_desc.cc:682] CompileTime infer shape on reduce_mean
I0919 11:50:53.480186 49317 op_desc.cc:682] CompileTime infer shape on elementwise_div
I0919 11:50:53.480464 49317 op_desc.cc:682] CompileTime infer shape on reduce_mean
I0919 11:50:53.480722 49317 op_desc.cc:682] CompileTime infer shape on reduce_mean
I0919 11:50:53.480950 49317 op_desc.cc:682] CompileTime infer shape on reduce_mean
I0919 11:50:53.481182 49317 op_desc.cc:682] CompileTime infer shape on reduce_mean
I0919 11:50:53.482177 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.482600 49317 op_desc.cc:682] CompileTime infer shape on lookup_table
I0919 11:50:53.483024 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.483376 49317 op_desc.cc:682] CompileTime infer shape on lookup_table
I0919 11:50:53.483785 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.484095 49317 op_desc.cc:682] CompileTime infer shape on lookup_table
I0919 11:50:53.484390 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.484628 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.485260 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.485693 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.486071 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.486454 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.486764 49317 op_desc.cc:682] CompileTime infer shape on matmul
I0919 11:50:53.487051 49317 op_desc.cc:682] CompileTime infer shape on scale
I0919 11:50:53.487370 49317 op_desc.cc:682] CompileTime infer shape on stack
I0919 11:50:53.487879 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.488222 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.488248 49317 mul_op.cc:43] mul operator x.shape=-1, 64, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.488656 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.488979 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.489452 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.489775 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.489796 49317 mul_op.cc:43] mul operator x.shape=-1, 64, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.490180 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.490540 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.490990 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.491348 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.491372 49317 mul_op.cc:43] mul operator x.shape=-1, 64, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.491791 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.492440 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.492838 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.493206 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.493547 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.493870 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.494155 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.494514 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.494807 49317 op_desc.cc:682] CompileTime infer shape on scale
I0919 11:50:53.495100 49317 op_desc.cc:682] CompileTime infer shape on matmul
I0919 11:50:53.495401 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.495659 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.495963 49317 op_desc.cc:682] CompileTime infer shape on softmax
I0919 11:50:53.496313 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.496620 49317 op_desc.cc:682] CompileTime infer shape on matmul
I0919 11:50:53.496940 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.497256 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.497736 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.498037 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.498057 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.498512 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.498833 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.499156 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.499464 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.499919 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.500398 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.500795 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.501293 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.501610 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.501631 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 3072 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.502048 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.502409 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.502701 49317 op_desc.cc:682] CompileTime infer shape on relu
I0919 11:50:53.503167 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.503518 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.503541 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 3072 y.shape=3072, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.503962 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.504295 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.504631 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.504885 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.505404 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.505885 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.506279 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.506814 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.507135 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.507156 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.507644 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.507982 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.508509 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.508822 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.508844 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.509310 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.509642 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.510135 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.510485 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.510509 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.510962 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.511312 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.511636 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.511953 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.512270 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.512594 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.512879 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.513219 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.513602 49317 op_desc.cc:682] CompileTime infer shape on scale
I0919 11:50:53.513958 49317 op_desc.cc:682] CompileTime infer shape on matmul
I0919 11:50:53.514313 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.514633 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.514999 49317 op_desc.cc:682] CompileTime infer shape on softmax
I0919 11:50:53.515353 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.515666 49317 op_desc.cc:682] CompileTime infer shape on matmul
I0919 11:50:53.515975 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.516291 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.516805 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.517127 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.517149 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.517637 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.517954 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.518306 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.518579 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.519115 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.519649 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.520025 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.520584 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.520892 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.520912 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 3072 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.521400 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.521723 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.521997 49317 op_desc.cc:682] CompileTime infer shape on relu
I0919 11:50:53.522536 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.522853 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.522874 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 3072 y.shape=3072, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.523401 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.523727 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.524053 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.524339 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.524893 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.525449 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.525919 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.526528 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.526849 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.526870 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.527403 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.527727 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.528298 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.528622 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.528643 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.529145 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.529515 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.530079 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.530428 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.530452 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.530995 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.531339 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.531673 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.531994 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.532315 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.532647 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.532939 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.533272 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.533576 49317 op_desc.cc:682] CompileTime infer shape on scale
I0919 11:50:53.533861 49317 op_desc.cc:682] CompileTime infer shape on matmul
I0919 11:50:53.534123 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.534407 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.534700 49317 op_desc.cc:682] CompileTime infer shape on softmax
I0919 11:50:53.535015 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.535334 49317 op_desc.cc:682] CompileTime infer shape on matmul
I0919 11:50:53.535678 49317 op_desc.cc:682] CompileTime infer shape on transpose2
I0919 11:50:53.535969 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.536576 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.536893 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.536916 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.537470 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.537789 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.538125 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.538424 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.538996 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.539588 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.539975 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.540593 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.540904 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.540925 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 768 y.shape=768, 3072 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.541492 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.541821 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.542093 49317 op_desc.cc:682] CompileTime infer shape on relu
I0919 11:50:53.542695 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.543022 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.543043 49317 mul_op.cc:43] mul operator x.shape=-1, 1, 3072 y.shape=3072, 768 x_num_col_dims=2 y_num_col_dims=1
I0919 11:50:53.543596 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.543906 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.544250 49317 op_desc.cc:682] CompileTime infer shape on dropout
I0919 11:50:53.544526 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.545125 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.545730 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.546105 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.546427 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.546720 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.546983 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.547279 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.547577 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.547857 49317 op_desc.cc:682] CompileTime infer shape on gather
I0919 11:50:53.548460 49317 op_desc.cc:682] CompileTime infer shape on truncated_gaussian_random
I0919 11:50:53.548772 49317 op_desc.cc:682] CompileTime infer shape on mul
I0919 11:50:53.548794 49317 mul_op.cc:43] mul operator x.shape=-1, 768 y.shape=768, 768 x_num_col_dims=1 y_num_col_dims=1
I0919 11:50:53.549329 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.549628 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.549860 49317 op_desc.cc:682] CompileTime infer shape on relu
I0919 11:50:53.550446 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.550999 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.551383 49317 op_desc.cc:682] CompileTime infer shape on layer_norm
I0919 11:50:53.551959 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.552657 49317 op_desc.cc:682] CompileTime infer shape on lookup_table
I0919 11:50:53.552933 49317 op_desc.cc:682] CompileTime infer shape on gather
I0919 11:50:53.553236 49317 op_desc.cc:682] CompileTime infer shape on reshape2
I0919 11:50:53.553551 49317 op_desc.cc:682] CompileTime infer shape on unsqueeze2
I0919 11:50:53.553814 49317 op_desc.cc:682] CompileTime infer shape on expand
I0919 11:50:53.554075 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.554366 49317 op_desc.cc:682] CompileTime infer shape on reduce_sum
I0919 11:50:53.554610 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.554944 49317 op_desc.cc:682] CompileTime infer shape on softmax_with_cross_entropy
I0919 11:50:53.555415 49317 op_desc.cc:682] CompileTime infer shape on top_k
I0919 11:50:53.555770 49317 op_desc.cc:682] CompileTime infer shape on accuracy
I0919 11:50:53.556025 49317 op_desc.cc:682] CompileTime infer shape on mean
I0919 11:50:53.556506 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.557021 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.557303 49317 op_desc.cc:682] CompileTime infer shape on increment
I0919 11:50:53.557646 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.557984 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.558431 49317 op_desc.cc:682] CompileTime infer shape on less_than
I0919 11:50:53.559255 49317 op_desc.cc:682] CompileTime infer shape on logical_not
I0919 11:50:53.559618 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.559860 49317 op_desc.cc:682] CompileTime infer shape on elementwise_div
I0919 11:50:53.560101 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.560369 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.560587 49317 op_desc.cc:682] CompileTime infer shape on assign
I0919 11:50:53.561270 49317 op_desc.cc:682] CompileTime infer shape on cast
I0919 11:50:53.561565 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.561848 49317 op_desc.cc:682] CompileTime infer shape on elementwise_min
I0919 11:50:53.562095 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.562367 49317 op_desc.cc:682] CompileTime infer shape on elementwise_div
I0919 11:50:53.562623 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.562857 49317 op_desc.cc:682] CompileTime infer shape on elementwise_sub
I0919 11:50:53.563097 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.563355 49317 op_desc.cc:682] CompileTime infer shape on elementwise_pow
I0919 11:50:53.563604 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.563832 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.564065 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.564313 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add
I0919 11:50:53.564544 49317 op_desc.cc:682] CompileTime infer shape on assign
I0919 11:50:53.566512 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.566939 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.567167 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.567459 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.567690 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.567926 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.568137 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.568399 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.568629 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.568861 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.569083 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.569350 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.569576 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.569809 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.570019 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.570271 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.570508 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.570739 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.570950 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.571185 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.571437 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.571681 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.571900 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.572130 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.572379 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.572618 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.572829 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.573060 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.573299 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.573542 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.573760 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.573987 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.574218 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.574474 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.574697 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.574925 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.575155 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.575423 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.575640 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.575876 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.576090 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.576354 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.576591 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.576825 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.577044 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.577311 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.577545 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.577795 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.578008 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.578269 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.578510 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.578752 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.578963 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.579224 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.579464 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.579701 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.579913 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.580142 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.580397 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.580657 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.580874 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.581183 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.581435 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.581665 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.581879 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.582100 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.582358 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.582613 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.582834 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.583149 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.583531 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.583791 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.584017 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.584278 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.584512 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.584744 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.584954 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.585181 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.585439 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.585677 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.585889 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.586113 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.586362 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.586599 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.586812 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.587052 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.587306 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.587550 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.587762 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.587986 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.588204 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.588476 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.588701 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.588928 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.589136 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.589416 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.589634 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.589874 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.590090 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.590353 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.590579 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.590808 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.591017 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.591279 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.591509 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.591742 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.591965 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.592201 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.592440 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.592670 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.592881 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.593106 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.593359 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.593602 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.593832 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.594076 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.594317 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul
I0919 11:50:53.628777 49317 op_desc.cc:682] CompileTime infer shape on fill_constant
I0919 11:50:53.628852 49317 op_desc.cc:682] CompileTime infer shape on mean_grad
I0919 11:50:53.628899 49317 op_desc.cc:682] CompileTime infer shape on softmax_with_cross_entropy_grad
I0919 11:50:53.628964 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add_grad
I0919 11:50:53.629027 49317 op_desc.cc:682] CompileTime infer shape on reduce_sum_grad
I0919 11:50:53.629079 49317 op_desc.cc:682] CompileTime infer shape on elementwise_mul_grad
I0919 11:50:53.629142 49317 op_desc.cc:682] CompileTime infer shape on expand_grad
I0919 11:50:53.629184 49317 op_desc.cc:682] CompileTime infer shape on unsqueeze2_grad
I0919 11:50:53.629251 49317 op_desc.cc:682] CompileTime infer shape on reshape2_grad
I0919 11:50:53.629333 49317 op_desc.cc:682] CompileTime infer shape on gather_grad
I0919 11:50:53.629384 49317 lookup_table_op.cc:178] lookup_table_grad op W@GRAD is set to LoDTensor
I0919 11:50:53.629395 49317 op_desc.cc:682] CompileTime infer shape on lookup_table_grad
I0919 11:50:53.629448 49317 op_desc.cc:682] CompileTime infer shape on layer_norm_grad
I0919 11:50:53.629523 49317 op_desc.cc:682] CompileTime infer shape on relu_grad
I0919 11:50:53.629571 49317 op_desc.cc:682] CompileTime infer shape on elementwise_add_grad
I0919 11:50:53.629638 49317 op_desc.cc:682] CompileTime infer shape on mul_grad
I0919 11:50:53.629693 49317 op_desc.cc:682] CompileTime infer shape on gather_grad
I0919 11:50:53.629734 49317 op_desc.cc:682] CompileTime infer shape on reshape2_grad
I0919 11:50:53.629773 49317 op_desc.cc:682] CompileTime infer shape on cast
run_pretrain.sh: line 55: 49317 Segmentation fault (core dumped) /usr/lib64/ld-linux-x86-64.so.2 --library-path /mnt/fzh/tools/cuda-9.0/lib64/:/mnt/fzh/tools/cuda/lib64 /mnt/fzh/tools/anaconda2/bin/python -u ./pretrain.py --run_config '{
"model_dir": "'${output_dir}'",
"max_steps": '${max_step}',
"save_steps": 10000,
"log_steps": 100,
"skip_steps": 0, # comment
"eval_steps": 10000,
"max_ckpt": 5,
"shit": 0
}' --hparam '{
"batch_size": '${batch_size}',
"attention_probs_dropout_prob": 0.1,
"directionality": "bidi",
"hidden_act": "relu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"max_position_embeddings": 513,
"num_attention_heads": 12,
"num_hidden_layers": 3,
"type_vocab_size": 2,
"vocab_size": 18000,
"warmup_proportion": 0.1,
"weight_decay": 0.01,
"warmup_steps": 10000,
"use_fp16": 0,
"learning_rate": '${lr}',
"shit": 0
}'