Unverified · Commit d0299b81 authored by Yibing Liu, committed by GitHub

Fix div zero error in lr decay (#3694)

* Fix div zero error in lr decay

* Update readme
Parent 8db0319c
@@ -70,7 +70,7 @@
 ```
 ## Installation
-This project depends on Paddle Fluid **1.5.1** or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it. If you need to convert TensorFlow model parameters to Paddle Fluid, TensorFlow 1.12 must also be installed.
+This project depends on Paddle Fluid **1.6.0** or later; please follow the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it. If you need to convert TensorFlow model parameters to Paddle Fluid, TensorFlow 1.12 must also be installed.
 ## Pre-training
......
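Because the code below switches to `fluid.data`, which only exists from Paddle Fluid 1.6.0 on, installing the right version matters. A hedged sketch of a fail-fast version guard (not part of this commit, and it assumes `fluid.require_version` is available in the installed build):

```python
import paddle.fluid as fluid

def check_paddle_version():
    # Fail fast with a readable message instead of an obscure
    # AttributeError the first time fluid.data is called.
    try:
        fluid.require_version('1.6.0')
    except Exception:
        raise RuntimeError(
            "This project requires Paddle Fluid 1.6.0 or later; "
            "please upgrade PaddlePaddle.")
```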
@@ -26,13 +26,13 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
     input_fields = {
         'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
         'shapes': [[None, None], [None, None], [None, None],
-                   [-1, args.max_seq_len, 1], [-1, 1]],
+                   [None, args.max_seq_len, 1], [None, 1]],
         'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
         'lod_levels': [0, 0, 0, 0, 0],
     }
     inputs = [
-        fluid.layers.data(
+        fluid.data(
             name=input_fields['names'][i],
             shape=input_fields['shapes'][i],
             dtype=input_fields['dtypes'][i],
......
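For context on the change above: `fluid.data` (new in Paddle Fluid 1.6.0, hence the README bump) replaces `fluid.layers.data` and marks runtime-determined dimensions with `None` instead of `-1`. A minimal sketch of the new-style declaration, using the field names from the diff (`MAX_SEQ_LEN` is an illustrative stand-in for `args.max_seq_len`):

```python
import paddle.fluid as fluid

MAX_SEQ_LEN = 128  # illustrative stand-in for args.max_seq_len

# None marks a dimension whose size is only known at feed time,
# so both batch size and sequence length stay dynamic.
src_ids = fluid.data(name='src_ids', shape=[None, None], dtype='int64')
input_mask = fluid.data(
    name='input_mask', shape=[None, MAX_SEQ_LEN, 1], dtype='float32')
labels = fluid.data(name='labels', shape=[None, 1], dtype='int64')
```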
@@ -73,9 +73,10 @@ def optimization(loss,
                 .noam_decay(1 / (warmup_steps * (learning_rate ** 2)),
                             warmup_steps)
         else:
-            printf(
-                "WARNING: noam decay should have postive warmup steps, using "
-                "constant learning rate instead!")
+            print(
+                "WARNING: noam decay of learning rate should have positive "
+                "warmup steps but given {}, using constant learning rate "
+                "instead!".format(warmup_steps))
             scheduled_lr = fluid.layers.create_global_var(
                 name=fluid.unique_name.generate("learning_rate"),
                 shape=[1],
@@ -83,8 +84,20 @@
                 dtype='float32',
                 persistable=True)
     elif scheduler == 'linear_warmup_decay':
-        scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
-                                           num_train_steps)
+        if warmup_steps > 0:
+            scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
+                                               num_train_steps)
+        else:
+            print(
+                "WARNING: linear warmup decay of learning rate should have "
+                "positive warmup steps but given {}, using constant learning "
+                "rate instead!".format(warmup_steps))
+            scheduled_lr = fluid.layers.create_global_var(
+                name=fluid.unique_name.generate("learning_rate"),
+                shape=[1],
+                value=learning_rate,
+                dtype='float32',
+                persistable=True)
     else:
         raise ValueError("Unknown learning rate scheduler, should be "
                          "'noam_decay' or 'linear_warmup_decay'")
......
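The commit title names the underlying bug: with `warmup_steps == 0`, the noam branch evaluates `1 / (warmup_steps * (learning_rate ** 2))` and crashes, and `linear_warmup_decay` hits the analogous division when it scales the global step by `warmup_steps`. The fix guards each branch and falls back to a constant rate held in a persistable global variable, so downstream code still receives a graph Variable. A minimal standalone sketch of that pattern, assuming Paddle Fluid 1.6 (the helper names `constant_lr` and `noam_lr` are hypothetical, not the repo's):

```python
import paddle.fluid as fluid

def constant_lr(learning_rate):
    # Fallback shared by both scheduler branches: a persistable global
    # variable holding a fixed rate, so callers still get a Variable
    # rather than a Python float.
    return fluid.layers.create_global_var(
        name=fluid.unique_name.generate("learning_rate"),
        shape=[1],
        value=learning_rate,
        dtype='float32',
        persistable=True)

def noam_lr(learning_rate, warmup_steps):
    # Guard first: warmup_steps == 0 would make the expression below
    # divide by zero.
    if warmup_steps > 0:
        return fluid.layers.noam_decay(
            1 / (warmup_steps * (learning_rate ** 2)), warmup_steps)
    print("WARNING: noam decay should have positive warmup steps but was "
          "given {}, using constant learning rate instead!"
          .format(warmup_steps))
    return constant_lr(learning_rate)
```

Either return value can be handed to, e.g., `fluid.optimizer.Adam(learning_rate=...)`, which accepts a Variable as well as a float, so the rest of the training loop stays unchanged.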
@@ -111,7 +111,7 @@ def create_model(bert_config, is_training=False):
         input_fields = {
             'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'start_positions', 'end_positions'],
             'shapes': [[None, None], [None, None], [None, None],
-                       [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
+                       [None, args.max_seq_len, 1], [None, 1], [None, 1]],
             'dtypes': [
                 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
             'lod_levels': [0, 0, 0, 0, 0, 0],
@@ -120,13 +120,13 @@ def create_model(bert_config, is_training=False):
         input_fields = {
             'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'],
             'shapes': [[None, None], [None, None], [None, None],
-                       [-1, args.max_seq_len, 1], [-1, 1]],
+                       [None, args.max_seq_len, 1], [None, 1]],
             'dtypes': [
                 'int64', 'int64', 'int64', 'float32', 'int64'],
             'lod_levels': [0, 0, 0, 0, 0],
         }
-    inputs = [fluid.layers.data(name=input_fields['names'][i],
+    inputs = [fluid.data(name=input_fields['names'][i],
                          shape=input_fields['shapes'][i],
                          dtype=input_fields['dtypes'][i],
                          lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))]
......
@@ -105,7 +105,7 @@ def create_model(bert_config):
         'lod_levels': [0, 0, 0, 0, 0, 0, 0],
     }
-    inputs = [fluid.layers.data(name=input_fields['names'][i],
+    inputs = [fluid.data(name=input_fields['names'][i],
                          shape=input_fields['shapes'][i],
                          dtype=input_fields['dtypes'][i],
                          lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))]
......