未验证 提交 d0299b81 作者: Yibing Liu 提交者: GitHub

Fix div zero error in lr decay (#3694)

* Fix div zero error in lr decay

* Update readme
上级 8db0319c
...@@ -70,7 +70,7 @@ ...@@ -70,7 +70,7 @@
``` ```
## 安装 ## 安装
本项目依赖于 Paddle Fluid **1.5.1** 及以上版本,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。如果需要进行 TensorFlow 模型到 Paddle Fluid 参数的转换,则需要同时安装 TensorFlow 1.12。 本项目依赖于 Paddle Fluid **1.6.0** 及以上版本,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。如果需要进行 TensorFlow 模型到 Paddle Fluid 参数的转换,则需要同时安装 TensorFlow 1.12。
## 预训练 ## 预训练
......
...@@ -26,13 +26,13 @@ def create_model(args, bert_config, num_labels, is_prediction=False): ...@@ -26,13 +26,13 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
input_fields = { input_fields = {
'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'], 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
'shapes': [[None, None], [None, None], [None, None], 'shapes': [[None, None], [None, None], [None, None],
[-1, args.max_seq_len, 1], [-1, 1]], [None, args.max_seq_len, 1], [None, 1]],
'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'], 'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
'lod_levels': [0, 0, 0, 0, 0], 'lod_levels': [0, 0, 0, 0, 0],
} }
inputs = [ inputs = [
fluid.layers.data( fluid.data(
name=input_fields['names'][i], name=input_fields['names'][i],
shape=input_fields['shapes'][i], shape=input_fields['shapes'][i],
dtype=input_fields['dtypes'][i], dtype=input_fields['dtypes'][i],
......
...@@ -73,9 +73,10 @@ def optimization(loss, ...@@ -73,9 +73,10 @@ def optimization(loss,
.noam_decay(1/(warmup_steps *(learning_rate ** 2)), .noam_decay(1/(warmup_steps *(learning_rate ** 2)),
warmup_steps) warmup_steps)
else: else:
printf( print(
"WARNING: noam decay should have postive warmup steps, using " "WARNING: noam decay of learning rate should have postive warmup "
"constant learning rate instead!") "steps but given {}, using constant learning rate instead!"
.format(warmup_steps))
scheduled_lr = fluid.layers.create_global_var( scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"), name=fluid.unique_name.generate("learning_rate"),
shape=[1], shape=[1],
...@@ -83,8 +84,20 @@ def optimization(loss, ...@@ -83,8 +84,20 @@ def optimization(loss,
dtype='float32', dtype='float32',
persistable=True) persistable=True)
elif scheduler == 'linear_warmup_decay': elif scheduler == 'linear_warmup_decay':
scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps, if warmup_steps > 0:
num_train_steps) scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
num_train_steps)
else:
print(
"WARNING: linear warmup decay of learning rate should have "
"postive warmup steps but given {}, use constant learning rate "
"instead!".format(warmup_steps))
scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"),
shape=[1],
value=learning_rate,
dtype='float32',
persistable=True)
else: else:
raise ValueError("Unkown learning rate scheduler, should be " raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'") "'noam_decay' or 'linear_warmup_decay'")
......
...@@ -111,7 +111,7 @@ def create_model(bert_config, is_training=False): ...@@ -111,7 +111,7 @@ def create_model(bert_config, is_training=False):
input_fields = { input_fields = {
'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'start_positions', 'end_positions'], 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'start_positions', 'end_positions'],
'shapes': [[None, None], [None, None], [None, None], 'shapes': [[None, None], [None, None], [None, None],
[-1, args.max_seq_len, 1], [-1, 1], [-1, 1]], [None, args.max_seq_len, 1], [None, 1], [None, 1]],
'dtypes': [ 'dtypes': [
'int64', 'int64', 'int64', 'float32', 'int64', 'int64'], 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
'lod_levels': [0, 0, 0, 0, 0, 0], 'lod_levels': [0, 0, 0, 0, 0, 0],
...@@ -120,13 +120,13 @@ def create_model(bert_config, is_training=False): ...@@ -120,13 +120,13 @@ def create_model(bert_config, is_training=False):
input_fields = { input_fields = {
'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'], 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'],
'shapes': [[None, None], [None, None], [None, None], 'shapes': [[None, None], [None, None], [None, None],
[-1, args.max_seq_len, 1], [-1, 1]], [None, args.max_seq_len, 1], [None, 1]],
'dtypes': [ 'dtypes': [
'int64', 'int64', 'int64', 'float32', 'int64'], 'int64', 'int64', 'int64', 'float32', 'int64'],
'lod_levels': [0, 0, 0, 0, 0], 'lod_levels': [0, 0, 0, 0, 0],
} }
inputs = [fluid.layers.data(name=input_fields['names'][i], inputs = [fluid.data(name=input_fields['names'][i],
shape=input_fields['shapes'][i], shape=input_fields['shapes'][i],
dtype=input_fields['dtypes'][i], dtype=input_fields['dtypes'][i],
lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))] lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))]
......
...@@ -105,7 +105,7 @@ def create_model(bert_config): ...@@ -105,7 +105,7 @@ def create_model(bert_config):
'lod_levels': [0, 0, 0, 0, 0, 0, 0], 'lod_levels': [0, 0, 0, 0, 0, 0, 0],
} }
inputs = [fluid.layers.data(name=input_fields['names'][i], inputs = [fluid.data(name=input_fields['names'][i],
shape=input_fields['shapes'][i], shape=input_fields['shapes'][i],
dtype=input_fields['dtypes'][i], dtype=input_fields['dtypes'][i],
lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))] lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册