未验证 提交 5a41865d 编写于 作者: 0 0YuanZhang0 提交者: GitHub

Upgrade dialogue models to paddle 1.8 (#4594)

* upgrade_dialogue_models

* fix_upgrade_models_comments
上级 a5c2db94
...@@ -10,11 +10,11 @@ ...@@ -10,11 +10,11 @@
## 快速开始 ## 快速开始
**目前模型要求使用PaddlePaddle 1.6及以上版本或适当的develop版本运行。** **目前模型要求使用PaddlePaddle 1.8及以上版本或适当的develop版本运行。**
### 1. Paddle版本安装 ### 1. Paddle版本安装
本项目训练模块兼容Python2.7.x以及Python3.7.x, 依赖PaddlePaddle 1.6版本以及CentOS系统环境, 安装请参考官网 [快速安装](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/install/index_cn.html) 本项目训练模块兼容Python2.7.x以及Python3.7.x, 依赖PaddlePaddle 1.8版本以及CentOS系统环境, 安装请参考官网 [快速安装](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/install/index_cn.html)
注意:该模型同时支持cpu和gpu训练和预测,用户可以根据自身需求,选择安装对应的paddlepaddle-gpu或paddlepaddle版本。 注意:该模型同时支持cpu和gpu训练和预测,用户可以根据自身需求,选择安装对应的paddlepaddle-gpu或paddlepaddle版本。
......
文件模式从 100755 更改为 100644
...@@ -452,7 +452,7 @@ def main(args): ...@@ -452,7 +452,7 @@ def main(args):
if args.use_cuda: if args.use_cuda:
test_place = fluid.cuda_places(0) test_place = fluid.cuda_places(0)
place = fluid.cuda_places() place = fluid.cuda_places()
DEV_COUNT = fluid.core.get_cuda_device_count() DEV_COUNT = len(place)
else: else:
test_place = fluid.cpu_places(1) test_place = fluid.cpu_places(1)
os.environ['CPU_NUM'] = str(args.cpu_num) os.environ['CPU_NUM'] = str(args.cpu_num)
......
...@@ -130,12 +130,11 @@ class DataReader(object): ...@@ -130,12 +130,11 @@ class DataReader(object):
assert os.path.exists(data_path), "The given data file does not exist." assert os.path.exists(data_path), "The given data file does not exist."
if mode == "train": if mode == "train":
train_reader = fluid.io.batch( train_reader = fluid.io.batch(
paddle.reader.shuffle( fluid.io.shuffle(
self.data_reader( self.data_reader(
data_path, self.max_len, shuffle=True), data_path, self.max_len, shuffle=True),
buf_size=batch_size * 100), buf_size=batch_size * 100),
batch_size) batch_size)
# train_reader = fluid.io.batch(self.data_reader(data_path), batch_size)
return train_reader return train_reader
else: else:
test_reader = fluid.io.batch( test_reader = fluid.io.batch(
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
- cuda >= 9.0 - cuda >= 9.0
- cudnn >= 7.0 - cudnn >= 7.0
- pandas >= 0.20.1 - pandas >= 0.20.1
- PaddlePaddle >= 1.7.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle - PaddlePaddle >= 1.8.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle
####   b、下载代码 ####   b、下载代码
...@@ -119,13 +119,10 @@ emb_size: embedding层大小 ...@@ -119,13 +119,10 @@ emb_size: embedding层大小
vocab_size: 词表大小 vocab_size: 词表大小
sample_pro: 采样比率 sample_pro: 采样比率
output_prediction_file: 输出的预测文件 output_prediction_file: 输出的预测文件
init_from_checkpoint: 加载断点模型
init_from_params: 训练好的模型参数文件,一般用于预测 init_from_params: 训练好的模型参数文件,一般用于预测
init_from_pretrain_model: 预训练模型路径,如bert的模型参数 init_from_pretrain_model: 预训练模型路径,如bert的模型参数
inference_model_dir: inference model的保存路径 inference_model_dir: inference model的保存路径
save_model_path: 训练产出模型的输出路径 save_model_path: 训练产出模型的输出路径
save_checkpoint: 调用paddle的io接口save_persistables(把传入的层中所有参数以及优化器进行保存)来保存模型参数
save_param: 调用paddle的io接口save_params(从main_program中取出所有参数然后保存到文件中)来保存模型参数
evaluation_file: 参与评估的inference 文件 evaluation_file: 参与评估的inference 文件
vocab_path: 词表路径 vocab_path: 词表路径
max_seq_len: 输入最大序列长度 max_seq_len: 输入最大序列长度
...@@ -199,7 +196,6 @@ python -u main.py \ ...@@ -199,7 +196,6 @@ python -u main.py \
--loss_type="CLS" \ --loss_type="CLS" \
--max_seq_len=50 \ --max_seq_len=50 \
--save_model_path="data/saved_models/matching_pretrained" \ --save_model_path="data/saved_models/matching_pretrained" \
--save_param="params" \
--training_file="data/input/data/unlabel_data/train.ids" \ --training_file="data/input/data/unlabel_data/train.ids" \
--epoch=20 \ --epoch=20 \
--print_step=1 \ --print_step=1 \
...@@ -217,7 +213,7 @@ python -u main.py \ ...@@ -217,7 +213,7 @@ python -u main.py \
#### windows环境下: #### windows环境下:
训练: 训练:
``` ```
python -u main.py --do_train=true --use_cuda=false --loss_type=CLS --max_seq_len=50 --save_model_path=data\saved_models\matching_pretrained --save_param=params --training_file=data\input\data\unlabel_data\train.ids --epoch=20 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1 python -u main.py --do_train=true --use_cuda=false --loss_type=CLS --max_seq_len=50 --save_model_path=data\saved_models\matching_pretrained --training_file=data\input\data\unlabel_data\train.ids --epoch=20 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1
``` ```
#### 2、第二阶段finetuning模型的训练: #### 2、第二阶段finetuning模型的训练:
...@@ -271,9 +267,8 @@ python -u main.py \ ...@@ -271,9 +267,8 @@ python -u main.py \
--use_cuda=${use_cuda} \ --use_cuda=${use_cuda} \
--loss_type="L2" \ --loss_type="L2" \
--max_seq_len=50 \ --max_seq_len=50 \
--init_from_pretrain_model="data/saved_models/trained_models/matching_pretrained/params" \ --init_from_pretrain_model="data/saved_models/trained_models/matching_pretrained/params/params" \
--save_model_path="data/saved_models/human_finetuned" \ --save_model_path="data/saved_models/human_finetuned" \
--save_param="params" \
--training_file="data/input/data/label_data/human/train.ids" \ --training_file="data/input/data/label_data/human/train.ids" \
--epoch=50 \ --epoch=50 \
--print_step=1 \ --print_step=1 \
...@@ -288,7 +283,7 @@ python -u main.py \ ...@@ -288,7 +283,7 @@ python -u main.py \
#### windows环境下: #### windows环境下:
``` ```
python -u main.py --do_train=true --use_cuda=false --loss_type=L2 --max_seq_len=50 --save_model_path=data\saved_models\human_finetuned --save_param=params --training_file=data\input\data\label_data\human\train.ids --epoch=50 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1 python -u main.py --do_train=true --use_cuda=false --loss_type=L2 --max_seq_len=50 --save_model_path=data\saved_models\human_finetuned --training_file=data\input\data\label_data\human\train.ids --epoch=50 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1
``` ```
### 模型预测 ### 模型预测
......
...@@ -29,7 +29,7 @@ DATA_MODEL_PATH = { ...@@ -29,7 +29,7 @@ DATA_MODEL_PATH = {
"DATA_PATH": "DATA_PATH":
"https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz", "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz",
"TRAINED_MODEL": "TRAINED_MODEL":
"https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.2.0.0.tar.gz" "https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.3.0.0.tar.gz"
} }
PATH_MAP = {'DATA_PATH': "./data/input", 'TRAINED_MODEL': './data/saved_models'} PATH_MAP = {'DATA_PATH': "./data/input", 'TRAINED_MODEL': './data/saved_models'}
......
...@@ -34,7 +34,7 @@ def create_net(is_training, ...@@ -34,7 +34,7 @@ def create_net(is_training,
label = model_input.labels label = model_input.labels
#emb #emb
context_emb = fluid.input.embedding( context_emb = fluid.embedding(
input=context_wordseq, input=context_wordseq,
size=[args.vocab_size, args.emb_size], size=[args.vocab_size, args.emb_size],
is_sparse=True, is_sparse=True,
...@@ -42,7 +42,7 @@ def create_net(is_training, ...@@ -42,7 +42,7 @@ def create_net(is_training,
name=word_emb_name, name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1))) initializer=fluid.initializer.Normal(scale=0.1)))
response_emb = fluid.input.embedding( response_emb = fluid.embedding(
input=response_wordseq, input=response_wordseq,
size=[args.vocab_size, args.emb_size], size=[args.vocab_size, args.emb_size],
is_sparse=True, is_sparse=True,
......
...@@ -14,13 +14,10 @@ emb_size: 256 ...@@ -14,13 +14,10 @@ emb_size: 256
vocab_size: 484016 vocab_size: 484016
sample_pro: 1.0 sample_pro: 1.0
output_prediction_file: "" output_prediction_file: ""
init_from_checkpoint: ""
init_from_params: "" init_from_params: ""
init_from_pretrain_model: "" init_from_pretrain_model: ""
inference_model_dir: "" inference_model_dir: ""
save_model_path: "" save_model_path: ""
save_checkpoint: ""
save_param: ""
evaluation_file: "" evaluation_file: ""
vocab_path: "" vocab_path: ""
max_seq_len: 128 max_seq_len: 128
......
...@@ -27,7 +27,6 @@ from ade_net import create_net ...@@ -27,7 +27,6 @@ from ade_net import create_net
from ade.utils.configure import PDConfig from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_save_inference_model(args): def do_save_inference_model(args):
...@@ -55,7 +54,7 @@ def do_save_inference_model(args): ...@@ -55,7 +54,7 @@ def do_save_inference_model(args):
input_inst = [context_wordseq, response_wordseq, labels] input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
data_reader = fluid.io.PyReader( data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False) feed_list=input_inst, capacity=4, iterable=False)
logits = create_net( logits = create_net(
...@@ -72,9 +71,9 @@ def do_save_inference_model(args): ...@@ -72,9 +71,9 @@ def do_save_inference_model(args):
assert (args.init_from_params) or (args.init_from_pretrain_model) assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params: if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog) fluid.load(test_prog, args.init_from_params)
elif args.init_from_pretrain_model: elif args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog) fluid.load(test_prog, args.init_from_pretrain_model)
# saving inference model # saving inference model
fluid.io.save_inference_model( fluid.io.save_inference_model(
......
...@@ -29,7 +29,6 @@ from ade_net import create_net ...@@ -29,7 +29,6 @@ from ade_net import create_net
from ade.utils.configure import PDConfig from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_predict(args): def do_predict(args):
...@@ -59,12 +58,11 @@ def do_predict(args): ...@@ -59,12 +58,11 @@ def do_predict(args):
input_inst = [context_wordseq, response_wordseq, labels] input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
data_reader = fluid.io.PyReader( data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False) feed_list=input_inst, capacity=4, iterable=False)
logits = create_net( logits = create_net(
is_training=False, model_input=input_field, args=args) is_training=False, model_input=input_field, args=args)
logits.persistable = True
fetch_list = [logits.name] fetch_list = [logits.name]
#set for_test=True to change the is_test attribute of operators to True #set for_test=True to change the is_test attribute of operators to True
...@@ -79,9 +77,9 @@ def do_predict(args): ...@@ -79,9 +77,9 @@ def do_predict(args):
assert (args.init_from_params) or (args.init_from_pretrain_model) assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params: if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog) fluid.load(test_prog, args.init_from_params, executor=exe)
if args.init_from_pretrain_model: if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog) fluid.load(test_prog, args.init_from_pretrain_model, executor=exe)
compiled_test_prog = fluid.CompiledProgram(test_prog) compiled_test_prog = fluid.CompiledProgram(test_prog)
...@@ -94,7 +92,7 @@ def do_predict(args): ...@@ -94,7 +92,7 @@ def do_predict(args):
place=place, phase="test", shuffle=False, sample_pro=1) place=place, phase="test", shuffle=False, sample_pro=1)
num_test_examples = processor.get_num_examples(phase='test') num_test_examples = processor.get_num_examples(phase='test')
data_reader.decorate_batch_generator(batch_generator) data_reader.set_batch_generator(batch_generator, places=place)
data_reader.start() data_reader.start()
scores = [] scores = []
...@@ -110,7 +108,7 @@ def do_predict(args): ...@@ -110,7 +108,7 @@ def do_predict(args):
print("Write the predicted results into the output_prediction_file") print("Write the predicted results into the output_prediction_file")
fw = io.open(args.output_prediction_file, 'w', encoding="utf8") fw = io.open(args.output_prediction_file, 'w', encoding="utf8")
for index, score in enumerate(scores): for index, score in enumerate(scores):
fw.write("%s\t%s\n" % (index, score)) fw.write(u"%s\t%s\n" % (index, score[0]))
print("finish........................................") print("finish........................................")
......
...@@ -67,7 +67,6 @@ function pretrain_train() ...@@ -67,7 +67,6 @@ function pretrain_train()
--loss_type="CLS" \ --loss_type="CLS" \
--max_seq_len=50 \ --max_seq_len=50 \
--save_model_path=${pretrain_model_path} \ --save_model_path=${pretrain_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/unlabel_data/train.ids" \ --training_file="${INPUT_PATH}/unlabel_data/train.ids" \
--epoch=20 \ --epoch=20 \
--print_step=1 \ --print_step=1 \
...@@ -99,9 +98,8 @@ function finetuning_train() ...@@ -99,9 +98,8 @@ function finetuning_train()
--use_cuda=${1} \ --use_cuda=${1} \
--loss_type="L2" \ --loss_type="L2" \
--max_seq_len=50 \ --max_seq_len=50 \
--init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/params/step_final" \ --init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/step_final" \
--save_model_path=${save_model_path} \ --save_model_path=${save_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/label_data/${2}/train.ids" \ --training_file="${INPUT_PATH}/label_data/${2}/train.ids" \
--epoch=50 \ --epoch=50 \
--print_step=1 \ --print_step=1 \
...@@ -121,7 +119,7 @@ function pretrain_predict() ...@@ -121,7 +119,7 @@ function pretrain_predict()
--do_predict=true \ --do_predict=true \
--use_cuda=${1} \ --use_cuda=${1} \
--predict_file="${INPUT_PATH}/unlabel_data/test.ids" \ --predict_file="${INPUT_PATH}/unlabel_data/test.ids" \
--init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \ --init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params/params" \
--loss_type="CLS" \ --loss_type="CLS" \
--output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \ --output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
--max_seq_len=50 \ --max_seq_len=50 \
...@@ -137,7 +135,7 @@ function finetuning_predict() ...@@ -137,7 +135,7 @@ function finetuning_predict()
--do_predict=true \ --do_predict=true \
--use_cuda=${1} \ --use_cuda=${1} \
--predict_file="${INPUT_PATH}/label_data/${2}/test.ids" \ --predict_file="${INPUT_PATH}/label_data/${2}/test.ids" \
--init_from_params=${SAVED_MODELS}/trained_models/${2}_finetuned/params \ --init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params/params" \
--loss_type="L2" \ --loss_type="L2" \
--output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \ --output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
--max_seq_len=50 \ --max_seq_len=50 \
......
...@@ -29,7 +29,6 @@ from ade_net import create_net, set_word_embedding ...@@ -29,7 +29,6 @@ from ade_net import create_net, set_word_embedding
from ade.utils.configure import PDConfig from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
try: try:
import cPickle as pickle #python 2 import cPickle as pickle #python 2
...@@ -62,24 +61,27 @@ def do_train(args): ...@@ -62,24 +61,27 @@ def do_train(args):
input_inst = [context_wordseq, response_wordseq, labels] input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
data_reader = fluid.io.PyReader( data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False) feed_list=input_inst, capacity=4, iterable=False)
loss = create_net( loss = create_net(
is_training=True, model_input=input_field, args=args) is_training=True, model_input=input_field, args=args)
loss.persistable = True
# gradient clipping # gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue( optimizer = fluid.optimizer.AdamOptimizer(
max=1.0, min=-1.0)) learning_rate=args.learning_rate,
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) grad_clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer.minimize(loss) optimizer.minimize(loss)
if args.use_cuda: if args.use_cuda:
dev_count = fluid.core.get_cuda_device_count() places = fluid.cuda_places()
dev_count = len(places)
place = fluid.CUDAPlace( place = fluid.CUDAPlace(
int(os.getenv('FLAGS_selected_gpus', '0'))) int(os.getenv('FLAGS_selected_gpus', '0')))
else: else:
dev_count = int(os.environ.get('CPU_NUM', 1)) places = fluid.cpu_places()
dev_count = len(places)
place = fluid.CPUPlace() place = fluid.CPUPlace()
processor = reader.DataProcessor( processor = reader.DataProcessor(
...@@ -99,20 +101,20 @@ def do_train(args): ...@@ -99,20 +101,20 @@ def do_train(args):
print("Num train examples: %d" % num_train_examples) print("Num train examples: %d" % num_train_examples)
print("Max train steps: %d" % max_train_steps) print("Max train steps: %d" % max_train_steps)
data_reader.decorate_batch_generator(batch_generator) data_reader.set_batch_generator(batch_generator, places=place)
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(startup_prog) exe.run(startup_prog)
assert (args.init_from_checkpoint == "") or ( assert (args.init_from_params == "") or (
args.init_from_pretrain_model == "") args.init_from_pretrain_model == "")
#init from some checkpoint, to resume the previous training #init from some checkpoint, to resume the previous training
if args.init_from_checkpoint: if args.init_from_params:
save_load_io.init_from_checkpoint(args, exe, train_prog) fluid.load(train_prog, args.init_from_params, exe)
#init from some pretrain models, to better solve the current task #init from some pretrain models, to better solve the current task
if args.init_from_pretrain_model: if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, train_prog) fluid.load(train_prog, args.init_from_pretrain_model, exe)
if args.word_emb_init: if args.word_emb_init:
print("start loading word embedding init ...") print("start loading word embedding init ...")
...@@ -163,21 +165,17 @@ def do_train(args): ...@@ -163,21 +165,17 @@ def do_train(args):
time_begin = time.time() time_begin = time.time()
if steps % args.save_steps == 0: if steps % args.save_steps == 0:
if args.save_checkpoint: model_path = os.path.join(args.save_model_path,
save_load_io.save_checkpoint(args, exe, train_prog, "step_" + str(steps))
"step_" + str(steps)) fluid.save(train_prog, model_path)
if args.save_param:
save_load_io.save_param(args, exe, train_prog,
"step_" + str(steps))
steps += 1 steps += 1
except fluid.core.EOFException: except fluid.core.EOFException:
data_reader.reset() data_reader.reset()
break break
if args.save_checkpoint: model_path = os.path.join(args.save_model_path, "step_final")
save_load_io.save_checkpoint(args, exe, train_prog, "step_final") fluid.save(train_prog, model_path)
if args.save_param:
save_load_io.save_param(args, exe, train_prog, "step_final")
def get_cards(): def get_cards():
num = 0 num = 0
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
- Python >= 2.7 - Python >= 2.7
- cuda >= 9.0 - cuda >= 9.0
- cudnn >= 7.0 - cudnn >= 7.0
- PaddlePaddle >= 1.7.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。 - PaddlePaddle >= 1.8.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。
####   b、下载代码 ####   b、下载代码
...@@ -123,13 +123,10 @@ format:conversation_content \t question \1 answer \t state1 state2 state3..... ...@@ -123,13 +123,10 @@ format:conversation_content \t question \1 answer \t state1 state2 state3.....
task_name: 任务名称,可选udc、swda、mrda、atis_intent、atis_slot、dstc2 task_name: 任务名称,可选udc、swda、mrda、atis_intent、atis_slot、dstc2
data_dir: 数据路径,如./data/input/data/udc data_dir: 数据路径,如./data/input/data/udc
bert_config_path: 预训练模型bert的网络配置./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json bert_config_path: 预训练模型bert的网络配置./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json
init_from_checkpoint: 加载断点模型
init_from_params: 训练好的模型参数文件,一般用于预测 init_from_params: 训练好的模型参数文件,一般用于预测
init_from_pretrain_model: 预训练模型路径,如bert的模型参数 init_from_pretrain_model: 预训练模型路径,如bert的模型参数
inference_model_dir: inference model的保存路径 inference_model_dir: inference model的保存路径
save_model_path: 训练产出模型的输出路径 save_model_path: 训练产出模型的输出路径
save_checkpoint: 调用paddle的io接口save_persistables(把传入的层中所有参数以及优化器进行保存)来保存模型参数
save_param: 调用paddle的io接口save_params(从main_program中取出所有参数然后保存到文件中)来保存模型参数
lr_scheduler: learning rate scheduler lr_scheduler: learning rate scheduler
weight_decay: learning rate 权重衰减因子 weight_decay: learning rate 权重衰减因子
warmup_proportion: warmup比率 warmup_proportion: warmup比率
...@@ -221,7 +218,6 @@ python -u main.py \ ...@@ -221,7 +218,6 @@ python -u main.py \
--vocab_path="${BERT_BASE_PATH}/vocab.txt" \ --vocab_path="${BERT_BASE_PATH}/vocab.txt" \
--init_from_pretrain_model="${BERT_BASE_PATH}/params" \ --init_from_pretrain_model="${BERT_BASE_PATH}/params" \
--save_model_path="./data/saved_models/${TASK_NAME}" \ --save_model_path="./data/saved_models/${TASK_NAME}" \
--save_param="params" \
--save_steps=100 \ --save_steps=100 \
--learning_rate=2e-5 \ --learning_rate=2e-5 \
--weight_decay=0.01 \ --weight_decay=0.01 \
...@@ -235,7 +231,7 @@ python -u main.py \ ...@@ -235,7 +231,7 @@ python -u main.py \
#### windows环境下 #### windows环境下
``` ```
python -u main.py --task_name=atis_intent --use_cuda=false --do_train=true --epoch=20 --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --init_from_pretrain_model=data\pretrain_model\uncased_L-12_H-768_A-12\params --save_model_path=data\saved_models\atis_intent --save_param=params --save_steps=100 --learning_rate=2e-5 --weight_decay=0.01 --max_seq_len=128 --print_steps=10 python -u main.py --task_name=atis_intent --use_cuda=false --do_train=true --epoch=20 --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --init_from_pretrain_model=data\pretrain_model\uncased_L-12_H-768_A-12\params --save_model_path=data\saved_models\atis_intent --save_steps=100 --learning_rate=2e-5 --weight_decay=0.01 --max_seq_len=128 --print_steps=10
``` ```
### 模型预测 ### 模型预测
...@@ -294,7 +290,7 @@ python -u main.py \ ...@@ -294,7 +290,7 @@ python -u main.py \
--batch_size=32 \ --batch_size=32 \
--do_lower_case=true \ --do_lower_case=true \
--data_dir="./data/input/data/atis/${TASK_NAME}" \ --data_dir="./data/input/data/atis/${TASK_NAME}" \
--init_from_params="./data/saved_models/trained_models/${TASK_NAME}/params" \ --init_from_params="./data/saved_models/trained_models/${TASK_NAME}/params/params" \
--bert_config_path="${BERT_BASE_PATH}/bert_config.json" \ --bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
--vocab_path="${BERT_BASE_PATH}/vocab.txt" \ --vocab_path="${BERT_BASE_PATH}/vocab.txt" \
--output_prediction_file="./data/output/pred_${TASK_NAME}" \ --output_prediction_file="./data/output/pred_${TASK_NAME}" \
...@@ -305,7 +301,7 @@ python -u main.py \ ...@@ -305,7 +301,7 @@ python -u main.py \
#### windows环境下 #### windows环境下
``` ```
python -u main.py --task_name=atis_intent --use_cuda=false --do_predict=true --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --init_from_params=data\saved_models\trained_models\atis_intent\params --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --output_prediction_file=data\output\pred_atis_intent --max_seq_len=128 python -u main.py --task_name=atis_intent --use_cuda=false --do_predict=true --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --init_from_params=data\saved_models\trained_models\atis_intent\params\params --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --output_prediction_file=data\output\pred_atis_intent --max_seq_len=128
``` ```
### 模型评估 ### 模型评估
......
task_name: "" task_name: ""
data_dir: "" data_dir: ""
bert_config_path: "" bert_config_path: ""
init_from_checkpoint: ""
init_from_params: "" init_from_params: ""
init_from_pretrain_model: "" init_from_pretrain_model: ""
inference_model_dir: "" inference_model_dir: ""
save_model_path: "" save_model_path: ""
save_checkpoint: ""
save_param: ""
lr_scheduler: "linear_warmup_decay" lr_scheduler: "linear_warmup_decay"
weight_decay: 0.01 weight_decay: 0.01
warmup_proportion: 0.1 warmup_proportion: 0.1
......
...@@ -87,21 +87,21 @@ class BertModel(object): ...@@ -87,21 +87,21 @@ class BertModel(object):
def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
# padding id in vocabulary must be set to 0 # padding id in vocabulary must be set to 0
emb_out = fluid.input.embedding( emb_out = fluid.embedding(
input=src_ids, input=src_ids,
size=[self._voc_size, self._emb_size], size=[self._voc_size, self._emb_size],
dtype=self._dtype, dtype=self._dtype,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer), name=self._word_emb_name, initializer=self._param_initializer),
is_sparse=False) is_sparse=False)
position_emb_out = fluid.input.embedding( position_emb_out = fluid.embedding(
input=position_ids, input=position_ids,
size=[self._max_position_seq_len, self._emb_size], size=[self._max_position_seq_len, self._emb_size],
dtype=self._dtype, dtype=self._dtype,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer)) name=self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.input.embedding( sent_emb_out = fluid.embedding(
sentence_ids, sentence_ids,
size=[self._sent_types, self._emb_size], size=[self._sent_types, self._emb_size],
dtype=self._dtype, dtype=self._dtype,
......
...@@ -48,8 +48,8 @@ class Paradigm(object): ...@@ -48,8 +48,8 @@ class Paradigm(object):
initializer=fluid.initializer.TruncatedNormal(scale=0.02)), initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.))) name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
if not params['is_training']: if not params['is_training']:
probs = fluid.layers.softmax(logits) probs = fluid.layers.softmax(logits)
results = {"probs": probs} results = {"probs": probs}
return results return results
......
...@@ -17,7 +17,6 @@ import re ...@@ -17,7 +17,6 @@ import re
import sys import sys
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -59,7 +59,13 @@ def optimization(loss, ...@@ -59,7 +59,13 @@ def optimization(loss,
weight_decay, weight_decay,
scheduler='linear_warmup_decay', scheduler='linear_warmup_decay',
use_fp16=False, use_fp16=False,
loss_scaling=1.0): loss_scaling=1.0,
clip_norm_thres=1.0):
# When using mixed precision training, scale the gradient clip threshold
# by loss_scaling
if use_fp16 and loss_scaling > 1.0:
clip_norm_thres *= loss_scaling
if warmup_steps > 0: if warmup_steps > 0:
if scheduler == 'noam_decay': if scheduler == 'noam_decay':
scheduled_lr = fluid.layers.learning_rate_scheduler\ scheduled_lr = fluid.layers.learning_rate_scheduler\
...@@ -71,19 +77,17 @@ def optimization(loss, ...@@ -71,19 +77,17 @@ def optimization(loss,
else: else:
raise ValueError("Unkown learning rate scheduler, should be " raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'") "'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr) optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=scheduled_lr,
grad_clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=clip_norm_thres))
else: else:
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate) optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=learning_rate,
grad_clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=clip_norm_thres))
scheduled_lr = learning_rate scheduled_lr = learning_rate
clip_norm_thres = 1.0
# When using mixed precision training, scale the gradient clip threshold
# by loss_scaling
if use_fp16 and loss_scaling > 1.0:
clip_norm_thres *= loss_scaling
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
def exclude_from_weight_decay(name): def exclude_from_weight_decay(name):
if name.find("layer_norm") > -1: if name.find("layer_norm") > -1:
return True return True
......
...@@ -29,7 +29,7 @@ DATA_MODEL_PATH = { ...@@ -29,7 +29,7 @@ DATA_MODEL_PATH = {
"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz", "DATA_PATH": "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz",
"PRETRAIN_MODEL": "PRETRAIN_MODEL":
"https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz", "https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz",
"TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/dgu_models_2.0.0.tar.gz" "TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/dgu_models_3.0.0.tar.gz"
} }
PATH_MAP = { PATH_MAP = {
......
...@@ -25,7 +25,6 @@ import paddle.fluid as fluid ...@@ -25,7 +25,6 @@ import paddle.fluid as fluid
from dgu.utils.configure import PDConfig from dgu.utils.configure import PDConfig
from dgu.utils.input_field import InputField from dgu.utils.input_field import InputField
from dgu.utils.model_check import check_cuda from dgu.utils.model_check import check_cuda
import dgu.utils.save_load_io as save_load_io
import dgu.reader as reader import dgu.reader as reader
from dgu_net import create_net from dgu_net import create_net
...@@ -97,12 +96,10 @@ def do_save_inference_model(args): ...@@ -97,12 +96,10 @@ def do_save_inference_model(args):
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(startup_prog) exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model) assert (args.init_from_params)
if args.init_from_params: if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog) fluid.load(test_prog, args.init_from_params)
elif args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
# saving inference model # saving inference model
fluid.io.save_inference_model( fluid.io.save_inference_model(
......
...@@ -16,7 +16,6 @@ import os ...@@ -16,7 +16,6 @@ import os
import sys import sys
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from eval import do_eval from eval import do_eval
......
...@@ -19,7 +19,6 @@ import sys ...@@ -19,7 +19,6 @@ import sys
import numpy as np import numpy as np
import argparse import argparse
import collections import collections
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import dgu.reader as reader import dgu.reader as reader
...@@ -30,7 +29,6 @@ import dgu.define_predict_pack as define_predict_pack ...@@ -30,7 +29,6 @@ import dgu.define_predict_pack as define_predict_pack
from dgu.utils.configure import PDConfig from dgu.utils.configure import PDConfig
from dgu.utils.input_field import InputField from dgu.utils.input_field import InputField
from dgu.utils.model_check import check_cuda from dgu.utils.model_check import check_cuda
import dgu.utils.save_load_io as save_load_io
from dgu.utils.py23 import tab_tok, rt_tok from dgu.utils.py23 import tab_tok, rt_tok
...@@ -84,7 +82,7 @@ def do_predict(args): ...@@ -84,7 +82,7 @@ def do_predict(args):
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
data_reader = fluid.io.PyReader( data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False) feed_list=input_inst, capacity=4, iterable=False)
results = create_net( results = create_net(
...@@ -95,9 +93,6 @@ def do_predict(args): ...@@ -95,9 +93,6 @@ def do_predict(args):
args=args) args=args)
probs = results.get("probs", None) probs = results.get("probs", None)
probs.persistable = True
fetch_list = [probs.name] fetch_list = [probs.name]
#for_test is True if change the is_test attribute of operators to True #for_test is True if change the is_test attribute of operators to True
...@@ -111,12 +106,10 @@ def do_predict(args): ...@@ -111,12 +106,10 @@ def do_predict(args):
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(startup_prog) exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model) assert (args.init_from_params)
if args.init_from_params: if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog) fluid.load(test_prog, args.init_from_params)
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
compiled_test_prog = fluid.CompiledProgram(test_prog) compiled_test_prog = fluid.CompiledProgram(test_prog)
...@@ -130,7 +123,7 @@ def do_predict(args): ...@@ -130,7 +123,7 @@ def do_predict(args):
batch_generator = processor.data_generator( batch_generator = processor.data_generator(
batch_size=args.batch_size, phase='test', shuffle=False) batch_size=args.batch_size, phase='test', shuffle=False)
data_reader.decorate_batch_generator(batch_generator) data_reader.set_batch_generator(batch_generator, places=place)
data_reader.start() data_reader.start()
all_results = [] all_results = []
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
export FLAGS_sync_nccl_allreduce=0 export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1 export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=1 export CUDA_VISIBLE_DEVICES=
if [ ! "$CUDA_VISIBLE_DEVICES" ] if [ ! "$CUDA_VISIBLE_DEVICES" ]
then then
export CPU_NUM=1 export CPU_NUM=1
...@@ -21,7 +21,7 @@ SAVE_MODEL_PATH="./data/saved_models/${TASK_NAME}" ...@@ -21,7 +21,7 @@ SAVE_MODEL_PATH="./data/saved_models/${TASK_NAME}"
TRAIN_MODEL_PATH="./data/saved_models/trained_models" TRAIN_MODEL_PATH="./data/saved_models/trained_models"
OUTPUT_PATH="./data/output" OUTPUT_PATH="./data/output"
INFERENCE_MODEL="data/inference_models" INFERENCE_MODEL="data/inference_models"
PYTHON_PATH="python3" PYTHON_PATH="python"
if [ -f ${SAVE_MODEL_PATH} ]; then if [ -f ${SAVE_MODEL_PATH} ]; then
rm ${SAVE_MODEL_PATH} rm ${SAVE_MODEL_PATH}
...@@ -94,7 +94,6 @@ else ...@@ -94,7 +94,6 @@ else
exit 255 exit 255
fi fi
#training #training
function train() function train()
{ {
...@@ -110,7 +109,6 @@ function train() ...@@ -110,7 +109,6 @@ function train()
--vocab_path=${BERT_BASE_PATH}/vocab.txt \ --vocab_path=${BERT_BASE_PATH}/vocab.txt \
--init_from_pretrain_model=${BERT_BASE_PATH}/params \ --init_from_pretrain_model=${BERT_BASE_PATH}/params \
--save_model_path=${SAVE_MODEL_PATH} \ --save_model_path=${SAVE_MODEL_PATH} \
--save_param="params" \
--save_steps=${save_steps} \ --save_steps=${save_steps} \
--learning_rate=${learning_rate} \ --learning_rate=${learning_rate} \
--weight_decay=0.01 \ --weight_decay=0.01 \
...@@ -128,7 +126,7 @@ function predict() ...@@ -128,7 +126,7 @@ function predict()
--batch_size=${batch_size} \ --batch_size=${batch_size} \
--data_dir=${INPUT_PATH} \ --data_dir=${INPUT_PATH} \
--do_lower_case=true \ --do_lower_case=true \
--init_from_params=${TRAIN_MODEL_PATH}/${TASK_NAME}/params \ --init_from_params=${TRAIN_MODEL_PATH}/${TASK_NAME}/params/params \
--bert_config_path=${BERT_BASE_PATH}/bert_config.json \ --bert_config_path=${BERT_BASE_PATH}/bert_config.json \
--vocab_path=${BERT_BASE_PATH}/vocab.txt \ --vocab_path=${BERT_BASE_PATH}/vocab.txt \
--output_prediction_file=${OUTPUT_PATH}/pred_${TASK_NAME} \ --output_prediction_file=${OUTPUT_PATH}/pred_${TASK_NAME} \
......
...@@ -22,7 +22,6 @@ import sys ...@@ -22,7 +22,6 @@ import sys
import time import time
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from dgu_net import create_net from dgu_net import create_net
...@@ -32,7 +31,6 @@ import dgu.define_paradigm as define_paradigm ...@@ -32,7 +31,6 @@ import dgu.define_paradigm as define_paradigm
from dgu.utils.configure import PDConfig from dgu.utils.configure import PDConfig
from dgu.utils.input_field import InputField from dgu.utils.input_field import InputField
from dgu.utils.model_check import check_cuda from dgu.utils.model_check import check_cuda
import dgu.utils.save_load_io as save_load_io
def do_train(args): def do_train(args):
...@@ -80,8 +78,9 @@ def do_train(args): ...@@ -80,8 +78,9 @@ def do_train(args):
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
data_reader = fluid.io.DataLoader.from_generator(feed_list=input_inst, capacity=4, iterable=False) data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False)
processor = processors[task_name](data_dir=args.data_dir, processor = processors[task_name](data_dir=args.data_dir,
vocab_path=args.vocab_path, vocab_path=args.vocab_path,
...@@ -103,13 +102,8 @@ def do_train(args): ...@@ -103,13 +102,8 @@ def do_train(args):
accuracy = results.get("accuracy", None) accuracy = results.get("accuracy", None)
num_seqs = results.get("num_seqs", None) num_seqs = results.get("num_seqs", None)
loss.persistable = True places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places(
probs.persistable = True )
if accuracy:
accuracy.persistable = True
num_seqs.persistable = True
places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
dev_count = len(places) dev_count = len(places)
batch_generator = processor.data_generator( batch_generator = processor.data_generator(
...@@ -149,16 +143,13 @@ def do_train(args): ...@@ -149,16 +143,13 @@ def do_train(args):
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(startup_prog) exe.run(startup_prog)
assert (args.init_from_checkpoint == "") or ( assert args.init_from_params or args.init_from_pretrain_model
args.init_from_pretrain_model == "")
# init from some checkpoint, to resume the previous training # init from some checkpoint, to resume the previous training
if args.init_from_checkpoint: if args.init_from_params:
save_load_io.init_from_checkpoint(args, exe, train_prog) fluid.load(train_prog, args.init_from_params, exe)
# init from some pretrain models, to better solve the current task
if args.init_from_pretrain_model: if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, train_prog) fluid.load(train_prog, args.init_from_pretrain_model, exe)
build_strategy = fluid.compiler.BuildStrategy() build_strategy = fluid.compiler.BuildStrategy()
build_strategy.enable_inplace = True build_strategy.enable_inplace = True
...@@ -234,21 +225,16 @@ def do_train(args): ...@@ -234,21 +225,16 @@ def do_train(args):
time_begin = time.time() time_begin = time.time()
if steps % args.save_steps == 0: if steps % args.save_steps == 0:
save_path = "step_" + str(steps) model_path = os.path.join(args.save_model_path,
if args.save_checkpoint: "step_" + str(steps))
save_load_io.save_checkpoint(args, exe, train_prog, fluid.save(train_prog, model_path)
save_path)
if args.save_param:
save_load_io.save_param(args, exe, train_prog,
save_path)
except fluid.core.EOFException: except fluid.core.EOFException:
data_reader.reset() data_reader.reset()
break break
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog, "step_final") model_path = os.path.join(args.save_model_path, "step_final")
if args.save_param: fluid.save(train_prog, model_path)
save_load_io.save_param(args, exe, train_prog, "step_final")
def get_cards(): def get_cards():
num = 0 num = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册