未验证 提交 5a41865d 编写于 作者: 0 0YuanZhang0 提交者: GitHub

Upgrade dialogue models to paddle 1.8 (#4594)

* upgrade_dialogue_models

* fix_upgrade_models_comments
上级 a5c2db94
......@@ -10,11 +10,11 @@
## 快速开始
**目前模型要求使用PaddlePaddle 1.6及以上版本或适当的develop版本运行。**
**目前模型要求使用PaddlePaddle 1.8及以上版本或适当的develop版本运行。**
### 1. Paddle版本安装
本项目训练模块兼容Python2.7.x以及Python3.7.x, 依赖PaddlePaddle 1.6版本以及CentOS系统环境, 安装请参考官网 [快速安装](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/install/index_cn.html)
本项目训练模块兼容Python2.7.x以及Python3.7.x, 依赖PaddlePaddle 1.8版本以及CentOS系统环境, 安装请参考官网 [快速安装](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/install/index_cn.html)
注意:该模型同时支持在CPU和GPU上进行训练和预测,用户可以根据自身需求,选择安装对应的paddlepaddle-gpu或paddlepaddle版本。
......
文件模式从 100755 更改为 100644
......@@ -452,7 +452,7 @@ def main(args):
if args.use_cuda:
test_place = fluid.cuda_places(0)
place = fluid.cuda_places()
DEV_COUNT = fluid.core.get_cuda_device_count()
DEV_COUNT = len(place)
else:
test_place = fluid.cpu_places(1)
os.environ['CPU_NUM'] = str(args.cpu_num)
......
......@@ -130,12 +130,11 @@ class DataReader(object):
assert os.path.exists(data_path), "The given data file does not exist."
if mode == "train":
train_reader = fluid.io.batch(
paddle.reader.shuffle(
fluid.io.shuffle(
self.data_reader(
data_path, self.max_len, shuffle=True),
buf_size=batch_size * 100),
batch_size)
# train_reader = fluid.io.batch(self.data_reader(data_path), batch_size)
return train_reader
else:
test_reader = fluid.io.batch(
......
......@@ -30,7 +30,7 @@
- cuda >= 9.0
- cudnn >= 7.0
- pandas >= 0.20.1
- PaddlePaddle >= 1.7.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle
- PaddlePaddle >= 1.8.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle
####   b、下载代码
......@@ -119,13 +119,10 @@ emb_size: embedding层大小
vocab_size: 词表大小
sample_pro: 采样比率
output_prediction_file: 输出的预测文件
init_from_checkpoint: 加载断点模型
init_from_params: 训练好的模型参数文件,一般用于预测
init_from_pretrain_model: 预训练模型路径,如bert的模型参数
inference_model_dir: inference model的保存路径
save_model_path: 训练产出模型的输出路径
save_checkpoint: 调用paddle的io接口save_persistables(把传入的层中所有参数以及优化器进行保存)来保存模型参数
save_param: 调用paddle的io接口save_params(从main_program中取出所有参数然后保存到文件中)来保存模型参数
evaluation_file: 参与评估的inference 文件
vocab_path: 词表路径
max_seq_len: 输入最大序列长度
......@@ -199,7 +196,6 @@ python -u main.py \
--loss_type="CLS" \
--max_seq_len=50 \
--save_model_path="data/saved_models/matching_pretrained" \
--save_param="params" \
--training_file="data/input/data/unlabel_data/train.ids" \
--epoch=20 \
--print_step=1 \
......@@ -217,7 +213,7 @@ python -u main.py \
#### windows环境下:
训练:
```
python -u main.py --do_train=true --use_cuda=false --loss_type=CLS --max_seq_len=50 --save_model_path=data\saved_models\matching_pretrained --save_param=params --training_file=data\input\data\unlabel_data\train.ids --epoch=20 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1
python -u main.py --do_train=true --use_cuda=false --loss_type=CLS --max_seq_len=50 --save_model_path=data\saved_models\matching_pretrained --training_file=data\input\data\unlabel_data\train.ids --epoch=20 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1
```
#### 2、第二阶段finetuning模型的训练:
......@@ -271,9 +267,8 @@ python -u main.py \
--use_cuda=${use_cuda} \
--loss_type="L2" \
--max_seq_len=50 \
--init_from_pretrain_model="data/saved_models/trained_models/matching_pretrained/params" \
--init_from_pretrain_model="data/saved_models/trained_models/matching_pretrained/params/params" \
--save_model_path="data/saved_models/human_finetuned" \
--save_param="params" \
--training_file="data/input/data/label_data/human/train.ids" \
--epoch=50 \
--print_step=1 \
......@@ -288,7 +283,7 @@ python -u main.py \
#### windows环境下:
```
python -u main.py --do_train=true --use_cuda=false --loss_type=L2 --max_seq_len=50 --save_model_path=data\saved_models\human_finetuned --save_param=params --training_file=data\input\data\label_data\human\train.ids --epoch=50 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1
python -u main.py --do_train=true --use_cuda=false --loss_type=L2 --max_seq_len=50 --save_model_path=data\saved_models\human_finetuned --training_file=data\input\data\label_data\human\train.ids --epoch=50 --print_step=1 --save_step=400 --batch_size=256 --hidden_size=256 --emb_size=256 --vocab_size=484016 --learning_rate=0.001 --sample_pro=0.1
```
### 模型预测
......
......@@ -29,7 +29,7 @@ DATA_MODEL_PATH = {
"DATA_PATH":
"https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz",
"TRAINED_MODEL":
"https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.2.0.0.tar.gz"
"https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.3.0.0.tar.gz"
}
PATH_MAP = {'DATA_PATH': "./data/input", 'TRAINED_MODEL': './data/saved_models'}
......
......@@ -34,7 +34,7 @@ def create_net(is_training,
label = model_input.labels
#emb
context_emb = fluid.input.embedding(
context_emb = fluid.embedding(
input=context_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
......@@ -42,7 +42,7 @@ def create_net(is_training,
name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
response_emb = fluid.input.embedding(
response_emb = fluid.embedding(
input=response_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
......
......@@ -14,13 +14,10 @@ emb_size: 256
vocab_size: 484016
sample_pro: 1.0
output_prediction_file: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
evaluation_file: ""
vocab_path: ""
max_seq_len: 128
......
......@@ -27,7 +27,6 @@ from ade_net import create_net
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_save_inference_model(args):
......@@ -55,7 +54,7 @@ def do_save_inference_model(args):
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(
data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False)
logits = create_net(
......@@ -72,9 +71,9 @@ def do_save_inference_model(args):
assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
fluid.load(test_prog, args.init_from_params)
elif args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
fluid.load(test_prog, args.init_from_pretrain_model)
# saving inference model
fluid.io.save_inference_model(
......
......@@ -29,7 +29,6 @@ from ade_net import create_net
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_predict(args):
......@@ -59,12 +58,11 @@ def do_predict(args):
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(
data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False)
logits = create_net(
is_training=False, model_input=input_field, args=args)
logits.persistable = True
fetch_list = [logits.name]
# for_test=True changes the is_test attribute of operators to True
......@@ -79,9 +77,9 @@ def do_predict(args):
assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
fluid.load(test_prog, args.init_from_params, executor=exe)
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
fluid.load(test_prog, args.init_from_pretrain_model, executor=exe)
compiled_test_prog = fluid.CompiledProgram(test_prog)
......@@ -94,7 +92,7 @@ def do_predict(args):
place=place, phase="test", shuffle=False, sample_pro=1)
num_test_examples = processor.get_num_examples(phase='test')
data_reader.decorate_batch_generator(batch_generator)
data_reader.set_batch_generator(batch_generator, places=place)
data_reader.start()
scores = []
......@@ -110,7 +108,7 @@ def do_predict(args):
print("Write the predicted results into the output_prediction_file")
fw = io.open(args.output_prediction_file, 'w', encoding="utf8")
for index, score in enumerate(scores):
fw.write("%s\t%s\n" % (index, score))
fw.write(u"%s\t%s\n" % (index, score[0]))
print("finish........................................")
......
......@@ -67,7 +67,6 @@ function pretrain_train()
--loss_type="CLS" \
--max_seq_len=50 \
--save_model_path=${pretrain_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/unlabel_data/train.ids" \
--epoch=20 \
--print_step=1 \
......@@ -99,9 +98,8 @@ function finetuning_train()
--use_cuda=${1} \
--loss_type="L2" \
--max_seq_len=50 \
--init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/params/step_final" \
--init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/step_final" \
--save_model_path=${save_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/label_data/${2}/train.ids" \
--epoch=50 \
--print_step=1 \
......@@ -121,7 +119,7 @@ function pretrain_predict()
--do_predict=true \
--use_cuda=${1} \
--predict_file="${INPUT_PATH}/unlabel_data/test.ids" \
--init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
--init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params/params" \
--loss_type="CLS" \
--output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
--max_seq_len=50 \
......@@ -137,7 +135,7 @@ function finetuning_predict()
--do_predict=true \
--use_cuda=${1} \
--predict_file="${INPUT_PATH}/label_data/${2}/test.ids" \
--init_from_params=${SAVED_MODELS}/trained_models/${2}_finetuned/params \
--init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params/params" \
--loss_type="L2" \
--output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
--max_seq_len=50 \
......
......@@ -29,7 +29,6 @@ from ade_net import create_net, set_word_embedding
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
try:
import cPickle as pickle #python 2
......@@ -62,24 +61,27 @@ def do_train(args):
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(
data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False)
loss = create_net(
is_training=True, model_input=input_field, args=args)
loss.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=args.learning_rate,
grad_clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer.minimize(loss)
if args.use_cuda:
dev_count = fluid.core.get_cuda_device_count()
places = fluid.cuda_places()
dev_count = len(places)
place = fluid.CUDAPlace(
int(os.getenv('FLAGS_selected_gpus', '0')))
else:
dev_count = int(os.environ.get('CPU_NUM', 1))
places = fluid.cpu_places()
dev_count = len(places)
place = fluid.CPUPlace()
processor = reader.DataProcessor(
......@@ -99,20 +101,20 @@ def do_train(args):
print("Num train examples: %d" % num_train_examples)
print("Max train steps: %d" % max_train_steps)
data_reader.decorate_batch_generator(batch_generator)
data_reader.set_batch_generator(batch_generator, places=place)
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_checkpoint == "") or (
assert (args.init_from_params == "") or (
args.init_from_pretrain_model == "")
#init from some checkpoint, to resume the previous training
if args.init_from_checkpoint:
save_load_io.init_from_checkpoint(args, exe, train_prog)
if args.init_from_params:
fluid.load(train_prog, args.init_from_params, exe)
#init from some pretrain models, to better solve the current task
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, train_prog)
fluid.load(train_prog, args.init_from_pretrain_model, exe)
if args.word_emb_init:
print("start loading word embedding init ...")
......@@ -163,21 +165,17 @@ def do_train(args):
time_begin = time.time()
if steps % args.save_steps == 0:
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog,
"step_" + str(steps))
if args.save_param:
save_load_io.save_param(args, exe, train_prog,
"step_" + str(steps))
model_path = os.path.join(args.save_model_path,
"step_" + str(steps))
fluid.save(train_prog, model_path)
steps += 1
except fluid.core.EOFException:
data_reader.reset()
break
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
if args.save_param:
save_load_io.save_param(args, exe, train_prog, "step_final")
model_path = os.path.join(args.save_model_path, "step_final")
fluid.save(train_prog, model_path)
def get_cards():
num = 0
......
......@@ -23,7 +23,7 @@
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
- PaddlePaddle >= 1.7.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。
- PaddlePaddle >= 1.8.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。
####   b、下载代码
......@@ -123,13 +123,10 @@ format:conversation_content \t question \1 answer \t state1 state2 state3.....
task_name: 任务名称,可选udc、swda、mrda、atis_intent、atis_slot、dstc2
data_dir: 数据路径,如./data/input/data/udc
bert_config_path: 预训练模型bert的网络配置./data/pretrain_model/uncased_L-12_H-768_A-12/bert_config.json
init_from_checkpoint: 加载断点模型
init_from_params: 训练好的模型参数文件,一般用于预测
init_from_pretrain_model: 预训练模型路径,如bert的模型参数
inference_model_dir: inference model的保存路径
save_model_path: 训练产出模型的输出路径
save_checkpoint: 调用paddle的io接口save_persistables(把传入的层中所有参数以及优化器进行保存)来保存模型参数
save_param: 调用paddle的io接口save_params(从main_program中取出所有参数然后保存到文件中)来保存模型参数
lr_scheduler: learning rate scheduler
weight_decay: 权重衰减(weight decay)因子
warmup_proportion: warmup比率
......@@ -221,7 +218,6 @@ python -u main.py \
--vocab_path="${BERT_BASE_PATH}/vocab.txt" \
--init_from_pretrain_model="${BERT_BASE_PATH}/params" \
--save_model_path="./data/saved_models/${TASK_NAME}" \
--save_param="params" \
--save_steps=100 \
--learning_rate=2e-5 \
--weight_decay=0.01 \
......@@ -235,7 +231,7 @@ python -u main.py \
#### windows环境下
```
python -u main.py --task_name=atis_intent --use_cuda=false --do_train=true --epoch=20 --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --init_from_pretrain_model=data\pretrain_model\uncased_L-12_H-768_A-12\params --save_model_path=data\saved_models\atis_intent --save_param=params --save_steps=100 --learning_rate=2e-5 --weight_decay=0.01 --max_seq_len=128 --print_steps=10
python -u main.py --task_name=atis_intent --use_cuda=false --do_train=true --epoch=20 --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --init_from_pretrain_model=data\pretrain_model\uncased_L-12_H-768_A-12\params --save_model_path=data\saved_models\atis_intent --save_steps=100 --learning_rate=2e-5 --weight_decay=0.01 --max_seq_len=128 --print_steps=10
```
### 模型预测
......@@ -294,7 +290,7 @@ python -u main.py \
--batch_size=32 \
--do_lower_case=true \
--data_dir="./data/input/data/atis/${TASK_NAME}" \
--init_from_params="./data/saved_models/trained_models/${TASK_NAME}/params" \
--init_from_params="./data/saved_models/trained_models/${TASK_NAME}/params/params" \
--bert_config_path="${BERT_BASE_PATH}/bert_config.json" \
--vocab_path="${BERT_BASE_PATH}/vocab.txt" \
--output_prediction_file="./data/output/pred_${TASK_NAME}" \
......@@ -305,7 +301,7 @@ python -u main.py \
#### windows环境下
```
python -u main.py --task_name=atis_intent --use_cuda=false --do_predict=true --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --init_from_params=data\saved_models\trained_models\atis_intent\params --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --output_prediction_file=data\output\pred_atis_intent --max_seq_len=128
python -u main.py --task_name=atis_intent --use_cuda=false --do_predict=true --batch_size=32 --do_lower_case=true --data_dir=data\input\data\atis\atis_intent --init_from_params=data\saved_models\trained_models\atis_intent\params\params --bert_config_path=data\pretrain_model\uncased_L-12_H-768_A-12\bert_config.json --vocab_path=data\pretrain_model\uncased_L-12_H-768_A-12\vocab.txt --output_prediction_file=data\output\pred_atis_intent --max_seq_len=128
```
### 模型评估
......
task_name: ""
data_dir: ""
bert_config_path: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
lr_scheduler: "linear_warmup_decay"
weight_decay: 0.01
warmup_proportion: 0.1
......
......@@ -87,21 +87,21 @@ class BertModel(object):
def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
# padding id in vocabulary must be set to 0
emb_out = fluid.input.embedding(
emb_out = fluid.embedding(
input=src_ids,
size=[self._voc_size, self._emb_size],
dtype=self._dtype,
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
position_emb_out = fluid.input.embedding(
position_emb_out = fluid.embedding(
input=position_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype=self._dtype,
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.input.embedding(
sent_emb_out = fluid.embedding(
sentence_ids,
size=[self._sent_types, self._emb_size],
dtype=self._dtype,
......
......@@ -48,8 +48,8 @@ class Paradigm(object):
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
if not params['is_training']:
if not params['is_training']:
probs = fluid.layers.softmax(logits)
results = {"probs": probs}
return results
......
......@@ -17,7 +17,6 @@ import re
import sys
import numpy as np
import paddle
import paddle.fluid as fluid
......
......@@ -59,7 +59,13 @@ def optimization(loss,
weight_decay,
scheduler='linear_warmup_decay',
use_fp16=False,
loss_scaling=1.0):
loss_scaling=1.0,
clip_norm_thres=1.0):
# When using mixed precision training, scale the gradient clip threshold
# by loss_scaling
if use_fp16 and loss_scaling > 1.0:
clip_norm_thres *= loss_scaling
if warmup_steps > 0:
if scheduler == 'noam_decay':
scheduled_lr = fluid.layers.learning_rate_scheduler\
......@@ -71,19 +77,17 @@ def optimization(loss,
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=scheduled_lr,
grad_clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=clip_norm_thres))
else:
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=learning_rate,
grad_clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=clip_norm_thres))
scheduled_lr = learning_rate
clip_norm_thres = 1.0
# When using mixed precision training, scale the gradient clip threshold
# by loss_scaling
if use_fp16 and loss_scaling > 1.0:
clip_norm_thres *= loss_scaling
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
def exclude_from_weight_decay(name):
if name.find("layer_norm") > -1:
return True
......
......@@ -29,7 +29,7 @@ DATA_MODEL_PATH = {
"DATA_PATH": "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz",
"PRETRAIN_MODEL":
"https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz",
"TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/dgu_models_2.0.0.tar.gz"
"TRAINED_MODEL": "https://baidu-nlp.bj.bcebos.com/dgu_models_3.0.0.tar.gz"
}
PATH_MAP = {
......
......@@ -25,7 +25,6 @@ import paddle.fluid as fluid
from dgu.utils.configure import PDConfig
from dgu.utils.input_field import InputField
from dgu.utils.model_check import check_cuda
import dgu.utils.save_load_io as save_load_io
import dgu.reader as reader
from dgu_net import create_net
......@@ -97,12 +96,10 @@ def do_save_inference_model(args):
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model)
assert (args.init_from_params)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
elif args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
fluid.load(test_prog, args.init_from_params)
# saving inference model
fluid.io.save_inference_model(
......
......@@ -16,7 +16,6 @@ import os
import sys
import numpy as np
import paddle
import paddle.fluid as fluid
from eval import do_eval
......
......@@ -19,7 +19,6 @@ import sys
import numpy as np
import argparse
import collections
import paddle
import paddle.fluid as fluid
import dgu.reader as reader
......@@ -30,7 +29,6 @@ import dgu.define_predict_pack as define_predict_pack
from dgu.utils.configure import PDConfig
from dgu.utils.input_field import InputField
from dgu.utils.model_check import check_cuda
import dgu.utils.save_load_io as save_load_io
from dgu.utils.py23 import tab_tok, rt_tok
......@@ -84,7 +82,7 @@ def do_predict(args):
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(
data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False)
results = create_net(
......@@ -95,9 +93,6 @@ def do_predict(args):
args=args)
probs = results.get("probs", None)
probs.persistable = True
fetch_list = [probs.name]
# for_test=True changes the is_test attribute of operators to True
......@@ -111,12 +106,10 @@ def do_predict(args):
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model)
assert (args.init_from_params)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
fluid.load(test_prog, args.init_from_params)
compiled_test_prog = fluid.CompiledProgram(test_prog)
......@@ -130,7 +123,7 @@ def do_predict(args):
batch_generator = processor.data_generator(
batch_size=args.batch_size, phase='test', shuffle=False)
data_reader.decorate_batch_generator(batch_generator)
data_reader.set_batch_generator(batch_generator, places=place)
data_reader.start()
all_results = []
......
......@@ -3,7 +3,7 @@
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=1
export CUDA_VISIBLE_DEVICES=
if [ ! "$CUDA_VISIBLE_DEVICES" ]
then
export CPU_NUM=1
......@@ -21,7 +21,7 @@ SAVE_MODEL_PATH="./data/saved_models/${TASK_NAME}"
TRAIN_MODEL_PATH="./data/saved_models/trained_models"
OUTPUT_PATH="./data/output"
INFERENCE_MODEL="data/inference_models"
PYTHON_PATH="python3"
PYTHON_PATH="python"
if [ -f ${SAVE_MODEL_PATH} ]; then
rm ${SAVE_MODEL_PATH}
......@@ -94,7 +94,6 @@ else
exit 255
fi
#training
function train()
{
......@@ -110,7 +109,6 @@ function train()
--vocab_path=${BERT_BASE_PATH}/vocab.txt \
--init_from_pretrain_model=${BERT_BASE_PATH}/params \
--save_model_path=${SAVE_MODEL_PATH} \
--save_param="params" \
--save_steps=${save_steps} \
--learning_rate=${learning_rate} \
--weight_decay=0.01 \
......@@ -128,7 +126,7 @@ function predict()
--batch_size=${batch_size} \
--data_dir=${INPUT_PATH} \
--do_lower_case=true \
--init_from_params=${TRAIN_MODEL_PATH}/${TASK_NAME}/params \
--init_from_params=${TRAIN_MODEL_PATH}/${TASK_NAME}/params/params \
--bert_config_path=${BERT_BASE_PATH}/bert_config.json \
--vocab_path=${BERT_BASE_PATH}/vocab.txt \
--output_prediction_file=${OUTPUT_PATH}/pred_${TASK_NAME} \
......
......@@ -22,7 +22,6 @@ import sys
import time
import numpy as np
import paddle
import paddle.fluid as fluid
from dgu_net import create_net
......@@ -32,7 +31,6 @@ import dgu.define_paradigm as define_paradigm
from dgu.utils.configure import PDConfig
from dgu.utils.input_field import InputField
from dgu.utils.model_check import check_cuda
import dgu.utils.save_load_io as save_load_io
def do_train(args):
......@@ -80,8 +78,9 @@ def do_train(args):
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.DataLoader.from_generator(feed_list=input_inst, capacity=4, iterable=False)
data_reader = fluid.io.DataLoader.from_generator(
feed_list=input_inst, capacity=4, iterable=False)
processor = processors[task_name](data_dir=args.data_dir,
vocab_path=args.vocab_path,
......@@ -103,13 +102,8 @@ def do_train(args):
accuracy = results.get("accuracy", None)
num_seqs = results.get("num_seqs", None)
loss.persistable = True
probs.persistable = True
if accuracy:
accuracy.persistable = True
num_seqs.persistable = True
places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places(
)
dev_count = len(places)
batch_generator = processor.data_generator(
......@@ -149,16 +143,13 @@ def do_train(args):
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_checkpoint == "") or (
args.init_from_pretrain_model == "")
assert args.init_from_params or args.init_from_pretrain_model
# init from some checkpoint, to resume the previous training
if args.init_from_checkpoint:
save_load_io.init_from_checkpoint(args, exe, train_prog)
# init from some pretrain models, to better solve the current task
if args.init_from_params:
fluid.load(train_prog, args.init_from_params, exe)
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, train_prog)
fluid.load(train_prog, args.init_from_pretrain_model, exe)
build_strategy = fluid.compiler.BuildStrategy()
build_strategy.enable_inplace = True
......@@ -234,21 +225,16 @@ def do_train(args):
time_begin = time.time()
if steps % args.save_steps == 0:
save_path = "step_" + str(steps)
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog,
save_path)
if args.save_param:
save_load_io.save_param(args, exe, train_prog,
save_path)
model_path = os.path.join(args.save_model_path,
"step_" + str(steps))
fluid.save(train_prog, model_path)
except fluid.core.EOFException:
data_reader.reset()
break
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
if args.save_param:
save_load_io.save_param(args, exe, train_prog, "step_final")
model_path = os.path.join(args.save_model_path, "step_final")
fluid.save(train_prog, model_path)
def get_cards():
num = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册