未验证 提交 01c07b2f 编写于 作者: 0 0YuanZhang0 提交者: GitHub

upgrade_1.6_op (#3498)

上级 d3767aaf
......@@ -28,7 +28,7 @@
- cuda >= 9.0
- cudnn >= 7.0
- pandas >= 0.20.1
- PaddlePaddle >= 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle
- PaddlePaddle >= 1.6.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle
  注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.6/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.6/api_cn/fluid_cn.html#executor)
......
......@@ -35,7 +35,7 @@ def create_net(
label = model_input.labels
#emb
context_emb = fluid.layers.embedding(
context_emb = fluid.input.embedding(
input=context_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
......@@ -43,7 +43,7 @@ def create_net(
name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
response_emb = fluid.layers.embedding(
response_emb = fluid.input.embedding(
input=response_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
......
......@@ -42,12 +42,12 @@ def do_save_inference_model(args):
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
context_wordseq = fluid.data(
name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
response_wordseq = fluid.data(
name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
labels = fluid.data(
name='labels', shape=[-1, 1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
......
......@@ -45,12 +45,12 @@ def do_predict(args):
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
context_wordseq = fluid.data(
name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
response_wordseq = fluid.data(
name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
labels = fluid.data(
name='labels', shape=[-1, 1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
......
......@@ -48,12 +48,12 @@ def do_train(args):
startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
context_wordseq = fluid.data(
name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
response_wordseq = fluid.data(
name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
labels = fluid.data(
name='labels', shape=[-1, 1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
......
......@@ -23,7 +23,7 @@
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
- PaddlePaddle >= 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。
- PaddlePaddle >= 1.6.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。
  注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.6/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.6/api_cn/fluid_cn.html#executor)
####   b、下载代码
......
......@@ -170,7 +170,7 @@ def pad_batch_data(insts,
inst_data = np.array(
[inst + list([pad_idx] * (max_len - len(inst))) for inst in insts
])
return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
return_list += [inst_data.astype("int64").reshape([-1, max_len])]
# position data
if return_pos:
......@@ -179,7 +179,7 @@ def pad_batch_data(insts,
for inst in insts
])
return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
if return_input_mask:
# This is used to avoid attention on paddings.
......
......@@ -85,21 +85,21 @@ class BertModel(object):
def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
# padding id in vocabulary must be set to 0
emb_out = fluid.layers.embedding(
emb_out = fluid.input.embedding(
input=src_ids,
size=[self._voc_size, self._emb_size],
dtype=self._dtype,
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
position_emb_out = fluid.layers.embedding(
position_emb_out = fluid.input.embedding(
input=position_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype=self._dtype,
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.layers.embedding(
sent_emb_out = fluid.input.embedding(
sentence_ids,
size=[self._sent_types, self._emb_size],
dtype=self._dtype,
......
......@@ -59,23 +59,23 @@ def do_save_inference_model(args):
# define inputs of the network
num_labels = len(processors[task_name].get_labels())
src_ids = fluid.layers.data(
name='src_ids', shape=[args.max_seq_len, 1], dtype='int64')
pos_ids = fluid.layers.data(
name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64')
sent_ids = fluid.layers.data(
name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64')
input_mask = fluid.layers.data(
name='input_mask', shape=[args.max_seq_len, 1], dtype='float32')
src_ids = fluid.data(
name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
pos_ids = fluid.data(
name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
sent_ids = fluid.data(
name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
input_mask = fluid.data(
name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
if args.task_name == 'atis_slot':
labels = fluid.layers.data(
name='labels', shape=[args.max_seq_len], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, args.max_seq_len], dtype='int64')
elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
labels = fluid.layers.data(
name='labels', shape=[num_labels], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, num_labels], dtype='int64')
else:
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, 1], dtype='int64')
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst)
......
......@@ -60,23 +60,23 @@ def do_predict(args):
# define inputs of the network
num_labels = len(processors[task_name].get_labels())
src_ids = fluid.layers.data(
name='src_ids', shape=[args.max_seq_len, 1], dtype='int64')
pos_ids = fluid.layers.data(
name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64')
sent_ids = fluid.layers.data(
name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64')
input_mask = fluid.layers.data(
name='input_mask', shape=[args.max_seq_len, 1], dtype='float32')
src_ids = fluid.data(
name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
pos_ids = fluid.data(
name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
sent_ids = fluid.data(
name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
input_mask = fluid.data(
name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
if args.task_name == 'atis_slot':
labels = fluid.layers.data(
name='labels', shape=[args.max_seq_len], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, args.max_seq_len], dtype='int64')
elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
labels = fluid.layers.data(
name='labels', shape=[num_labels], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, num_labels], dtype='int64')
else:
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, 1], dtype='int64')
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst)
......
......@@ -60,23 +60,23 @@ def do_train(args):
with fluid.unique_name.guard():
num_labels = len(processors[task_name].get_labels())
src_ids = fluid.layers.data(
name='src_ids', shape=[args.max_seq_len, 1], dtype='int64')
pos_ids = fluid.layers.data(
name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64')
sent_ids = fluid.layers.data(
name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64')
input_mask = fluid.layers.data(
name='input_mask', shape=[args.max_seq_len, 1], dtype='float32')
src_ids = fluid.data(
name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
pos_ids = fluid.data(
name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
sent_ids = fluid.data(
name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
input_mask = fluid.data(
name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
if args.task_name == 'atis_slot':
labels = fluid.layers.data(
name='labels', shape=[args.max_seq_len], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, args.max_seq_len], dtype='int64')
elif args.task_name in ['dstc2']:
labels = fluid.layers.data(
name='labels', shape=[num_labels], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, num_labels], dtype='int64')
else:
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
labels = fluid.data(
name='labels', shape=[-1, 1], dtype='int64')
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册