未验证 提交 01c07b2f 编写于 作者: 0YuanZhang0 提交者: GitHub

upgrade_1.6_op (#3498)

上级 d3767aaf
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
- cuda >= 9.0 - cuda >= 9.0
- cudnn >= 7.0 - cudnn >= 7.0
- pandas >= 0.20.1 - pandas >= 0.20.1
- PaddlePaddle >= 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle - PaddlePaddle >= 1.6.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle
  注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)   注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)
......
...@@ -35,7 +35,7 @@ def create_net( ...@@ -35,7 +35,7 @@ def create_net(
label = model_input.labels label = model_input.labels
#emb #emb
context_emb = fluid.layers.embedding( context_emb = fluid.input.embedding(
input=context_wordseq, input=context_wordseq,
size=[args.vocab_size, args.emb_size], size=[args.vocab_size, args.emb_size],
is_sparse=True, is_sparse=True,
...@@ -43,7 +43,7 @@ def create_net( ...@@ -43,7 +43,7 @@ def create_net(
name=word_emb_name, name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1))) initializer=fluid.initializer.Normal(scale=0.1)))
response_emb = fluid.layers.embedding( response_emb = fluid.input.embedding(
input=response_wordseq, input=response_wordseq,
size=[args.vocab_size, args.emb_size], size=[args.vocab_size, args.emb_size],
is_sparse=True, is_sparse=True,
......
...@@ -42,12 +42,12 @@ def do_save_inference_model(args): ...@@ -42,12 +42,12 @@ def do_save_inference_model(args):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
context_wordseq = fluid.layers.data( context_wordseq = fluid.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1) name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data( response_wordseq = fluid.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1) name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[1], dtype='int64') name='labels', shape=[-1, 1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels] input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
......
...@@ -45,12 +45,12 @@ def do_predict(args): ...@@ -45,12 +45,12 @@ def do_predict(args):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
context_wordseq = fluid.layers.data( context_wordseq = fluid.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1) name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data( response_wordseq = fluid.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1) name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[1], dtype='int64') name='labels', shape=[-1, 1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels] input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
......
...@@ -48,12 +48,12 @@ def do_train(args): ...@@ -48,12 +48,12 @@ def do_train(args):
startup_prog.random_seed = args.random_seed startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard(): with fluid.unique_name.guard():
context_wordseq = fluid.layers.data( context_wordseq = fluid.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1) name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data( response_wordseq = fluid.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1) name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1)
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[1], dtype='int64') name='labels', shape=[-1, 1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels] input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
- Python >= 2.7 - Python >= 2.7
- cuda >= 9.0 - cuda >= 9.0
- cudnn >= 7.0 - cudnn >= 7.0
- PaddlePaddle >= 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。 - PaddlePaddle >= 1.6.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。
  注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)   注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)
####   b、下载代码 ####   b、下载代码
......
...@@ -170,7 +170,7 @@ def pad_batch_data(insts, ...@@ -170,7 +170,7 @@ def pad_batch_data(insts,
inst_data = np.array( inst_data = np.array(
[inst + list([pad_idx] * (max_len - len(inst))) for inst in insts [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts
]) ])
return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])] return_list += [inst_data.astype("int64").reshape([-1, max_len])]
# position data # position data
if return_pos: if return_pos:
...@@ -179,7 +179,7 @@ def pad_batch_data(insts, ...@@ -179,7 +179,7 @@ def pad_batch_data(insts,
for inst in insts for inst in insts
]) ])
return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])] return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
if return_input_mask: if return_input_mask:
# This is used to avoid attention on paddings. # This is used to avoid attention on paddings.
......
...@@ -85,21 +85,21 @@ class BertModel(object): ...@@ -85,21 +85,21 @@ class BertModel(object):
def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
# padding id in vocabulary must be set to 0 # padding id in vocabulary must be set to 0
emb_out = fluid.layers.embedding( emb_out = fluid.input.embedding(
input=src_ids, input=src_ids,
size=[self._voc_size, self._emb_size], size=[self._voc_size, self._emb_size],
dtype=self._dtype, dtype=self._dtype,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer), name=self._word_emb_name, initializer=self._param_initializer),
is_sparse=False) is_sparse=False)
position_emb_out = fluid.layers.embedding( position_emb_out = fluid.input.embedding(
input=position_ids, input=position_ids,
size=[self._max_position_seq_len, self._emb_size], size=[self._max_position_seq_len, self._emb_size],
dtype=self._dtype, dtype=self._dtype,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer)) name=self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.layers.embedding( sent_emb_out = fluid.input.embedding(
sentence_ids, sentence_ids,
size=[self._sent_types, self._emb_size], size=[self._sent_types, self._emb_size],
dtype=self._dtype, dtype=self._dtype,
......
...@@ -59,23 +59,23 @@ def do_save_inference_model(args): ...@@ -59,23 +59,23 @@ def do_save_inference_model(args):
# define inputs of the network # define inputs of the network
num_labels = len(processors[task_name].get_labels()) num_labels = len(processors[task_name].get_labels())
src_ids = fluid.layers.data( src_ids = fluid.data(
name='src_ids', shape=[args.max_seq_len, 1], dtype='int64') name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
pos_ids = fluid.layers.data( pos_ids = fluid.data(
name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64') name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
sent_ids = fluid.layers.data( sent_ids = fluid.data(
name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64') name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
input_mask = fluid.layers.data( input_mask = fluid.data(
name='input_mask', shape=[args.max_seq_len, 1], dtype='float32') name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
if args.task_name == 'atis_slot': if args.task_name == 'atis_slot':
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[args.max_seq_len], dtype='int64') name='labels', shape=[-1, args.max_seq_len], dtype='int64')
elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']: elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[num_labels], dtype='int64') name='labels', shape=[-1, num_labels], dtype='int64')
else: else:
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[1], dtype='int64') name='labels', shape=[-1, 1], dtype='int64')
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
......
...@@ -60,23 +60,23 @@ def do_predict(args): ...@@ -60,23 +60,23 @@ def do_predict(args):
# define inputs of the network # define inputs of the network
num_labels = len(processors[task_name].get_labels()) num_labels = len(processors[task_name].get_labels())
src_ids = fluid.layers.data( src_ids = fluid.data(
name='src_ids', shape=[args.max_seq_len, 1], dtype='int64') name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
pos_ids = fluid.layers.data( pos_ids = fluid.data(
name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64') name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
sent_ids = fluid.layers.data( sent_ids = fluid.data(
name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64') name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
input_mask = fluid.layers.data( input_mask = fluid.data(
name='input_mask', shape=[args.max_seq_len, 1], dtype='float32') name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
if args.task_name == 'atis_slot': if args.task_name == 'atis_slot':
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[args.max_seq_len], dtype='int64') name='labels', shape=[-1, args.max_seq_len], dtype='int64')
elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']: elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']:
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[num_labels], dtype='int64') name='labels', shape=[-1, num_labels], dtype='int64')
else: else:
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[1], dtype='int64') name='labels', shape=[-1, 1], dtype='int64')
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
......
...@@ -60,23 +60,23 @@ def do_train(args): ...@@ -60,23 +60,23 @@ def do_train(args):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
num_labels = len(processors[task_name].get_labels()) num_labels = len(processors[task_name].get_labels())
src_ids = fluid.layers.data( src_ids = fluid.data(
name='src_ids', shape=[args.max_seq_len, 1], dtype='int64') name='src_ids', shape=[-1, args.max_seq_len], dtype='int64')
pos_ids = fluid.layers.data( pos_ids = fluid.data(
name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64') name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64')
sent_ids = fluid.layers.data( sent_ids = fluid.data(
name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64') name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64')
input_mask = fluid.layers.data( input_mask = fluid.data(
name='input_mask', shape=[args.max_seq_len, 1], dtype='float32') name='input_mask', shape=[-1, args.max_seq_len], dtype='float32')
if args.task_name == 'atis_slot': if args.task_name == 'atis_slot':
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[args.max_seq_len], dtype='int64') name='labels', shape=[-1, args.max_seq_len], dtype='int64')
elif args.task_name in ['dstc2']: elif args.task_name in ['dstc2']:
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[num_labels], dtype='int64') name='labels', shape=[-1, num_labels], dtype='int64')
else: else:
labels = fluid.layers.data( labels = fluid.data(
name='labels', shape=[1], dtype='int64') name='labels', shape=[-1, 1], dtype='int64')
input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels]
input_field = InputField(input_inst) input_field = InputField(input_inst)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册