diff --git a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/README.md b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/README.md index 88e4eb64f218b8dc8c9eabca2fb2911f1c5c69d6..a4efff2179a5367ad796b5793736c771acc4d8be 100644 --- a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/README.md +++ b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/README.md @@ -28,7 +28,7 @@ - cuda >= 9.0 - cudnn >= 7.0 - pandas >= 0.20.1 -- PaddlePaddle >= 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle +- PaddlePaddle >= 1.6.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 本模块使用bert作为pretrain model进行模型的finetuning训练,训练速度较慢,建议安装GPU版本的PaddlePaddle   注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)。 diff --git a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/ade_net.py b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/ade_net.py index d46d75faf1122d8b6e8a2843991fd5114b847c04..8907b798cf6e4457954c89997e7ad9a84fcbe61e 100755 --- a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/ade_net.py +++ b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/ade_net.py @@ -35,7 +35,7 @@ def create_net( label = model_input.labels #emb - context_emb = fluid.layers.embedding( + context_emb = fluid.input.embedding( input=context_wordseq, size=[args.vocab_size, args.emb_size], is_sparse=True, @@ -43,7 +43,7 @@ def create_net( name=word_emb_name, initializer=fluid.initializer.Normal(scale=0.1))) - response_emb = fluid.layers.embedding( + response_emb = fluid.input.embedding( input=response_wordseq, size=[args.vocab_size, args.emb_size], is_sparse=True, diff --git a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/inference_model.py b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/inference_model.py index ee4e6222cf560ccaf445994972d6abc26783ca9e..ae4968a247ea93a5f2585b0d5c179fae962e6265 100644 --- a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/inference_model.py +++ b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/inference_model.py @@ -42,12 +42,12 @@ def do_save_inference_model(args): with fluid.unique_name.guard(): - context_wordseq = fluid.layers.data( - name='context_wordseq', shape=[1], dtype='int64', lod_level=1) - response_wordseq = fluid.layers.data( - name='response_wordseq', shape=[1], dtype='int64', lod_level=1) - labels = fluid.layers.data( - name='labels', shape=[1], dtype='int64') + context_wordseq = fluid.data( + name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) + response_wordseq = fluid.data( + name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) + labels = fluid.data( + name='labels', shape=[-1, 1], dtype='int64') input_inst = [context_wordseq, response_wordseq, labels] input_field = InputField(input_inst) diff --git a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/predict.py b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/predict.py index 1f75b903c7261bcf94570181dd0324d10d7cf92a..9618e78caaa3c2562212bd34cc5ab7d2f7977ab6 100644 --- a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/predict.py +++ b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/predict.py @@ -45,12 +45,12 @@ def do_predict(args): with fluid.unique_name.guard(): - context_wordseq = fluid.layers.data( - name='context_wordseq', shape=[1], dtype='int64', lod_level=1) - response_wordseq = fluid.layers.data( - name='response_wordseq', shape=[1], dtype='int64', lod_level=1) - labels = fluid.layers.data( - name='labels', shape=[1], dtype='int64') + context_wordseq = fluid.data( + name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) + response_wordseq = fluid.data( + name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) + labels = fluid.data( + name='labels', shape=[-1, 1], dtype='int64') input_inst = [context_wordseq, response_wordseq, labels] input_field = InputField(input_inst) diff --git a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/train.py b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/train.py index 0ee6ade59509d285e816b8311a0abdf24d807171..828f03a9bea0296254c8ef2cbefef779ae26f0af 100755 --- a/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/train.py +++ b/PaddleNLP/dialogue_model_toolkit/auto_dialogue_evaluation/train.py @@ -48,12 +48,12 @@ def do_train(args): startup_prog.random_seed = args.random_seed with fluid.unique_name.guard(): - context_wordseq = fluid.layers.data( - name='context_wordseq', shape=[1], dtype='int64', lod_level=1) - response_wordseq = fluid.layers.data( - name='response_wordseq', shape=[1], dtype='int64', lod_level=1) - labels = fluid.layers.data( - name='labels', shape=[1], dtype='int64') + context_wordseq = fluid.data( + name='context_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) + response_wordseq = fluid.data( + name='response_wordseq', shape=[-1, 1], dtype='int64', lod_level=1) + labels = fluid.data( + name='labels', shape=[-1, 1], dtype='int64') input_inst = [context_wordseq, response_wordseq, labels] input_field = InputField(input_inst) diff --git a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/README.md b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/README.md index 77783058744e5b34d4b212df9d4be60b2e70a9f7..3197e9a0b1961d6f65da4a993069e33cbdb954a2 100644 --- a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/README.md +++ b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/README.md @@ -23,7 +23,7 @@ - Python >= 2.7 - cuda >= 9.0 - cudnn >= 7.0 -- PaddlePaddle >= 1.3.1,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。 +- PaddlePaddle >= 1.6.0,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装, 由于模块内模型基于bert做finetuning, 训练速度较慢, 建议用户安装GPU版本PaddlePaddle进行训练。   注意:使用Windows GPU环境的用户,需要将示例代码中的[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)。 ####   b、下载代码 diff --git a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/batching.py b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/batching.py index fc47bbe73afa3709b364c25a4a7f48c87aa41566..d668fd7dfeb0d0fa0bf025a13b8838f700d130b8 100644 --- a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/batching.py +++ b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/batching.py @@ -170,7 +170,7 @@ def pad_batch_data(insts, inst_data = np.array( [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts ]) - return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_data.astype("int64").reshape([-1, max_len])] # position data if return_pos: @@ -179,7 +179,7 @@ def pad_batch_data(insts, for inst in insts ]) - return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_pos.astype("int64").reshape([-1, max_len])] if return_input_mask: # This is used to avoid attention on paddings. diff --git a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/bert.py b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/bert.py index 5726d63f811800980b7c8527aef144900118207d..43a2a62df59b3eeb1e1764fae0636df687981b67 100644 --- a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/bert.py +++ b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/dgu/bert.py @@ -85,21 +85,21 @@ class BertModel(object): def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( + emb_out = fluid.input.embedding( input=src_ids, size=[self._voc_size, self._emb_size], dtype=self._dtype, param_attr=fluid.ParamAttr( name=self._word_emb_name, initializer=self._param_initializer), is_sparse=False) - position_emb_out = fluid.layers.embedding( + position_emb_out = fluid.input.embedding( input=position_ids, size=[self._max_position_seq_len, self._emb_size], dtype=self._dtype, param_attr=fluid.ParamAttr( name=self._pos_emb_name, initializer=self._param_initializer)) - sent_emb_out = fluid.layers.embedding( + sent_emb_out = fluid.input.embedding( sentence_ids, size=[self._sent_types, self._emb_size], dtype=self._dtype, diff --git a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/inference_model.py b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/inference_model.py index a95536bd1efa5d1bf90817f9d47f28c771bd4c52..01e6d96132998b36a3a3db55289cbe9e202e831c 100644 --- a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/inference_model.py +++ b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/inference_model.py @@ -59,23 +59,23 @@ def do_save_inference_model(args): # define inputs of the network num_labels = len(processors[task_name].get_labels()) - src_ids = fluid.layers.data( - name='src_ids', shape=[args.max_seq_len, 1], dtype='int64') - pos_ids = fluid.layers.data( - name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64') - sent_ids = fluid.layers.data( - name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64') - input_mask = fluid.layers.data( - name='input_mask', shape=[args.max_seq_len, 1], dtype='float32') + src_ids = fluid.data( + name='src_ids', shape=[-1, args.max_seq_len], dtype='int64') + pos_ids = fluid.data( + name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64') + sent_ids = fluid.data( + name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64') + input_mask = fluid.data( + name='input_mask', shape=[-1, args.max_seq_len], dtype='float32') if args.task_name == 'atis_slot': - labels = fluid.layers.data( - name='labels', shape=[args.max_seq_len], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, args.max_seq_len], dtype='int64') elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']: - labels = fluid.layers.data( - name='labels', shape=[num_labels], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, num_labels], dtype='int64') else: - labels = fluid.layers.data( - name='labels', shape=[1], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, 1], dtype='int64') input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_field = InputField(input_inst) diff --git a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/predict.py b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/predict.py index c1c32fcb545299c27d4dc75f13bd0f32475a0cb7..d159b302b304abfb345fa614b5ec92f505a3796e 100644 --- a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/predict.py +++ b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/predict.py @@ -60,23 +60,23 @@ def do_predict(args): # define inputs of the network num_labels = len(processors[task_name].get_labels()) - src_ids = fluid.layers.data( - name='src_ids', shape=[args.max_seq_len, 1], dtype='int64') - pos_ids = fluid.layers.data( - name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64') - sent_ids = fluid.layers.data( - name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64') - input_mask = fluid.layers.data( - name='input_mask', shape=[args.max_seq_len, 1], dtype='float32') + src_ids = fluid.data( + name='src_ids', shape=[-1, args.max_seq_len], dtype='int64') + pos_ids = fluid.data( + name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64') + sent_ids = fluid.data( + name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64') + input_mask = fluid.data( + name='input_mask', shape=[-1, args.max_seq_len], dtype='float32') if args.task_name == 'atis_slot': - labels = fluid.layers.data( - name='labels', shape=[args.max_seq_len], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, args.max_seq_len], dtype='int64') elif args.task_name in ['dstc2', 'dstc2_asr', 'multi-woz']: - labels = fluid.layers.data( - name='labels', shape=[num_labels], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, num_labels], dtype='int64') else: - labels = fluid.layers.data( - name='labels', shape=[1], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, 1], dtype='int64') input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_field = InputField(input_inst) diff --git a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/train.py b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/train.py index 2c4540a7bc1a9cdd08cc32c7d1e0b4a2323e99ce..51bee33974a98fbe1518acf986a57fa745d1b73c 100644 --- a/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/train.py +++ b/PaddleNLP/dialogue_model_toolkit/dialogue_general_understanding/train.py @@ -60,23 +60,23 @@ def do_train(args): with fluid.unique_name.guard(): num_labels = len(processors[task_name].get_labels()) - src_ids = fluid.layers.data( - name='src_ids', shape=[args.max_seq_len, 1], dtype='int64') - pos_ids = fluid.layers.data( - name='pos_ids', shape=[args.max_seq_len, 1], dtype='int64') - sent_ids = fluid.layers.data( - name='sent_ids', shape=[args.max_seq_len, 1], dtype='int64') - input_mask = fluid.layers.data( - name='input_mask', shape=[args.max_seq_len, 1], dtype='float32') + src_ids = fluid.data( + name='src_ids', shape=[-1, args.max_seq_len], dtype='int64') + pos_ids = fluid.data( + name='pos_ids', shape=[-1, args.max_seq_len], dtype='int64') + sent_ids = fluid.data( + name='sent_ids', shape=[-1, args.max_seq_len], dtype='int64') + input_mask = fluid.data( + name='input_mask', shape=[-1, args.max_seq_len], dtype='float32') if args.task_name == 'atis_slot': - labels = fluid.layers.data( - name='labels', shape=[args.max_seq_len], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, args.max_seq_len], dtype='int64') elif args.task_name in ['dstc2']: - labels = fluid.layers.data( - name='labels', shape=[num_labels], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, num_labels], dtype='int64') else: - labels = fluid.layers.data( - name='labels', shape=[1], dtype='int64') + labels = fluid.data( + name='labels', shape=[-1, 1], dtype='int64') input_inst = [src_ids, pos_ids, sent_ids, input_mask, labels] input_field = InputField(input_inst)