diff --git a/PaddleNLP/examples/dialogue/dgu/README.md b/PaddleNLP/examples/dialogue/dgu/README.md
index 3a8ba8b17766de094e2abb40b63f97460e77d2b1..6afa6b850312a45d93bb4fea3c67ab6fe2171e7c 100644
--- a/PaddleNLP/examples/dialogue/dgu/README.md
+++ b/PaddleNLP/examples/dialogue/dgu/README.md
@@ -7,12 +7,12 @@
 DGU covers 6 tasks in total, all trained and evaluated on public datasets with Paddle 2.0. Details:
 
 ```
-DRS: Dialogue Response Selection on the UDC (Ubuntu Corpus V1) dataset;
-DST: Dialogue State Tracking on the DSTC2 (Dialog State Tracking Challenge 2) dataset;
-DSF: Dialogue Slot Filling on the ATIS (Airline Travel Information System) dataset;
-DID: Dialogue Intent Detection on the ATIS (Airline Travel Information System) dataset;
-MRDA: Dialogue Act Detection on the MRDAC (Meeting Recorder Dialogue Act Corpus) dataset;
-SwDA: Dialogue Act Detection on the SwDAC (Switchboard Dialogue Act Corpus) dataset;
+udc: Dialogue Response Selection on the UDC (Ubuntu Corpus V1) dataset;
+dstc2: Dialogue State Tracking on the DSTC2 (Dialog State Tracking Challenge 2) dataset;
+atis_slot: Dialogue Slot Filling on the ATIS (Airline Travel Information System) dataset;
+atis_intent: Dialogue Intent Detection on the ATIS (Airline Travel Information System) dataset;
+mrda: Dialogue Act Detection on the MRDAC (Meeting Recorder Dialogue Act Corpus) dataset;
+swda: Dialogue Act Detection on the SwDAC (Switchboard Dialogue Act Corpus) dataset;
 ```
 
 ## Model results
 
@@ -21,14 +21,14 @@
 The 6 DGU tasks are evaluated on their test sets, each with its own metric:
 
 <table>
 <tr><th>Task</th><th>Metric</th><th>DGU</th></tr>
-<tr><td rowspan="3">DRS</td><td>R1@10</td><td>81.04%</td></tr>
+<tr><td rowspan="3">udc</td><td>R1@10</td><td>81.04%</td></tr>
 <tr><td>R2@10</td><td>89.85%</td></tr>
 <tr><td>R5@10</td><td>97.59%</td></tr>
-<tr><td>DST</td><td>Joint_Acc</td><td>90.43%</td></tr>
-<tr><td>DSF</td><td>F1_Micro</td><td>97.98%</td></tr>
-<tr><td>DID</td><td>Acc</td><td>97.42%</td></tr>
-<tr><td>MRDA</td><td>Acc</td><td>90.94%</td></tr>
-<tr><td>SwDA</td><td>Acc</td><td>80.61%</td></tr>
+<tr><td>dstc2</td><td>Joint_Acc</td><td>90.43%</td></tr>
+<tr><td>atis_slot</td><td>F1_Micro</td><td>97.98%</td></tr>
+<tr><td>atis_intent</td><td>Acc</td><td>97.42%</td></tr>
+<tr><td>mrda</td><td>Acc</td><td>90.94%</td></tr>
+<tr><td>swda</td><td>Acc</td><td>80.61%</td></tr>
 </table>
 
 **NOTE:** The results above were all obtained by training and evaluating with the default configuration on a single GPU; to reproduce them, run training and evaluation on a single GPU with the default configuration.
@@ -77,22 +77,22 @@ DGU_datasets directory layout:
 
 ```text
 DGU_datasets/
-├── did
+├── atis_intent
 │   ├── dev.txt
 │   ├── map_tag_intent_id.txt
 │   ├── test.txt
 │   └── train.txt
-├── drs
+├── udc
 │   ├── dev.txt
 │   ├── dev.txt-small
 │   ├── test.txt
 │   └── train.txt
-├── dsf
+├── atis_slot
 │   ├── dev.txt
 │   ├── map_tag_slot_id.txt
 │   ├── test.txt
 │   └── train.txt
-├── dst
+├── dstc2
 │   ├── dev.txt
 │   ├── map_tag_id.txt
 │   ├── test.txt
@@ -112,16 +112,16 @@
 Each line of the data consists of several columns separated by "\t". The formats in detail:
 
 ```
-drs: a label, the multi-turn conversation conv, and the response
+udc: a label, the multi-turn conversation conv, and the response
 Format: label \t conv1 \t conv2 \t conv3 \t ... \t response
 
-dst: a conversation id, the current-turn QA pair (joined with \1), and the dialogue state sequence state_list (states separated by spaces)
+dstc2: a conversation id, the current-turn QA pair (joined with \1), and the dialogue state sequence state_list (states separated by spaces)
 Format: conversation_id \t question \1 answer \t state1 state2 state3 ...
 
-dsf: the conversation_content and the label sequence label_list (labels separated by spaces); labels map one-to-one to the words in conversation_content
+atis_slot: the conversation_content and the label sequence label_list (labels separated by spaces); labels map one-to-one to the words in conversation_content
 Format: conversation_content \t label1 label2 label3 ...
 
-did: a label and the conversation_content
+atis_intent: a label and the conversation_content
 Format: label \t conversation_content
 
 mrda: a conversation id, label, caller, and conversation_content
@@ -140,14 +140,14 @@ swda: a conversation id, label, caller, and conversation_content
 ```shell
 export CUDA_VISIBLE_DEVICES=0,1
 # Launch on GPU; n_gpu sets how many GPUs to train on, one or several. By default this trains, validates and evaluates.
-python -u main.py --task_name=drs --data_dir=./DGU_datasets/drs --output_dir=./checkpoints/drs --n_gpu=2
+python -u main.py --task_name=udc --data_dir=./DGU_datasets/udc --output_dir=./checkpoints/udc --n_gpu=2
 
 # For evaluation only, set do_train=False; init_from_ckpt must then be given.
-# python -u main.py --task_name=drs --data_dir=./DGU_datasets/drs --do_train=False --init_from_ckpt=./checkpoints/drs/best
+# python -u main.py --task_name=udc --data_dir=./DGU_datasets/udc --do_train=False --init_from_ckpt=./checkpoints/udc/best
 ```
 
 The parameters above:
 
-* task_name: the task to run; one of drs, dst, dsf, did, mrda or swda.
+* task_name: the task to run; one of udc, dstc2, atis_slot, atis_intent, mrda or swda.
 * data_dir: path to the training data.
 * output_dir: directory where trained model checkpoints are saved.
 * n_gpu: number of GPUs to train with; defaults to 1.
diff --git a/PaddleNLP/examples/dialogue/dgu/args.py b/PaddleNLP/examples/dialogue/dgu/args.py
index df6c37c0b4b80353d2e4727a67d1be41a9ed69ec..2ae6dce5a83918fab273c21a1e6de133382bcccb 100644
--- a/PaddleNLP/examples/dialogue/dgu/args.py
+++ b/PaddleNLP/examples/dialogue/dgu/args.py
@@ -108,7 +108,7 @@ def parse_args():
 
 def set_default_args(args):
     args.task_name = args.task_name.lower()
-    if args.task_name == 'drs':
+    if args.task_name == 'udc':
         if not args.save_steps:
             args.save_steps = 1000
         if not args.logging_steps:
@@ -119,7 +119,7 @@
             args.max_seq_len = 210
         if not args.test_batch_size:
             args.test_batch_size = 100
-    elif args.task_name == 'dst':
+    elif args.task_name == 'dstc2':
         if not args.save_steps:
             args.save_steps = 400
         if not args.logging_steps:
@@ -132,14 +132,14 @@
             args.max_seq_len = 256
         if not args.test_max_seq_len:
             args.test_max_seq_len = 512
-    elif args.task_name == 'dsf':
+    elif args.task_name == 'atis_slot':
         if not args.save_steps:
             args.save_steps = 100
         if not args.logging_steps:
             args.logging_steps = 10
         if not args.epochs:
             args.epochs = 50
-    elif args.task_name == 'did':
+    elif args.task_name == 'atis_intent':
         if not args.save_steps:
             args.save_steps = 100
         if not args.logging_steps:
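Every `task_name` value accepted by `main.py` and `args.py` changes with this patch, so downstream scripts need updating. A minimal sketch of the old-to-new mapping; the mapping itself is taken from this diff, while `migrate_task_name` is a hypothetical helper, not code from the repo:

```python
# Old -> new task_name values introduced by this change (mapping from the diff).
TASK_NAME_RENAMES = {
    'drs': 'udc',
    'dst': 'dstc2',
    'dsf': 'atis_slot',
    'did': 'atis_intent',
    # 'mrda' and 'swda' are unchanged.
}

def migrate_task_name(old: str) -> str:
    """Hypothetical helper: translate a pre-rename task_name; others pass through."""
    return TASK_NAME_RENAMES.get(old.lower(), old.lower())

assert migrate_task_name('DRS') == 'udc'
assert migrate_task_name('swda') == 'swda'
```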
diff --git a/PaddleNLP/examples/dialogue/dgu/data.py b/PaddleNLP/examples/dialogue/dgu/data.py
index 0e50e0a99819ca59110c8f0a8a9e6cae7838e550..97db64495773609c2a260102fbbd8151b43c5b8d 100644
--- a/PaddleNLP/examples/dialogue/dgu/data.py
+++ b/PaddleNLP/examples/dialogue/dgu/data.py
@@ -20,7 +20,7 @@ def get_label_map(label_list):
 
 class UDCv1(Dataset):
     """
-    The UDCv1 dataset is using in task DRS(Dialogue Response Selection).
+    The UDCv1 dataset is used in the Dialogue Response Selection task.
     The source dataset is UDCv1(Ubuntu Dialogue Corpus v1.0).
     See detail at http://dataset.cs.mcgill.ca/ubuntu-corpus-1.0/
     """
@@ -107,7 +107,7 @@
 
 class DSTC2(Dataset):
     """
-    The dataset DSTC2 is using in task DST(Dialogue State Tracking).
+    The DSTC2 dataset is used in the Dialogue State Tracking task.
     The source dataset is DSTC2(Dialog State Tracking Challenges 2).
     See detail at https://github.com/matthen/dstc
     """
@@ -207,7 +207,7 @@
 
 class ATIS_DSF(Dataset):
     """
-    The dataset ATIS_DSF is using in task DSF(Dialogue Slot Filling).
+    The ATIS_DSF dataset is used in the Dialogue Slot Filling task.
     The source dataset is ATIS(Airline Travel Information System).
     See detail at https://www.kaggle.com/siddhadev/ms-cntk-atis
     """
@@ -281,7 +281,7 @@
 
 class ATIS_DID(Dataset):
     """
-    The dataset ATIS_ID is using in task DID(Dialogue Intent Detection).
+    The ATIS_DID dataset is used in the Dialogue Intent Detection task.
     The source dataset is ATIS(Airline Travel Information System).
     See detail at https://www.kaggle.com/siddhadev/ms-cntk-atis
     """
@@ -441,7 +441,7 @@ def truncate_and_concat(pre_txt: List[str],
 
 class MRDA(Dataset):
     """
-    The dataset MRDA is using in task DA(Dialogue Act).
+    The MRDA dataset is used in the Dialogue Act task.
     The source dataset is MRDA(Meeting Recorder Dialogue Act).
     See detail at https://www.aclweb.org/anthology/W04-2319.pdf
     """
@@ -479,7 +479,7 @@
 
 class SwDA(Dataset):
     """
-    The dataset SwDA is using in task DA(Dialogue Act).
+    The SwDA dataset is used in the Dialogue Act task.
     The source dataset is SwDA(Switchboard Dialog Act).
     See detail at http://compprag.christopherpotts.net/swda.html
     """
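The docstrings above drop the task abbreviations in favor of full task names; the data formats documented in the README are untouched. For concreteness, a minimal sketch of how one line of the udc format (label \t conv1 \t ... \t response, as documented above) splits into fields; `parse_udc_line` and the sample line are hypothetical, not repo code:

```python
# Hypothetical sketch (not repo code): split one udc example into its fields,
# following the README format: label \t conv1 \t conv2 \t ... \t response.
def parse_udc_line(line: str):
    fields = line.rstrip('\n').split('\t')
    label, convs, response = fields[0], fields[1:-1], fields[-1]
    return int(label), convs, response

label, convs, response = parse_udc_line(
    '1\thow do i mount a second disk ?\ttry the disks utility\tthanks , that worked')
assert label == 1 and len(convs) == 2 and response == 'thanks , that worked'
```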
diff --git a/PaddleNLP/examples/dialogue/dgu/main.py b/PaddleNLP/examples/dialogue/dgu/main.py
index fe5cca2eb890fe6d9b8b127dfec9544c150ea3ec..b7a12664550b60990974993bb0477ad805394d78 100644
--- a/PaddleNLP/examples/dialogue/dgu/main.py
+++ b/PaddleNLP/examples/dialogue/dgu/main.py
@@ -22,10 +22,10 @@ import data
 import metric
 
 TASK_CLASSES = {
-    'drs': (data.UDCv1, metric.RecallAtK),
-    'dst': (data.DSTC2, metric.JointAccuracy),
-    'dsf': (data.ATIS_DSF, metric.F1Score),
-    'did': (data.ATIS_DID, Accuracy),
+    'udc': (data.UDCv1, metric.RecallAtK),
+    'dstc2': (data.DSTC2, metric.JointAccuracy),
+    'atis_slot': (data.ATIS_DSF, metric.F1Score),
+    'atis_intent': (data.ATIS_DID, Accuracy),
     'mrda': (data.MRDA, Accuracy),
     'swda': (data.SwDA, Accuracy)
 }
@@ -70,18 +70,20 @@ class DGULossFunction(nn.Layer):
         self.loss_fn = self.get_loss_fn()
 
     def get_loss_fn(self):
-        if self.task_name in ['drs', 'dsf', 'did', 'mrda', 'swda']:
+        if self.task_name in [
+                'udc', 'atis_slot', 'atis_intent', 'mrda', 'swda'
+        ]:
             return F.softmax_with_cross_entropy
-        elif self.task_name == 'dst':
+        elif self.task_name == 'dstc2':
             return nn.BCEWithLogitsLoss(reduction='sum')
 
     def forward(self, logits, labels):
-        if self.task_name in ['drs', 'did', 'mrda', 'swda']:
+        if self.task_name in ['udc', 'atis_intent', 'mrda', 'swda']:
             loss = self.loss_fn(logits, labels)
             loss = paddle.mean(loss)
-        elif self.task_name == 'dst':
+        elif self.task_name == 'dstc2':
             loss = self.loss_fn(logits, paddle.cast(labels, dtype=logits.dtype))
-        elif self.task_name == 'dsf':
+        elif self.task_name == 'atis_slot':
             labels = paddle.unsqueeze(labels, axis=-1)
             loss = self.loss_fn(logits, labels)
             loss = paddle.mean(loss)
@@ -89,8 +91,8 @@
 
 def print_logs(args, step, logits, labels, loss, total_time, metric):
-    if args.task_name in ['drs', 'did', 'mrda', 'swda']:
-        if args.task_name == 'drs':
+    if args.task_name in ['udc', 'atis_intent', 'mrda', 'swda']:
+        if args.task_name == 'udc':
             metric = Accuracy()
             metric.reset()
             correct = metric.compute(logits, labels)
@@ -98,13 +100,13 @@
         acc = metric.accumulate()
         print('step %d - loss: %.4f - acc: %.4f - %.3fs/step' %
               (step, loss, acc, total_time / args.logging_steps))
-    elif args.task_name == 'dst':
+    elif args.task_name == 'dstc2':
         metric.reset()
         metric.update(logits, labels)
         joint_acc = metric.accumulate()
         print('step %d - loss: %.4f - joint_acc: %.4f - %.3fs/step' %
               (step, loss, joint_acc, total_time / args.logging_steps))
-    elif args.task_name == 'dsf':
+    elif args.task_name == 'atis_slot':
         metric.reset()
         metric.update(logits, labels)
         f1_micro = metric.accumulate()
@@ -181,13 +183,14 @@ def train(args, model, train_data_loader, dev_data_loader, metric, rank):
             batch_start_time = time.time()
 
 
+@paddle.no_grad()
 def evaluation(args, model, data_loader, metric):
     model.eval()
     metric.reset()
     for batch in data_loader:
         input_ids, segment_ids, labels = batch
         logits = model(input_ids, segment_ids)
-        if args.task_name in ['did', 'mrda', 'swda']:
+        if args.task_name in ['atis_intent', 'mrda', 'swda']:
             correct = metric.compute(logits, labels)
             metric.update(correct)
         else:
@@ -195,17 +198,17 @@
     model.train()
     metric_out = metric.accumulate()
     print('Total samples: %d' % (len(data_loader) * args.test_batch_size))
-    if args.task_name == 'drs':
+    if args.task_name == 'udc':
         print('R1@10: %.4f - R2@10: %.4f - R5@10: %.4f\n' %
               (metric_out[0], metric_out[1], metric_out[2]))
         return metric_out[0]
-    elif args.task_name == 'dst':
+    elif args.task_name == 'dstc2':
         print('Joint_acc: %.4f\n' % metric_out)
         return metric_out
-    elif args.task_name == 'dsf':
+    elif args.task_name == 'atis_slot':
         print('F1_micro: %.4f\n' % metric_out)
         return metric_out
-    elif args.task_name in ['did', 'mrda', 'swda']:
+    elif args.task_name in ['atis_intent', 'mrda', 'swda']:
         print('Acc: %.4f\n' % metric_out)
         return metric_out
 
@@ -248,7 +251,7 @@ def main(args):
             max_seq_length=args.test_max_seq_len)
 
     metric = metric_class()
-    if args.task_name in ('drs', 'dst', 'did', 'mrda', 'swda'):
+    if args.task_name in ('udc', 'dstc2', 'atis_intent', 'mrda', 'swda'):
         batchify_fn = lambda samples, fn=Tuple(
             Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
             Pad(axis=0, pad_val=tokenizer.pad_token_id),  # segment
@@ -256,7 +259,7 @@
         ): fn(samples)
         model = BertForSequenceClassification.from_pretrained(
             args.model_name_or_path, num_classes=dataset_class.num_classes())
-    elif args.task_name == 'dsf':
+    elif args.task_name == 'atis_slot':
         batchify_fn = lambda samples, fn=Tuple(
             Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
             Pad(axis=0, pad_val=tokenizer.pad_token_id),  # segment
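Beyond the task-name renames, the patch adds `@paddle.no_grad()` to `evaluation` in main.py, so autograd bookkeeping is skipped during evaluation. A minimal, self-contained sketch of that pattern, assuming a model with the same `(input_ids, segment_ids)` call signature; the loop and names are illustrative, not the repo's:

```python
import paddle

@paddle.no_grad()  # no gradient tracking inside: lower memory use, faster eval
def count_correct(model, data_loader):
    """Illustrative eval loop using the decorator this patch adds."""
    model.eval()
    correct = 0
    for input_ids, segment_ids, labels in data_loader:
        logits = model(input_ids, segment_ids)
        preds = paddle.argmax(logits, axis=-1)
        correct += int(paddle.sum(paddle.cast(preds == labels, 'int64')))
    model.train()
    return correct
```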