From 2b2147b0c88fe8a51be6d40e4299f2e906ee8cdf Mon Sep 17 00:00:00 2001
From: xiemoyuan <71377852+xiemoyuan@users.noreply.github.com>
Date: Thu, 10 Dec 2020 21:55:49 +0800
Subject: [PATCH] Unified the DGU task names with paddle1.8 (#5011)
* Unified the task names with paddle1.8
* Fixed a bug.
---
PaddleNLP/examples/dialogue/dgu/README.md | 46 +++++++++++------------
PaddleNLP/examples/dialogue/dgu/args.py | 8 ++--
PaddleNLP/examples/dialogue/dgu/data.py | 12 +++---
PaddleNLP/examples/dialogue/dgu/main.py | 43 +++++++++++----------
4 files changed, 56 insertions(+), 53 deletions(-)
diff --git a/PaddleNLP/examples/dialogue/dgu/README.md b/PaddleNLP/examples/dialogue/dgu/README.md
index 3a8ba8b1..6afa6b85 100644
--- a/PaddleNLP/examples/dialogue/dgu/README.md
+++ b/PaddleNLP/examples/dialogue/dgu/README.md
@@ -7,12 +7,12 @@
The DGU model covers 6 tasks in total, all trained and evaluated on public datasets with Paddle 2.0. Details are as follows:
```
-DRS: uses the UDC (Ubuntu Corpus V1) dataset for the Dialogue Response Selection task;
-DST: uses the DSTC2 (Dialog State Tracking Challenge 2) dataset for the Dialogue State Tracking task;
-DSF: uses the ATIS (Airline Travel Information System) dataset for the Dialogue Slot Filling task;
-DID: uses the ATIS (Airline Travel Information System) dataset for the Dialogue Intent Detection task;
-MRDA: uses the MRDAC (Meeting Recorder Dialogue Act Corpus) dataset for the Dialogue Act Detection task;
-SwDA: uses the SwDAC (Switchboard Dialogue Act Corpus) dataset for the Dialogue Act Detection task;
+udc: uses the UDC (Ubuntu Corpus V1) dataset for the Dialogue Response Selection task;
+dstc2: uses the DSTC2 (Dialog State Tracking Challenge 2) dataset for the Dialogue State Tracking task;
+atis_slot: uses the ATIS (Airline Travel Information System) dataset for the Dialogue Slot Filling task;
+atis_intent: uses the ATIS (Airline Travel Information System) dataset for the Dialogue Intent Detection task;
+mrda: uses the MRDAC (Meeting Recorder Dialogue Act Corpus) dataset for the Dialogue Act Detection task;
+swda: uses the SwDAC (Switchboard Dialogue Act Corpus) dataset for the Dialogue Act Detection task;
```
## Model Performance
@@ -21,14 +21,14 @@ The 6 tasks in the DGU model are evaluated on the test set, each with its own metric
Task | Metric | DGU |
- DRS | R1@10 | 81.04% |
+ udc | R1@10 | 81.04% |
R2@10 | 89.85% |
R5@10 | 97.59% |
- DST | Joint_Acc | 90.43% |
- DSF | F1_Micro | 97.98% |
- DID | Acc | 97.42% |
- MRDA | Acc | 90.94% |
- SwDA | Acc | 80.61% |
+ dstc2 | Joint_Acc | 90.43% |
+ atis_slot | F1_Micro | 97.98% |
+ atis_intent | Acc | 97.42% |
+ mrda | Acc | 90.94% |
+ swda | Acc | 80.61% |
**NOTE:** All results above were obtained by training and evaluating with the default configuration on a single GPU card. To reproduce them, train and evaluate with the default configuration on a single card.
@@ -77,22 +77,22 @@ Directory structure of DGU_datasets:
```text
DGU_datasets/
-├── did
+├── atis_intent
│ ├── dev.txt
│ ├── map_tag_intent_id.txt
│ ├── test.txt
│ └── train.txt
-├── drs
+├── udc
│ ├── dev.txt
│ ├── dev.txt-small
│ ├── test.txt
│ └── train.txt
-├── dsf
+├── atis_slot
│ ├── dev.txt
│ ├── map_tag_slot_id.txt
│ ├── test.txt
│ └── train.txt
-├── dst
+├── dstc2
│ ├── dev.txt
│ ├── map_tag_id.txt
│ ├── test.txt
@@ -112,16 +112,16 @@ DGU_datasets/
Each line of the data consists of multiple columns, separated by "\t". The formats are detailed below:
```
-drs: consists of a label, a multi-turn conversation conv, and a response
+udc: consists of a label, a multi-turn conversation conv, and a response
Format: label \t conv1 \t conv2 \t conv3 \t ... \t response
-dst: consists of a multi-turn conversation id, the current-turn QA pair (joined with \1), and a state sequence state_list (states in state_list are separated by spaces)
+dstc2: consists of a multi-turn conversation id, the current-turn QA pair (joined with \1), and a state sequence state_list (states in state_list are separated by spaces)
Format: conversation_id \t question \1 answer \t state1 state2 state3 ...
-dsf: consists of the conversation content conversation_content and a label sequence label_list (labels in label_list are separated by spaces), where the labels map one-to-one to the words in the conversation content
+atis_slot: consists of the conversation content conversation_content and a label sequence label_list (labels in label_list are separated by spaces), where the labels map one-to-one to the words in the conversation content
Format: conversation_content \t label1 label2 label3 ...
-did: consists of a label and the conversation content conversation_content
+atis_intent: consists of a label and the conversation content conversation_content
Format: label \t conversation_content
mrda: consists of a multi-turn conversation id, a label, the caller, and the conversation content conversation_content
@@ -140,14 +140,14 @@ swda: consists of a multi-turn conversation id, a label, the caller, and the conversation content
```shell
export CUDA_VISIBLE_DEVICES=0,1
# Launch on GPU. n_gpu sets the number of GPUs used for training, either a single card or multiple cards. By default, training, validation and evaluation are all performed.
-python -u main.py --task_name=drs --data_dir=./DGU_datasets/drs --output_dir=./checkpoints/drs --n_gpu=2
+python -u main.py --task_name=udc --data_dir=./DGU_datasets/udc --output_dir=./checkpoints/udc --n_gpu=2
# To run evaluation only, set do_train to False; init_from_ckpt must then be specified.
-# python -u main.py --task_name=drs --data_dir=./DGU_datasets/drs --do_train=False --init_from_ckpt=./checkpoints/drs/best
+# python -u main.py --task_name=udc --data_dir=./DGU_datasets/udc --do_train=False --init_from_ckpt=./checkpoints/udc/best
```
The arguments above are:
-* task_name: the task name; one of drs, dst, dsf, did, mrda or swda.
+* task_name: the task name; one of udc, dstc2, atis_slot, atis_intent, mrda or swda.
* data_dir: path to the training data.
* output_dir: path where trained model checkpoints are saved.
* n_gpu: number of GPU cards used for training; defaults to 1.
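For anyone with scripts or checkpoints written against this example's previous names (drs, dst, dsf, did), the patch is a pure rename to the paddle1.8-style names. A minimal sketch of the mapping, taken from the README diff above; the dict and helper names are hypothetical, not part of the patch:

```python
# Old task names in this example -> unified names introduced by this patch.
OLD_TO_NEW_TASK_NAME = {
    'drs': 'udc',
    'dst': 'dstc2',
    'dsf': 'atis_slot',
    'did': 'atis_intent',
    'mrda': 'mrda',   # unchanged
    'swda': 'swda',   # unchanged
}

def migrate_task_name(name: str) -> str:
    """Hypothetical helper: translate an old task name to the unified one."""
    return OLD_TO_NEW_TASK_NAME.get(name.lower(), name.lower())
```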
diff --git a/PaddleNLP/examples/dialogue/dgu/args.py b/PaddleNLP/examples/dialogue/dgu/args.py
index df6c37c0..2ae6dce5 100644
--- a/PaddleNLP/examples/dialogue/dgu/args.py
+++ b/PaddleNLP/examples/dialogue/dgu/args.py
@@ -108,7 +108,7 @@ def parse_args():
def set_default_args(args):
args.task_name = args.task_name.lower()
- if args.task_name == 'drs':
+ if args.task_name == 'udc':
if not args.save_steps:
args.save_steps = 1000
if not args.logging_steps:
@@ -119,7 +119,7 @@ def set_default_args(args):
args.max_seq_len = 210
if not args.test_batch_size:
args.test_batch_size = 100
- elif args.task_name == 'dst':
+ elif args.task_name == 'dstc2':
if not args.save_steps:
args.save_steps = 400
if not args.logging_steps:
@@ -132,14 +132,14 @@ def set_default_args(args):
args.max_seq_len = 256
if not args.test_max_seq_len:
args.test_max_seq_len = 512
- elif args.task_name == 'dsf':
+ elif args.task_name == 'atis_slot':
if not args.save_steps:
args.save_steps = 100
if not args.logging_steps:
args.logging_steps = 10
if not args.epochs:
args.epochs = 50
- elif args.task_name == 'did':
+ elif args.task_name == 'atis_intent':
if not args.save_steps:
args.save_steps = 100
if not args.logging_steps:
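Every hunk in set_default_args follows one pattern: a hyperparameter is filled in per task only when the user left it unset. A standalone sketch of that pattern, restructured as a dict rather than the file's if/elif chain; only the values visible in the hunks above are reproduced (logging_steps for udc, dstc2 and atis_intent is not shown, so it is omitted):

```python
from types import SimpleNamespace

# Per-task training defaults, keyed by the unified task names.
TASK_DEFAULTS = {
    'udc': {'save_steps': 1000, 'max_seq_len': 210, 'test_batch_size': 100},
    'dstc2': {'save_steps': 400, 'max_seq_len': 256, 'test_max_seq_len': 512},
    'atis_slot': {'save_steps': 100, 'logging_steps': 10, 'epochs': 50},
    'atis_intent': {'save_steps': 100},
}

def set_default_args(args):
    """Fill each per-task default only if the user did not set it."""
    args.task_name = args.task_name.lower()
    for key, value in TASK_DEFAULTS.get(args.task_name, {}).items():
        if not getattr(args, key, None):
            setattr(args, key, value)
    return args

args = set_default_args(SimpleNamespace(task_name='udc', save_steps=None,
                                        max_seq_len=None, test_batch_size=None))
assert args.save_steps == 1000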
diff --git a/PaddleNLP/examples/dialogue/dgu/data.py b/PaddleNLP/examples/dialogue/dgu/data.py
index 0e50e0a9..97db6449 100644
--- a/PaddleNLP/examples/dialogue/dgu/data.py
+++ b/PaddleNLP/examples/dialogue/dgu/data.py
@@ -20,7 +20,7 @@ def get_label_map(label_list):
class UDCv1(Dataset):
"""
- The UDCv1 dataset is using in task DRS(Dialogue Response Selection).
+ The UDCv1 dataset is used in the Dialogue Response Selection task.
The source dataset is UDCv1 (Ubuntu Dialogue Corpus v1.0). See details at
http://dataset.cs.mcgill.ca/ubuntu-corpus-1.0/
"""
@@ -107,7 +107,7 @@ class UDCv1(Dataset):
class DSTC2(Dataset):
"""
- The dataset DSTC2 is using in task DST(Dialogue State Tracking).
+ The DSTC2 dataset is used in the Dialogue State Tracking task.
The source dataset is DSTC2 (Dialog State Tracking Challenges 2). See details at
https://github.com/matthen/dstc
"""
@@ -207,7 +207,7 @@ class DSTC2(Dataset):
class ATIS_DSF(Dataset):
"""
- The dataset ATIS_DSF is using in task DSF(Dialogue Slot Filling).
+ The ATIS_DSF dataset is used in the Dialogue Slot Filling task.
The source dataset is ATIS (Airline Travel Information System). See details at
https://www.kaggle.com/siddhadev/ms-cntk-atis
"""
@@ -281,7 +281,7 @@ class ATIS_DSF(Dataset):
class ATIS_DID(Dataset):
"""
- The dataset ATIS_ID is using in task DID(Dialogue Intent Detection).
+ The ATIS_DID dataset is used in the Dialogue Intent Detection task.
The source dataset is ATIS (Airline Travel Information System). See details at
https://www.kaggle.com/siddhadev/ms-cntk-atis
"""
@@ -441,7 +441,7 @@ def truncate_and_concat(pre_txt: List[str],
class MRDA(Dataset):
"""
- The dataset MRDA is using in task DA(Dialogue Act).
+ The MRDA dataset is used in the Dialogue Act task.
The source dataset is MRDA (Meeting Recorder Dialogue Act). See details at
https://www.aclweb.org/anthology/W04-2319.pdf
"""
@@ -479,7 +479,7 @@ class MRDA(Dataset):
class SwDA(Dataset):
"""
- The dataset SwDA is using in task DA(Dialogue Act).
+ The SwDA dataset is used in the Dialogue Act task.
The source dataset is SwDA (Switchboard Dialog Act). See details at
http://compprag.christopherpotts.net/swda.html
"""
diff --git a/PaddleNLP/examples/dialogue/dgu/main.py b/PaddleNLP/examples/dialogue/dgu/main.py
index fe5cca2e..b7a12664 100644
--- a/PaddleNLP/examples/dialogue/dgu/main.py
+++ b/PaddleNLP/examples/dialogue/dgu/main.py
@@ -22,10 +22,10 @@ import data
import metric
TASK_CLASSES = {
- 'drs': (data.UDCv1, metric.RecallAtK),
- 'dst': (data.DSTC2, metric.JointAccuracy),
- 'dsf': (data.ATIS_DSF, metric.F1Score),
- 'did': (data.ATIS_DID, Accuracy),
+ 'udc': (data.UDCv1, metric.RecallAtK),
+ 'dstc2': (data.DSTC2, metric.JointAccuracy),
+ 'atis_slot': (data.ATIS_DSF, metric.F1Score),
+ 'atis_intent': (data.ATIS_DID, Accuracy),
'mrda': (data.MRDA, Accuracy),
'swda': (data.SwDA, Accuracy)
}
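TASK_CLASSES keys every task by its unified name and pairs it with a dataset class and a metric class; a later hunk shows main() instantiating metric_class() from this table. A minimal sketch of that dispatch, written as a hypothetical helper that assumes a table shaped like the one above:

```python
def lookup_task(task_name, task_classes):
    """Return (dataset_class, metric_class) for a unified task name."""
    if task_name not in task_classes:
        raise ValueError('unknown task %r, expected one of %s'
                         % (task_name, sorted(task_classes)))
    return task_classes[task_name]

# e.g., with the TASK_CLASSES table above in scope:
# dataset_class, metric_class = lookup_task('atis_intent', TASK_CLASSES)
# metric = metric_class()  # Accuracy for atis_intent, mrda and swda
```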
@@ -70,18 +70,20 @@ class DGULossFunction(nn.Layer):
self.loss_fn = self.get_loss_fn()
def get_loss_fn(self):
- if self.task_name in ['drs', 'dsf', 'did', 'mrda', 'swda']:
+ if self.task_name in [
+ 'udc', 'atis_slot', 'atis_intent', 'mrda', 'swda'
+ ]:
return F.softmax_with_cross_entropy
- elif self.task_name == 'dst':
+ elif self.task_name == 'dstc2':
return nn.BCEWithLogitsLoss(reduction='sum')
def forward(self, logits, labels):
- if self.task_name in ['drs', 'did', 'mrda', 'swda']:
+ if self.task_name in ['udc', 'atis_intent', 'mrda', 'swda']:
loss = self.loss_fn(logits, labels)
loss = paddle.mean(loss)
- elif self.task_name == 'dst':
+ elif self.task_name == 'dstc2':
loss = self.loss_fn(logits, paddle.cast(labels, dtype=logits.dtype))
- elif self.task_name == 'dsf':
+ elif self.task_name == 'atis_slot':
labels = paddle.unsqueeze(labels, axis=-1)
loss = self.loss_fn(logits, labels)
loss = paddle.mean(loss)
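The loss selection mirrors the label shapes: every task except dstc2 trains with softmax cross-entropy over integer class ids (per token for atis_slot, after the unsqueeze), while dstc2 uses a summed sigmoid BCE; the cast of its labels to the logits dtype suggests multi-hot state labels. A hedged illustration of the two objectives with toy tensors (shapes and values are made up):

```python
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

logits = paddle.randn([4, 3])  # toy batch: 4 examples, 3 classes/states

# Single-label tasks (udc, atis_intent, mrda, swda): integer class ids,
# softmax cross-entropy, then a mean over the batch, as in forward() above.
hard_labels = paddle.to_tensor([[0], [2], [1], [0]], dtype='int64')
ce_loss = paddle.mean(F.softmax_with_cross_entropy(logits, hard_labels))

# dstc2: several states can be active at once, so labels are multi-hot
# floats and the loss is a summed sigmoid BCE.
multi_hot = paddle.to_tensor([[1., 0., 1.], [0., 1., 0.],
                              [1., 1., 0.], [0., 0., 1.]])
bce_loss = nn.BCEWithLogitsLoss(reduction='sum')(logits, multi_hot)
```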
@@ -89,8 +91,8 @@ class DGULossFunction(nn.Layer):
def print_logs(args, step, logits, labels, loss, total_time, metric):
- if args.task_name in ['drs', 'did', 'mrda', 'swda']:
- if args.task_name == 'drs':
+ if args.task_name in ['udc', 'atis_intent', 'mrda', 'swda']:
+ if args.task_name == 'udc':
metric = Accuracy()
metric.reset()
correct = metric.compute(logits, labels)
@@ -98,13 +100,13 @@ def print_logs(args, step, logits, labels, loss, total_time, metric):
acc = metric.accumulate()
print('step %d - loss: %.4f - acc: %.4f - %.3fs/step' %
(step, loss, acc, total_time / args.logging_steps))
- elif args.task_name == 'dst':
+ elif args.task_name == 'dstc2':
metric.reset()
metric.update(logits, labels)
joint_acc = metric.accumulate()
print('step %d - loss: %.4f - joint_acc: %.4f - %.3fs/step' %
(step, loss, joint_acc, total_time / args.logging_steps))
- elif args.task_name == 'dsf':
+ elif args.task_name == 'atis_slot':
metric.reset()
metric.update(logits, labels)
f1_micro = metric.accumulate()
@@ -181,13 +183,14 @@ def train(args, model, train_data_loader, dev_data_loader, metric, rank):
batch_start_time = time.time()
+@paddle.no_grad()
def evaluation(args, model, data_loader, metric):
model.eval()
metric.reset()
for batch in data_loader:
input_ids, segment_ids, labels = batch
logits = model(input_ids, segment_ids)
- if args.task_name in ['did', 'mrda', 'swda']:
+ if args.task_name in ['atis_intent', 'mrda', 'swda']:
correct = metric.compute(logits, labels)
metric.update(correct)
else:
@@ -195,17 +198,17 @@ def evaluation(args, model, data_loader, metric):
model.train()
metric_out = metric.accumulate()
print('Total samples: %d' % (len(data_loader) * args.test_batch_size))
- if args.task_name == 'drs':
+ if args.task_name == 'udc':
print('R1@10: %.4f - R2@10: %.4f - R5@10: %.4f\n' %
(metric_out[0], metric_out[1], metric_out[2]))
return metric_out[0]
- elif args.task_name == 'dst':
+ elif args.task_name == 'dstc2':
print('Joint_acc: %.4f\n' % metric_out)
return metric_out
- elif args.task_name == 'dsf':
+ elif args.task_name == 'atis_slot':
print('F1_micro: %.4f\n' % metric_out)
return metric_out
- elif args.task_name in ['did', 'mrda', 'swda']:
+ elif args.task_name in ['atis_intent', 'mrda', 'swda']:
print('Acc: %.4f\n' % metric_out)
return metric_out
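Besides the renames, this file gains @paddle.no_grad() on evaluation, which disables autograd tracking for the whole forward-only pass and saves memory. A minimal sketch of the same pattern in isolation (model and data_loader stand for any Paddle model and loader yielding the same triples as above):

```python
import paddle

@paddle.no_grad()  # no autograd graph is built anywhere inside
def evaluate(model, data_loader):
    """Forward-only evaluation that restores train mode before returning."""
    model.eval()  # disable dropout etc. for inference
    outputs = []
    for batch in data_loader:
        input_ids, segment_ids, labels = batch
        outputs.append(model(input_ids, segment_ids))
    model.train()
    return outputs
```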
@@ -248,7 +251,7 @@ def main(args):
max_seq_length=args.test_max_seq_len)
metric = metric_class()
- if args.task_name in ('drs', 'dst', 'did', 'mrda', 'swda'):
+ if args.task_name in ('udc', 'dstc2', 'atis_intent', 'mrda', 'swda'):
batchify_fn = lambda samples, fn=Tuple(
Pad(axis=0, pad_val=tokenizer.pad_token_id), # input
Pad(axis=0, pad_val=tokenizer.pad_token_id), # segment
@@ -256,7 +259,7 @@ def main(args):
): fn(samples)
model = BertForSequenceClassification.from_pretrained(
args.model_name_or_path, num_classes=dataset_class.num_classes())
- elif args.task_name == 'dsf':
+ elif args.task_name == 'atis_slot':
batchify_fn = lambda samples, fn=Tuple(
Pad(axis=0, pad_val=tokenizer.pad_token_id), # input
Pad(axis=0, pad_val=tokenizer.pad_token_id), # segment
--
GitLab