提交 f3e8f301 编写于 作者: G Guo Sheng 提交者: guosheng

Merge pull request #54 from 0YuanZhang0/seq_tag

seq_tag
......@@ -6,7 +6,7 @@ Sequence Tagging,是一个序列标注模型,模型可用于实现,分词
|模型|Precision|Recall|F1-score|
|:-:|:-:|:-:|:-:|
|Lexical Analysis|88.26%|89.20%|88.73%|
|Lexical Analysis|89.57%|89.96%|89.76%|
## 2. 快速开始
......@@ -22,7 +22,7 @@ Sequence Tagging,是一个序列标注模型,模型可用于实现,分词
克隆工具集代码库到本地
```bash
git clone https://github.com/PaddlePaddle/hapi.git
cd hapi/sequence_tagging
cd hapi/examples/sequence_tagging
```
#### 3. 环境依赖
......@@ -70,7 +70,7 @@ python -u train.py \
--dynamic False
# --device: 使用gpu设备还是cpu设备
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
GPU上多卡训练
......@@ -84,7 +84,7 @@ python -m paddle.distributed.launch --selected_gpus=0,1,2,3 train.py \
--dynamic False
# --device: 使用gpu设备还是cpu设备
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
CPU上训练
......@@ -95,7 +95,7 @@ python -u train.py \
--dynamic False
# --device: 使用gpu设备还是cpu设备
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
### 模型预测
......@@ -105,15 +105,13 @@ python -u train.py \
python predict.py \
--init_from_checkpoint model_baseline/params \
--output_file predict.result \
--mode predict \
--device cpu \
--dynamic False
# --init_from_checkpoint: 初始化模型
# --output_file: 预测结果文件
# --device: 使用gpu还是cpu设备
# --mode: 开启模式, 设置为train时,进行训练,设置为predict时进行预测
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
### 模型评估
......@@ -123,14 +121,12 @@ python predict.py \
# baseline model
python eval.py \
--init_from_checkpoint ./model_baseline/params \
--mode predict \
--device cpu \
--dynamic False
# --init_from_checkpoint: 初始化模型
# --device: 使用gpu还是cpu设备
# --mode: 开启模式, 设置为train时,进行训练,设置为predict时进行预测
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
......@@ -196,6 +192,7 @@ Overall Architecture of GRU-CRF-MODEL
├── eval.py # 词法分析评估的脚本
├── downloads.py # 用于下载数据和模型的脚本
├── downloads.sh # 用于下载数据和模型的脚本
├── sequence_tagging.yaml # 模型训练、预测、评估相关配置参数
└── reader.py # 文件读取相关函数
```
......
......@@ -35,7 +35,7 @@ FILE_INFO = {
},
'MODEL': {
'name': 'sequence_tagging_dy.tar.gz',
'md5': "1125d374c03c8218b6e47325dcf607e3"
'md5': "6ba37ceea8f1f764ba1fe227295a6a3b"
},
}
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
SequenceTagging eval structure
"""
from __future__ import division
......@@ -25,18 +25,16 @@ import math
import argparse
import numpy as np
from train import SeqTagging
from train import SeqTagging, ChunkEval, LacLoss
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from utils.metrics import chunk_count
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
from reader import LacDataset, LacDataLoader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
......@@ -44,48 +42,30 @@ def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
inputs = [
Input(
[None, None], 'int64', name='words'), Input(
[None], 'int64', name='length'), Input(
[None, None], 'int64', name='target')
]
labels = [Input([None, None], 'int64', name='labels')]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
eval_path = args.test_file
chunk_evaluator = fluid.metrics.ChunkEvaluator()
chunk_evaluator.reset()
eval_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=eval_path, place=place, mode="test")
eval_dataset = create_dataloader(
eval_generator, place, feed_list=feed_list)
eval_dataset = LacDataLoader(args, place, phase="test")
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model = SeqTagging(args, vocab_size, num_labels, mode="test")
model.mode = "test"
model.prepare(inputs=inputs)
model.prepare(
metrics=ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=place)
model.load(args.init_from_checkpoint, skip_mismatch=True)
for data in eval_dataset():
if len(data) == 1:
batch_data = data[0]
targets = np.array(batch_data[2])
else:
batch_data = data
targets = batch_data[2].numpy()
inputs_data = [batch_data[0], batch_data[1]]
crf_decode, length = model.test(inputs=inputs_data)
num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_count(crf_decode, targets, length, dataset.id2label_dict)
chunk_evaluator.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
precision, recall, f1 = chunk_evaluator.eval()
print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1))
model.evaluate(eval_dataset.dataloader, batch_size=args.batch_size)
if __name__ == '__main__':
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
SequenceTagging predict structure
"""
from __future__ import division
......@@ -28,14 +28,13 @@ import numpy as np
from train import SeqTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
from reader import LacDataset, LacDataLoader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
......@@ -43,26 +42,18 @@ def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
inputs = [
Input(
[None, None], 'int64', name='words'), Input(
[None], 'int64', name='length')
]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
predict_path = args.predict_file
predict_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=predict_path, place=place, mode="predict")
predict_dataset = create_dataloader(
predict_generator, place, feed_list=feed_list)
predict_dataset = LacDataLoader(args, place, phase="predict")
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model = SeqTagging(args, vocab_size, num_labels, mode="predict")
model.mode = "test"
model.prepare(inputs=inputs)
......@@ -70,15 +61,15 @@ def main(args):
model.load(args.init_from_checkpoint, skip_mismatch=True)
f = open(args.output_file, "wb")
for data in predict_dataset():
for data in predict_dataset.dataloader:
if len(data) == 1:
input_data = data[0]
else:
input_data = data
results, length = model.test(inputs=flatten(input_data))
results, length = model.test_batch(inputs=flatten(input_data))
for i in range(len(results)):
word_len = length[i]
word_ids = results[i][: word_len]
word_ids = results[i][:word_len]
tags = [dataset.id2label_dict[str(id)] for id in word_ids]
f.write("\002".join(tags) + "\n")
......
......@@ -19,12 +19,19 @@ from __future__ import division
from __future__ import print_function
import io
import os
import leveldb
import numpy as np
import shutil
from functools import partial
import paddle
from paddle.io import BatchSampler, DataLoader, Dataset
from paddle.fluid.dygraph.parallel import ParallelEnv
from hapi.distributed import DistributedBatchSampler
class LacDataset(object):
class LacDataset(Dataset):
"""
Load lexical analysis dataset
"""
......@@ -34,6 +41,7 @@ class LacDataset(object):
self.label_dict_path = args.label_dict_path
self.word_rep_dict_path = args.word_rep_dict_path
self._load_dict()
self.examples = []
def _load_dict(self):
self.word2id_dict = self.load_kv_dict(
......@@ -108,152 +116,135 @@ class LacDataset(object):
label_ids.append(label_id)
return label_ids
def file_reader(self,
filename,
mode="train",
batch_size=32,
max_seq_len=126):
def file_reader(self, filename, phase="train"):
"""
yield (word_idx, target_idx) one by one from file,
or yield (word_idx, ) in `infer` mode
"""
def wrapper():
fread = io.open(filename, "r", encoding="utf-8")
if mode == "train":
headline = next(fread)
self.phase = phase
with io.open(filename, "r", encoding="utf8") as fr:
if phase in ["train", "test"]:
headline = next(fr)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
words_len = np.int64(len(word_ids))
assert len(headline) == 2 and headline[
0] == "text_a" and headline[1] == "label"
word_ids = word_ids[0:max_seq_len]
words_len = np.int64(len(word_ids))
word_ids += [0 for _ in range(max_seq_len - words_len)]
label_ids = label_ids[0:max_seq_len]
label_ids += [0 for _ in range(max_seq_len - words_len)]
assert len(word_ids) == len(label_ids)
yield word_ids, label_ids, words_len
elif mode == "test":
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
for line in fr:
line_str = line.strip("\n")
if len(line_str) < 1 and len(line_str.split('\t')) < 2:
continue
self.examples.append(line_str)
else:
for idx, line in enumerate(fr):
words = line.strip("\n").split("\t")[0]
self.examples.append(words)
def __getitem__(self, idx):
line_str = self.examples[idx]
if self.phase in ["train", "test"]:
words, labels = line_str.split('\t')
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
words_len = np.int64(len(word_ids))
yield word_ids, label_ids, words_len
return word_ids, label_ids
else:
for line in fread:
words = line.strip("\n").split('\t')[0]
if words == u"text_a":
continue
if "\002" not in words:
words = [w for w in line_str]
word_ids = self.word_to_ids(words)
else:
word_ids = self.word_to_ids(words.split("\002"))
words_len = np.int64(len(word_ids))
yield word_ids, words_len
return word_ids
fread.close()
def __len__(self):
return wrapper
return len(self.examples)
def create_lexnet_data_generator(args, reader, file_name, place, mode="train"):
def padding_data(max_len, batch_data):
def create_lexnet_data_generator(args, insts, phase="train"):
def padding_data(max_len, batch_data, if_len=False):
padding_batch_data = []
padding_lens = []
for data in batch_data:
data = data[:max_len]
if if_len:
seq_len = np.int64(len(data))
padding_lens.append(seq_len)
data += [0 for _ in range(max_len - len(data))]
padding_batch_data.append(data)
return padding_batch_data
def wrapper():
if mode == "train":
batch_words, batch_labels, seq_lens = [], [], []
for epoch in xrange(args.epoch):
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
batch_labels.append(labels)
seq_lens.append(words_len)
if len(seq_lens) == args.batch_size:
yield batch_words, seq_lens, batch_labels, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
if len(seq_lens) > 0:
yield batch_words, seq_lens, batch_labels, batch_labels
elif mode == "test":
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) < args.batch_size:
batch_words.append(words)
seq_lens.append(words_len)
batch_labels.append(labels)
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
if if_len:
return np.array(padding_batch_data), np.array(padding_lens)
else:
batch_words, seq_lens, max_len = [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
seq_lens.append(words_len)
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
batch_words, seq_lens, max_len = [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
return wrapper
def create_dataloader(generator, place, feed_list=None):
if not feed_list:
data_loader = paddle.io.DataLoader.from_generator(
capacity=50,
use_double_buffer=True,
iterable=True,
return_list=True)
return np.array(padding_batch_data)
if phase == "train":
batch_words = [inst[0] for inst in insts]
batch_labels = [inst[1] for inst in insts]
padding_batch_words, padding_lens = padding_data(
args.max_seq_len, batch_words, if_len=True)
padding_batch_labels = padding_data(args.max_seq_len, batch_labels)
return [
padding_batch_words, padding_lens, padding_batch_labels,
padding_batch_labels
]
elif phase == "test":
batch_words = [inst[0] for inst in insts]
seq_len = [len(inst[0]) for inst in insts]
max_seq_len = max(seq_len)
batch_labels = [inst[1] for inst in insts]
padding_batch_words, padding_lens = padding_data(
max_seq_len, batch_words, if_len=True)
padding_batch_labels = padding_data(max_seq_len, batch_labels)
return [
padding_batch_words, padding_lens, padding_batch_labels,
padding_batch_labels
]
else:
batch_words = insts
seq_len = [len(inst) for inst in insts]
max_seq_len = max(seq_len)
padding_batch_words, padding_lens = padding_data(
max_seq_len, batch_words, if_len=True)
return [padding_batch_words, padding_lens]
class LacDataLoader(object):
def __init__(self,
args,
place,
phase="train",
shuffle=False,
num_workers=0,
drop_last=False):
assert phase in [
"train", "test", "predict"
], "phase should be in [train, test, predict], but get %s" % phase
if phase == "train":
file_name = args.train_file
elif phase == "test":
file_name = args.test_file
elif phase == "predict":
file_name = args.predict_file
self.dataset = LacDataset(args)
self.dataset.file_reader(file_name, phase=phase)
if phase == "train":
self.sampler = DistributedBatchSampler(
dataset=self.dataset,
batch_size=args.batch_size,
shuffle=shuffle,
drop_last=drop_last)
else:
data_loader = paddle.io.DataLoader.from_generator(
feed_list=feed_list,
capacity=50,
use_double_buffer=True,
iterable=True,
self.sampler = BatchSampler(
dataset=self.dataset,
batch_size=args.batch_size,
shuffle=shuffle,
drop_last=drop_last)
self.dataloader = DataLoader(
dataset=self.dataset,
batch_sampler=self.sampler,
places=place,
collate_fn=partial(
create_lexnet_data_generator, args, phase=phase),
num_workers=num_workers,
return_list=True)
data_loader.set_batch_generator(generator, places=place)
return data_loader
word_dict_path: "./conf/word.dic"
label_dict_path: "./conf/tag.dic"
word_rep_dict_path: "./conf/q2b.dic"
device: "cpu"
device: "gpu"
dynamic: True
epoch: 10
base_learning_rate: 0.001
......@@ -14,7 +14,7 @@ batch_size: 300
max_seq_len: 126
num_devices: 1
save_dir: "model"
init_from_checkpoint: "model_baseline/params"
init_from_checkpoint: ""
init_from_pretrain_model: ""
save_freq: 1
eval_freq: 1
......@@ -22,4 +22,3 @@ output_file: "predict.result"
test_file: "./data/test.tsv"
train_file: "./data/train.tsv"
predict_file: "./data/infer.tsv"
mode: "train"
......@@ -28,21 +28,23 @@ import numpy as np
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.metrics import Metric
from hapi.model import Model, Input, Loss, set_device
from hapi.text.text import SequenceTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
from reader import LacDataset, LacDataLoader
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
__all__ = ["SeqTagging", "LacLoss", "ChunkEval"]
class SeqTagging(Model):
def __init__(self, args, vocab_size, num_labels, length=None):
def __init__(self, args, vocab_size, num_labels, length=None,
mode="train"):
super(SeqTagging, self).__init__()
"""
define the lexical analysis network structure
......@@ -53,7 +55,7 @@ class SeqTagging(Model):
for infer: return the prediction
otherwise: return the prediction
"""
self.mode_type = args.mode
self.mode_type = mode
self.word_emb_dim = args.word_emb_dim
self.vocab_size = vocab_size
self.num_labels = num_labels
......@@ -65,7 +67,7 @@ class SeqTagging(Model):
self.bigru_num = args.bigru_num
self.batch_size = args.batch_size
self.init_bound = 0.1
self.length=length
self.length = length
self.sequence_tagging = SequenceTagging(
vocab_size=self.vocab_size,
......@@ -207,30 +209,25 @@ def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length'),
Input([None, None], 'int64', name='target')]
inputs = [
Input(
[None, None], 'int64', name='words'), Input(
[None], 'int64', name='length'), Input(
[None, None], 'int64', name='target')
]
labels = [Input([None, None], 'int64', name='labels')]
feed_list = None if args.dynamic else [x.forward() for x in inputs + labels]
dataset = LacDataset(args)
train_path = args.train_file
test_path = args.test_file
feed_list = None if args.dynamic else [
x.forward() for x in inputs + labels
]
train_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=train_path, place=place, mode="train")
test_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=test_path, place=place, mode="test")
train_dataset = create_dataloader(
train_generator, place, feed_list=feed_list)
test_dataset = create_dataloader(
test_generator, place, feed_list=feed_list)
dataset = LacDataset(args)
train_dataset = LacDataLoader(args, place, phase="train")
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
model = SeqTagging(args, vocab_size, num_labels, mode="train")
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
......@@ -250,8 +247,7 @@ def main(args):
if args.init_from_pretrain_model:
model.load(args.init_from_pretrain_model, reset_optimizer=True)
model.fit(train_dataset,
test_dataset,
model.fit(train_dataset.dataloader,
epochs=args.epoch,
batch_size=args.batch_size,
eval_freq=args.eval_freq,
......
......@@ -195,13 +195,19 @@ class PDConfig(object):
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.default_g.add_arg("do_save_inference_model", bool, False,
self.default_g.add_arg(
"do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
# NOTE: args for profiler
self.default_g.add_arg("is_profiler", int, 0, "the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg("profiler_path", str, './', "the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0, "the max train batch num.(used for benchmark)")
self.default_g.add_arg(
"is_profiler", int, 0,
"the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg(
"profiler_path", str, './',
"the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0,
"the max train batch num.(used for benchmark)")
self.parser = parser
......
......@@ -63,8 +63,8 @@ def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
assert infer_numpy.shape[0] == label_numpy.shape[0]
for i in range(infer_numpy.shape[0]):
infer_list = infer_numpy[i][: seq_len[i]]
label_list = label_numpy[i][: seq_len[i]]
infer_list = infer_numpy[i][:seq_len[i]]
label_list = label_numpy[i][:seq_len[i]]
infer_dict = build_chunk(infer_list, id2label_dict)
num_infer_chunks += len(infer_dict)
label_dict = build_chunk(label_list, id2label_dict)
......@@ -73,4 +73,3 @@ def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
if key in label_dict and label_dict[key] == infer_dict[key]:
num_correct_chunks += 1
return num_infer_chunks, num_label_chunks, num_correct_chunks
......@@ -19,6 +19,7 @@ from __future__ import print_function
import os
import six
import sys
if six.PY2:
reload(sys)
sys.setdefaultencoding('utf8')
......@@ -37,7 +38,7 @@ import paddle
import paddle.fluid as fluid
import paddle.fluid.layers.utils as utils
from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
from paddle.fluid.dygraph import to_variable, Embedding, Linear, LayerNorm, GRUUnit
from paddle.fluid.dygraph import to_variable, Embedding, Linear, LayerNorm, GRUUnit, Conv2D
from paddle.fluid.data_feeder import convert_dtype
from paddle.fluid import layers
......@@ -49,7 +50,8 @@ __all__ = [
'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf',
'Crf_decoding', 'SequenceTagging'
'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer', 'CNNEncoder',
'BOWEncoder', 'SimpleConvPoolLayer', 'GRUEncoder', 'DynamicGRU', 'LSTMEncoder'
]
......@@ -87,12 +89,12 @@ class RNNCell(Layer):
batch_ref = flatten(batch_ref)[0]
def _is_shape_sequence(seq):
if sys.version_info < (3, ):
if sys.version_info < (3,):
integer_types = (
int,
long, )
long,)
else:
integer_types = (int, )
integer_types = (int,)
"""For shape, list/tuple of integer is the finest-grained objection"""
if (isinstance(seq, list) or isinstance(seq, tuple)):
if reduce(
......@@ -763,7 +765,7 @@ class BasicGRUCell(RNNCell):
c = self._activation(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
return new_hidden
return new_hidden, new_hidden
@property
def state_shape(self):
......@@ -1217,7 +1219,7 @@ class MultiHeadAttention(Layer):
# scale dot product attention
product = layers.matmul(
x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)
x=q, y=k, transpose_y=True, alpha=self.d_model ** -0.5)
if attn_bias:
product += attn_bias
weights = layers.softmax(product)
......@@ -1307,7 +1309,6 @@ class TransformerEncoderLayer(Layer):
reused_ffn_weights={"reused_fc1": None,
"reused_fc2": None},
reused_post_ffn_layernorm=None):
super(TransformerEncoderLayer, self).__init__()
self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model,
......@@ -1555,7 +1556,7 @@ class TransformerDecoder(Layer):
]
#TODO: we should merge GRUCell with BasicGRUCell
# TODO: we should merge GRUCell with BasicGRUCell
class GRUCell(RNNCell):
def __init__(self,
input_size,
......@@ -1589,7 +1590,7 @@ class GRUCell(RNNCell):
return [self.hidden_size]
#TODO: we should merge GRUCell with BasicGRUCell
# TODO: we should merge GRUCell with BasicGRUCell
class GRUEncoderCell(RNNCell):
def __init__(self,
num_layers,
......@@ -1605,7 +1606,7 @@ class GRUEncoderCell(RNNCell):
self.gru_cells.append(
self.add_sublayer(
"gru_%d" % i,
#BasicGRUCell(
# BasicGRUCell(
GRUCell(
input_size=input_size if i == 0 else hidden_size,
hidden_size=hidden_size,
......@@ -1672,7 +1673,6 @@ class Linear_chain_crf(fluid.dygraph.Layer):
self._transition = value
def forward(self, input, label, length=None):
alpha = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
emission_exps = self._helper.create_variable_for_type_inference(
......@@ -1723,7 +1723,6 @@ class Crf_decoding(fluid.dygraph.Layer):
self._transition = value
def forward(self, input, label=None, length=None):
viterbi_path = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
......@@ -1741,6 +1740,64 @@ class Crf_decoding(fluid.dygraph.Layer):
return viterbi_path
class GRUEncoderLayer(Layer):
    """Stack of GRU layers, optionally bidirectional, built on ``BasicGRUCell``.

    Each layer wraps a ``BasicGRUCell`` in an ``RNN`` (batch-major,
    ``time_major=False``). When ``is_bidirection`` is true a second, reversed
    ``RNN`` is created per layer and the two outputs are concatenated on the
    last axis before being fed to the next layer.

    Args:
        input_dim: feature width of the input to the first layer.
        grnn_hidden_dim: hidden size of every GRU cell.
        init_bound: weights are initialised uniformly in
            ``[-init_bound, init_bound]``.
        num_layers: number of stacked layers.
        h_0: stored on the instance; not read by ``forward``.
        is_bidirection: whether to add a reversed GRU to every layer.
    """

    def __init__(self,
                 input_dim,
                 grnn_hidden_dim,
                 init_bound,
                 num_layers=1,
                 h_0=None,
                 is_bidirection=False):
        super(GRUEncoderLayer, self).__init__()
        self.h_0 = h_0
        self.num_layers = num_layers
        self.is_bidirection = is_bidirection
        self.gru_list = []
        self.gru_r_list = []

        # Width of the features handed from one layer to the next: the
        # concatenation of both directions when bidirectional, otherwise the
        # plain hidden size. The previous `input_dim * 2` was only correct
        # when input_dim == grnn_hidden_dim and is_bidirection was True.
        if is_bidirection:
            stacked_dim = grnn_hidden_dim * 2
        else:
            stacked_dim = grnn_hidden_dim

        # Forward cells are created first, then reverse cells, to keep the
        # original parameter creation order.
        for i in range(num_layers):
            # NOTE(review): kept as an instance attribute like the original;
            # assigning a Layer to an attribute presumably also registers it
            # as a sublayer, which may matter for saved checkpoints - confirm
            # before removing.
            self.basic_gru_cell = BasicGRUCell(
                input_size=input_dim if i == 0 else stacked_dim,
                hidden_size=grnn_hidden_dim,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-init_bound, high=init_bound),
                    regularizer=fluid.regularizer.L2DecayRegularizer(
                        regularization_coeff=1e-4)))
            self.gru_list.append(
                self.add_sublayer(
                    "gru_%d" % i,
                    RNN(self.basic_gru_cell,
                        is_reverse=False,
                        time_major=False)))

        if self.is_bidirection:
            for i in range(num_layers):
                self.basic_gru_cell_r = BasicGRUCell(
                    input_size=input_dim if i == 0 else stacked_dim,
                    hidden_size=grnn_hidden_dim,
                    param_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.UniformInitializer(
                            low=-init_bound, high=init_bound),
                        regularizer=fluid.regularizer.L2DecayRegularizer(
                            regularization_coeff=1e-4)))
                self.gru_r_list.append(
                    self.add_sublayer(
                        "gru_r_%d" % i,
                        RNN(self.basic_gru_cell_r,
                            is_reverse=True,
                            time_major=False)))

    def forward(self, input_feature):
        """Run the input through every layer and return the last layer's output.

        With ``num_layers == 0`` the input is returned unchanged.
        """
        out = input_feature
        for i in range(self.num_layers):
            pre_gru, pre_state = self.gru_list[i](input_feature)
            if self.is_bidirection:
                gru_r, r_state = self.gru_r_list[i](input_feature)
                out = fluid.layers.concat(input=[pre_gru, gru_r], axis=-1)
            else:
                out = pre_gru
            # The output of this layer is the input of the next one.
            input_feature = out
        return out
class SequenceTagging(fluid.dygraph.Layer):
def __init__(self,
vocab_size,
......@@ -1790,26 +1847,13 @@ class SequenceTagging(fluid.dygraph.Layer):
force_cpu=True,
name='h_0')
self.bigru_units = []
for i in range(self.bigru_num):
if i == 0:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
else:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim * 2,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
self.gru_encoder = GRUEncoderLayer(
input_dim=self.grnn_hidden_dim,
grnn_hidden_dim=self.grnn_hidden_dim,
init_bound=self.init_bound,
num_layers=self.bigru_num,
h_0=h_0,
is_bidirection=True)
self.fc = Linear(
input_dim=self.grnn_hidden_dim * 2,
......@@ -1837,10 +1881,7 @@ class SequenceTagging(fluid.dygraph.Layer):
word_embed = self.word_embedding(word)
input_feature = word_embed
for i in range(self.bigru_num):
bigru_output = self.bigru_units[i](input_feature)
input_feature = bigru_output
bigru_output = self.gru_encoder(input_feature)
emission = self.fc(bigru_output)
if target is not None:
......@@ -1854,3 +1895,227 @@ class SequenceTagging(fluid.dygraph.Layer):
self.linear_chain_crf.weight = self.crf_decoding.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, lengths
class SimpleConvPoolLayer(Layer):
    """A ``Conv2D`` followed by a max-reduction over the last axis.

    The pooled result is flattened to two dimensions, keeping the leading
    (first) dimension of the pooled tensor.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 use_cudnn=False,
                 act=None):
        super(SimpleConvPoolLayer, self).__init__()
        conv_kwargs = dict(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            padding=[1, 1],
            use_cudnn=use_cudnn,
            act=act)
        self._conv2d = Conv2D(**conv_kwargs)

    def forward(self, input):
        """Convolve, take the max over the last axis, then flatten to 2-D."""
        conv_out = self._conv2d(input)
        pooled = fluid.layers.reduce_max(conv_out, dim=-1)
        leading_dim = pooled.shape[0]
        return fluid.layers.reshape(pooled, shape=[leading_dim, -1])
class CNNEncoder(Layer):
    """
    simple CNNEncoder for simnet

    Embeds the input ids, reshapes the embeddings to
    ``[-1, 1, seq_len, hidden_dim]`` and feeds them to a
    ``SimpleConvPoolLayer``.
    """

    def __init__(self,
                 dict_size,
                 emb_dim,
                 filter_size,
                 num_filters,
                 hidden_dim,
                 seq_len,
                 padding_idx,
                 act):
        super(CNNEncoder, self).__init__()
        # Plain configuration values.
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.hidden_dim = hidden_dim
        self.seq_len = seq_len
        self.padding_idx = padding_idx
        self.act = act
        # The embeddings are treated as a single-channel image.
        self.channels = 1

        # Sub-layers: the embedding table is created before the conv layer.
        embedding_attr = fluid.ParamAttr(
            name='emb', initializer=fluid.initializer.Xavier())
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=embedding_attr)
        self.cnn_layer = SimpleConvPoolLayer(
            self.channels,
            self.num_filters,
            self.filter_size,
            use_cudnn=False,
            act=self.act)

    def forward(self, input):
        """Return the conv-pool features of the embedded input."""
        embedded = self.emb_layer(input)
        target_shape = [-1, self.channels, self.seq_len, self.hidden_dim]
        as_image = fluid.layers.reshape(embedded, shape=target_shape)
        return self.cnn_layer(as_image)
class BOWEncoder(Layer):
    """
    simple BOWEncoder for simnet

    Embeds the input ids, reshapes the embeddings to
    ``[-1, seq_len, bow_dim]`` and sums them over axis 1.
    """

    def __init__(self,
                 dict_size,
                 emb_dim,
                 bow_dim,
                 seq_len,
                 padding_idx):
        super(BOWEncoder, self).__init__()
        self.dict_size = dict_size
        self.bow_dim = bow_dim
        self.seq_len = seq_len
        self.emb_dim = emb_dim
        self.padding_idx = padding_idx

        embedding_attr = fluid.ParamAttr(
            name='emb', initializer=fluid.initializer.Xavier())
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=embedding_attr)

    def forward(self, input):
        """Return the sum of the embeddings along axis 1."""
        embedded = self.emb_layer(input)
        per_token = fluid.layers.reshape(
            embedded, shape=[-1, self.seq_len, self.bow_dim])
        return fluid.layers.reduce_sum(per_token, dim=1)
class DynamicGRU(fluid.dygraph.Layer):
    """GRU unrolled step by step over axis 1 of the input using ``GRUUnit``.

    Args:
        size: passed to ``GRUUnit`` as ``size * 3``.
        h_0: default initial hidden state, used when ``forward`` is not given
            one explicitly.
        param_attr: forwarded to ``GRUUnit``.
        bias_attr: forwarded to ``GRUUnit``.
        is_reverse: when true, steps are processed from the last index of
            axis 1 to the first; outputs are still returned in input order.
        gate_activation: forwarded to ``GRUUnit``.
        candidate_activation: forwarded to ``GRUUnit`` as ``activation``.
        origin_mode: forwarded to ``GRUUnit``.
        init_size: accepted but not used by this class.
    """

    def __init__(self,
                 size,
                 h_0=None,
                 param_attr=None,
                 bias_attr=None,
                 is_reverse=False,
                 gate_activation='sigmoid',
                 candidate_activation='tanh',
                 origin_mode=False,
                 init_size=None):
        super(DynamicGRU, self).__init__()
        self.gru_unit = GRUUnit(
            size * 3,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode)
        self.size = size
        self.h_0 = h_0
        self.is_reverse = is_reverse

    def forward(self, inputs, h_0=None):
        """Run the GRU over every step of axis 1 and return the hidden states.

        Args:
            inputs: 3-D input; axis 1 is iterated as the step axis.
            h_0: optional initial hidden state for this call. When omitted,
                the ``h_0`` given to the constructor is used, as before.

        Returns:
            The per-step hidden states concatenated along axis 1, in the same
            step order as ``inputs``.
        """
        # A per-call state lets callers whose batch size is only known at run
        # time supply a correctly shaped initial state.
        hidden = self.h_0 if h_0 is None else h_0
        num_steps = inputs.shape[1]
        step_indices = range(num_steps)
        if self.is_reverse:
            step_indices = reversed(step_indices)

        res = []
        for step in step_indices:
            input_ = inputs[:, step:step + 1, :]
            input_ = fluid.layers.reshape(
                input_, [-1, input_.shape[2]], inplace=False)
            hidden, reset, gate = self.gru_unit(input_, hidden)
            hidden_ = fluid.layers.reshape(
                hidden, [-1, 1, hidden.shape[1]], inplace=False)
            res.append(hidden_)
        if self.is_reverse:
            # Restore input order after processing back to front.
            res = res[::-1]
        res = fluid.layers.concat(res, axis=1)
        return res
class GRUEncoder(Layer):
    """
    simple GRUEncoder for simnet

    Embeds the input ids, projects the embeddings to ``gru_dim * 3``
    features, runs a ``DynamicGRU`` over them, then applies a max-reduction
    over axis 1 followed by ``tanh``.
    """

    def __init__(self,
                 dict_size,
                 emb_dim,
                 gru_dim,
                 hidden_dim,
                 padding_idx,
                 seq_len):
        super(GRUEncoder, self).__init__()
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.gru_dim = gru_dim
        self.seq_len = seq_len
        self.hidden_dim = hidden_dim
        # Fixed: this previously read `self.padding_idx` before it was ever
        # assigned, so the constructor could not complete.
        self.padding_idx = padding_idx
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))
        self.gru_layer = DynamicGRU(self.gru_dim)
        self.proj_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.gru_dim * 3)

    def forward(self, input):
        """Return ``tanh`` of the max over axis 1 of the GRU hidden states."""
        emb = self.emb_layer(input)
        emb_proj = self.proj_layer(emb)
        # NOTE(review): the initial state is sized with hidden_dim while the
        # GRU is built with gru_dim; presumably the two must be equal -
        # confirm against the callers' configuration.
        h_0 = np.zeros((emb_proj.shape[0], self.hidden_dim), dtype="float32")
        h_0 = to_variable(h_0)
        # Fixed: DynamicGRU.forward takes only the input sequence, so passing
        # `h_0=` as a keyword failed. The zero state is installed on the
        # layer, which is where its forward reads the initial state from.
        self.gru_layer.h_0 = h_0
        gru = self.gru_layer(emb_proj)
        gru = fluid.layers.reduce_max(gru, dim=1)
        gru = fluid.layers.tanh(gru)
        return gru
class LSTMEncoder(Layer):
    """
    simple LSTMEncoder for simnet

    Embeds the input ids, projects the embeddings to ``lstm_dim * 4``
    features and runs them through an ``RNN`` wrapping a ``BasicLSTMCell``.
    The LSTM output is max-reduced over axis 1, reshaped to
    ``[-1, seq_len, hidden_dim]``, summed over axis 1 and passed through
    ``tanh``.
    """

    def __init__(self,
                 dict_size,
                 emb_dim,
                 lstm_dim,
                 hidden_dim,
                 seq_len,
                 padding_idx,
                 is_reverse):
        """
        initialize

        Args:
            dict_size: number of rows of the embedding table.
            emb_dim: width of the embedding table.
            lstm_dim: hidden size of the LSTM cell; the cell input size and
                the projection output size are ``lstm_dim * 4``.
            hidden_dim: input width of the projection layer, also used for
                the reshape in ``forward``.
            seq_len: sequence length used for the reshape in ``forward``.
            padding_idx: forwarded to the embedding layer.
            is_reverse: forwarded to the ``RNN`` wrapper.
        """
        super(LSTMEncoder, self).__init__()
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.lstm_dim = lstm_dim
        self.hidden_dim = hidden_dim
        self.seq_len = seq_len
        # Fixed: the constructor argument used to be discarded in favour of a
        # hard-coded False, so a reversed LSTM could never be requested.
        self.is_reverse = is_reverse
        self.padding_idx = padding_idx
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))
        self.lstm_cell = BasicLSTMCell(
            hidden_size=self.lstm_dim, input_size=self.lstm_dim * 4)
        # NOTE(review): time_major=True is kept from the original; confirm it
        # matches the layout of the projected embeddings.
        self.lstm_layer = RNN(
            cell=self.lstm_cell, time_major=True, is_reverse=self.is_reverse)
        self.proj_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.lstm_dim * 4)

    def forward(self, input):
        """Encode the input ids and return the ``tanh``-activated features."""
        emb = self.emb_layer(input)
        emb_proj = self.proj_layer(emb)
        # The RNN wrapper returns (outputs, final_states); only outputs are used.
        emb_lstm, _ = self.lstm_layer(emb_proj)
        emb_reduce = fluid.layers.reduce_max(emb_lstm, dim=1)
        emb_reshape = fluid.layers.reshape(
            emb_reduce, shape=[-1, self.seq_len, self.hidden_dim])
        emb_lstm = fluid.layers.reduce_sum(emb_reshape, dim=1)
        emb_last = fluid.layers.tanh(emb_lstm)
        return emb_last
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册