提交 188d9018 编写于 作者: K kinghuin

fix conflict

#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import numpy as np
import os
import time
import paddle
import paddle.fluid as fluid
import paddlehub as hub
# yapf: disable
# Command-line interface of the prediction demo.
# The boolean flags are parsed with ast.literal_eval, so they must be spelled
# as Python literals (True / False).
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
# Only the pre-defined networks known to TextClassifierTask are accepted, so a
# misspelled name is rejected by argparse right away instead of tripping an
# assertion after the module has been loaded.
parser.add_argument(
    "--network",
    type=str,
    default='bilstm',
    choices=['bilstm', 'bow', 'cnn', 'dpcnn', 'gru', 'lstm'],
    help="Pre-defined network which was connected after Transformer model, such as ERNIE, BERT ,RoBERTa and ELECTRA.")
args = parser.parse_args()
# yapf: enable.

if __name__ == '__main__':
    # Fetch the ERNIE Tiny module and build its program for the requested
    # maximum sequence length.
    ernie = hub.Module(name="ernie_tiny")
    module_inputs, module_outputs, _program = ernie.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Dataset of the demo. Alternatives named by the authors:
    # GLUE/XNLI/ChinesesGLUE/NLPCC-DBQA/LCQMC
    corpus = hub.dataset.ChnSentiCorp()

    # ernie_tiny tokenizes Chinese sentences into sub-words, hence the
    # sentence-piece model and the word dict are handed to the reader.
    # For any other module both paths should be None.
    vocab_file = ernie.get_vocab_path()
    spm_file = ernie.get_spm_path()
    word_dict_file = ernie.get_word_dict_path()
    text_reader = hub.reader.ClassifyReader(
        dataset=corpus,
        vocab_path=vocab_file,
        max_seq_len=args.max_seq_len,
        sp_model_path=spm_file,
        word_dict_path=word_dict_file)

    # "sequence_output" is the token-level output of the module
    # ("pooled_output" would be the sentence-level one); the pre-defined
    # networks are stacked on top of the token-level feature.
    sequence_feature = module_outputs["sequence_output"]

    # Every input tensor of the module has to be fed.
    feed_names = [
        module_inputs[key].name
        for key in ("input_ids", "position_ids", "segment_ids", "input_mask")
    ]

    # Running configuration for the PaddleHub Finetune API.
    run_config = hub.RunConfig(
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.AdamWeightDecayStrategy())

    # Classification task with a PaddleHub pre-defined network on top of the
    # module. Network choices: bilstm, bow, cnn, dpcnn, gru, lstm.
    # When a network is used, the task must receive outputs["sequence_output"]
    # as token_feature and `feature` must stay None.
    classifier = hub.TextClassifierTask(
        data_reader=text_reader,
        token_feature=sequence_feature,
        feed_list=feed_names,
        network=args.network,
        num_classes=corpus.num_labels,
        config=run_config)

    # Texts to be predicted.
    samples = [
        ["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"],
        ["交通方便;环境很好;服务态度很好 房间较小"],
        ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"],
    ]
    predictions = classifier.predict(data=samples, return_result=True)
    print(predictions)
# Launch text_cls.py for fine-tuning with only GPU 0 visible to the process.
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0

# Directory handed to --checkpoint_dir.
CKPT_DIR="./ckpt_chnsenticorp"

# The expansion is quoted so that a checkpoint path containing spaces or glob
# characters is still passed as one argument.
python -u text_cls.py \
    --batch_size=24 \
    --use_gpu=True \
    --checkpoint_dir="${CKPT_DIR}" \
    --learning_rate=5e-5 \
    --weight_decay=0.01 \
    --max_seq_len=128 \
    --warmup_proportion=0.1 \
    --num_epoch=3 \
    --use_data_parallel=True
# The suggested hyperparameters for different tasks
# for ChineseGLUE:
# TNews: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# XNLI_zh: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=128, lr=5e-5
# INEWS: batch_size=4, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
# DRCD: see demo: reading-comprehension
# CMRC2018: see demo: reading-comprehension
# BQ: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=100, lr=1e-5
# MSRANER: see demo: sequence-labeling
# THUCNEWS: batch_size=8, weight_decay=0, num_epoch=2, max_seq_len=512, lr=5e-5
# IFLYTEKDATA: batch_size=16, weight_decay=0, num_epoch=5, max_seq_len=256, lr=1e-5
# for other tasks:
# ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# NLPCC_DBQA: batch_size=8, weight_decay=0.01, num_epoch=3, max_seq_len=512, lr=2e-5
# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=2e-5
# QQP: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# QNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# SST-2: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# CoLA: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# MRPC: batch_size=32, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# RTE: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=3e-5
# MNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# Specify the matched/mismatched dev and test dataset with an underscore.
# mnli_m or mnli: dev and test in matched dataset.
# mnli_mm: dev and test in mismatched dataset.
# The difference can be seen in https://www.nyu.edu/projects/bowman/multinli/paper.pdf.
# If you are not sure which one to pick, just use mnli or mnli_m.
# XNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# Specify the language with an underscore like xnli_zh.
# ar- Arabic bg- Bulgarian de- German
# el- Greek en- English es- Spanish
# fr- French hi- Hindi ru- Russian
# sw- Swahili th- Thai tr- Turkish
# ur- Urdu vi- Vietnamese zh- Chinese (Simplified)
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0
CKPT_DIR="./ckpt_chnsenticorp"
CKPT_DIR="./ckpt_chnsenticorp_predefine_net"
python -u text_classifier.py \
python -u text_cls_predefine_net.py \
--batch_size=24 \
--use_gpu=True \
--checkpoint_dir=${CKPT_DIR} \
......@@ -12,7 +12,8 @@ python -u text_classifier.py \
--max_seq_len=128 \
--warmup_proportion=0.1 \
--num_epoch=3 \
--use_data_parallel=True
--use_data_parallel=True \
--network=bilstm
# The suggested hyperparameters for different tasks
# for ChineseGLUE:
......
......@@ -3,7 +3,8 @@ export CUDA_VISIBLE_DEVICES=0
CKPT_DIR="./ckpt_chnsenticorp"
python -u predict.py --checkpoint_dir=$CKPT_DIR \
--max_seq_len=128 \
--use_gpu=True \
--batch_size=24 \
python -u predict.py \
--checkpoint_dir=$CKPT_DIR \
--max_seq_len=128 \
--use_gpu=True \
--batch_size=24
# Launch predict_predefine_net.py with only GPU 0 visible to the process.
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0

# Directory handed to --checkpoint_dir.
CKPT_DIR="./ckpt_chnsenticorp_predefine_net"

# The expansion is quoted so that a checkpoint path containing spaces or glob
# characters is still passed as one argument.
python -u predict_predefine_net.py \
    --checkpoint_dir="$CKPT_DIR" \
    --max_seq_len=128 \
    --use_gpu=True \
    --batch_size=24 \
    --network=bilstm
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
import argparse
import ast
import paddlehub as hub
# yapf: disable
# Command-line interface of the fine-tuning demo.
# The boolean flags are parsed with ast.literal_eval, so they must be spelled
# as Python literals (True / False).
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
# Only the pre-defined networks known to TextClassifierTask are accepted, so a
# misspelled name is rejected by argparse right away instead of tripping an
# assertion after the module has been loaded.
parser.add_argument(
    "--network",
    type=str,
    default='bilstm',
    choices=['bilstm', 'bow', 'cnn', 'dpcnn', 'gru', 'lstm'],
    help="Pre-defined network which was connected after Transformer model, such as ERNIE, BERT ,RoBERTa and ELECTRA.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.

if __name__ == '__main__':
    # Fetch the ERNIE Tiny module and build its program for the requested
    # maximum sequence length.
    ernie = hub.Module(name="ernie_tiny")
    module_inputs, module_outputs, _program = ernie.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Dataset of the demo, evaluated with accuracy. Alternatives named by the
    # authors: GLUE/XNLI/ChinesesGLUE/NLPCC-DBQA/LCQMC; the metric should be
    # acc, f1 or matthews.
    corpus = hub.dataset.ChnSentiCorp()
    eval_metrics = ["acc"]

    # ernie_tiny tokenizes Chinese sentences into sub-words, hence the
    # sentence-piece model and the word dict are handed to the reader.
    # For any other module both paths should be None.
    vocab_file = ernie.get_vocab_path()
    spm_file = ernie.get_spm_path()
    word_dict_file = ernie.get_word_dict_path()
    text_reader = hub.reader.ClassifyReader(
        dataset=corpus,
        vocab_path=vocab_file,
        max_seq_len=args.max_seq_len,
        sp_model_path=spm_file,
        word_dict_path=word_dict_file)

    # "sequence_output" is the token-level output of the module
    # ("pooled_output" would be the sentence-level one); the pre-defined
    # networks are stacked on top of the token-level feature.
    sequence_feature = module_outputs["sequence_output"]

    # Every input tensor of the module has to be fed.
    feed_names = [
        module_inputs[key].name
        for key in ("input_ids", "position_ids", "segment_ids", "input_mask")
    ]

    # Fine-tuning strategy built from the command-line hyperparameters.
    finetune_strategy = hub.AdamWeightDecayStrategy(
        warmup_proportion=args.warmup_proportion,
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate)

    # Running configuration for the PaddleHub Finetune API.
    run_config = hub.RunConfig(
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=finetune_strategy)

    # Classification task with a PaddleHub pre-defined network on top of the
    # module. Network choices: bilstm, bow, cnn, dpcnn, gru, lstm.
    # When a network is used, the task must receive outputs["sequence_output"]
    # as token_feature and `feature` must stay None.
    classifier = hub.TextClassifierTask(
        data_reader=text_reader,
        token_feature=sequence_feature,
        feed_list=feed_names,
        network=args.network,
        num_classes=corpus.num_labels,
        config=run_config,
        metrics_choices=eval_metrics)

    # Train, evaluate, test and save the model in a single call.
    classifier.finetune_and_eval()
......@@ -28,6 +28,7 @@ from . import io
from . import dataset
from . import finetune
from . import reader
from . import network
from .common.dir import USER_HOME
from .common.dir import HUB_HOME
......
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -36,7 +36,7 @@ from visualdl import LogWriter
import paddlehub as hub
from paddlehub.common.paddle_helper import dtype_map, clone_program
from paddlehub.common.utils import mkdir, version_compare
from paddlehub.common.utils import mkdir
from paddlehub.common.dir import tmp_dir
from paddlehub.common.logger import logger
from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint
......@@ -992,17 +992,12 @@ class BaseTask(object):
Returns:
RunState: the running result of predict phase
"""
if accelerate_mode:
if not version_compare(paddle.__version__, "1.6.1"):
logger.warning(
"Fail to open predict accelerate mode as it does not support paddle < 1.6.2. Please update PaddlePaddle."
)
accelerate_mode = False
if isinstance(self._base_data_reader, hub.reader.LACClassifyReader):
logger.warning(
"LACClassifyReader does not support predictor, the accelerate_mode is closed now."
)
accelerate_mode = False
if accelerate_mode and isinstance(self._base_data_reader,
hub.reader.LACClassifyReader):
logger.warning(
"LACClassifyReader does not support predictor, the accelerate_mode is closed now."
)
accelerate_mode = False
self.accelerate_mode = accelerate_mode
with self.phase_guard(phase="predict"):
......
......@@ -17,12 +17,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle
import paddle.fluid as fluid
import time
from paddlehub.common.logger import logger
from paddlehub.finetune.evaluate import calculate_f1_np, matthews_corrcoef
from paddlehub.reader.nlp_reader import ClassifyReader
import paddlehub.network as net
from .base_task import BaseTask
......@@ -104,7 +109,7 @@ class ClassifierTask(BaseTask):
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
run_state.run_results[-2]) * run_state.run_examples
acc_sum += np.mean(
run_state.run_results[2]) * run_state.run_examples
np_labels = run_state.run_results[0]
......@@ -147,7 +152,7 @@ class ClassifierTask(BaseTask):
results = []
for batch_state in run_states:
batch_result = batch_state.run_results
batch_infer = np.argmax(batch_result, axis=2)[0]
batch_infer = np.argmax(batch_result[0], axis=1)
results += [id2label[sample_infer] for sample_infer in batch_infer]
return results
......@@ -156,21 +161,73 @@ ImageClassifierTask = ClassifierTask
class TextClassifierTask(ClassifierTask):
"""
Create a text classification task.
It will use full-connect layer with softmax activation function to classify texts.
"""
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
feature=None,
token_feature=None,
network=None,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
"""
Args:
num_classes: total labels of the text classification task.
feed_list(list): the variable name that will be feeded to the main program
data_reader(object): data reader for the task. It must be one of ClassifyReader and LACClassifyReader.
feature(Variable): the `feature` will be used to classify texts. It must be the sentence-level feature, shape as [-1, emb_size]. `Token_feature` and `feature` couldn't be setted at the same time. One of them must be setted as not None. Default None.
token_feature(Variable): the `feature` will be used to connect the pre-defined network. It must be the token-level feature, shape as [-1, seq_len, emb_size]. Default None.
network(str): the pre-defined network. Choices: 'bilstm', 'bow', 'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. If network is setted, then `token_feature` must be setted and `feature` must be None.
main_program (object): the customized main program, default None.
startup_program (object): the customized startup program, default None.
config (RunConfig): run config for the task, such as batch_size, epoch, learning_rate setting and so on. Default None.
hidden_units(list): the element of `hidden_units` list is the full-connect layer size. It will add the full-connect layers to the program. Default None.
metrics_choices(list): metrics used to the task, default ["acc"].
"""
if (not feature) and (not token_feature):
logger.error(
'Both token_feature and feature are None, one of them must be setted.'
)
exit(1)
elif feature and token_feature:
logger.error(
'Both token_feature and feature are setted. One should be setted, the other should be None.'
)
exit(1)
if network:
assert network in [
'bilstm', 'bow', 'cnn', 'dpcnn', 'gru', 'lstm'
], 'network choice must be one of bilstm, bow, cnn, dpcnn, gru, lstm!'
assert token_feature and (
not feature
), 'If you wanna use network, you must set token_feature ranther than feature for TextClassifierTask!'
assert len(
token_feature.shape
) == 3, 'When you use network, the parameter token_feature must be the token-level feature, such as the sequence_output of ERNIE, BERT, RoBERTa and ELECTRA module.'
else:
assert feature and (
not token_feature
), 'If you do not use network, you must set feature ranther than token_feature for TextClassifierTask!'
assert len(
feature.shape
) == 2, 'When you do not use network, the parameter feture must be the sentence-level feature, such as the pooled_output of ERNIE, BERT, RoBERTa and ELECTRA module.'
self.network = network
if metrics_choices == "default":
metrics_choices = ["acc"]
super(TextClassifierTask, self).__init__(
data_reader=data_reader,
feature=feature,
feature=feature if feature else token_feature,
num_classes=num_classes,
feed_list=feed_list,
startup_program=startup_program,
......@@ -179,10 +236,33 @@ class TextClassifierTask(ClassifierTask):
metrics_choices=metrics_choices)
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if isinstance(self._base_data_reader, ClassifyReader):
# ClassifyReader will return the seqence length of an input text
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64', lod_level=0)
self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
# unpad the token_feature
unpad_feature = fluid.layers.sequence_unpad(
self.feature, length=self.seq_len_used)
if self.network:
# add pre-defined net
net_func = getattr(net.classification, self.network)
if self.network == 'dpcnn':
# deepcnn network is no need to unpad
cls_feats = net_func(
self.feature, emb_dim=self.feature.shape[-1])
else:
cls_feats = net_func(unpad_feature)
logger.info(
"%s has been added in the TextClassifierTask!" % self.network)
else:
# not use pre-defined net but to use fc net
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
......@@ -204,6 +284,33 @@ class TextClassifierTask(ClassifierTask):
return [logits]
@property
def feed_list(self):
    """Names of the variables that are fed in the current phase.

    Starts from a copy of the base feed list, adds the `seq_len` variable
    when the task reads from a ClassifyReader, and adds the label variable
    in the train and test phases.
    """
    names = list(self._base_feed_list)
    # ClassifyReader also yields the sequence length of every input text.
    if isinstance(self._base_data_reader, ClassifyReader):
        names.append(self.seq_len.name)
    needs_label = self.is_train_phase or self.is_test_phase
    if needs_label:
        names.append(self.labels[0].name)
    return names
@property
def fetch_list(self):
    """Names of the variables that are fetched in the current phase.

    Train/test phases fetch label, inferred label, first metric and loss.
    The predict phase fetches the first output, plus `seq_len` when the task
    reads from a ClassifyReader.
    """
    if not (self.is_train_phase or self.is_test_phase):
        # predict phase
        names = [self.outputs[0].name]
        if isinstance(self._base_data_reader, ClassifyReader):
            # fetching seq_len keeps save_inference_model from pruning it
            names.append(self.seq_len.name)
        return names

    return [
        self.labels[0].name,
        self.ret_infers.name,
        self.metrics[0].name,
        self.loss.name,
    ]
class MultiLabelClassifierTask(ClassifierTask):
def __init__(self,
......
......@@ -66,11 +66,7 @@ class SequenceLabelTask(BaseTask):
def _build_net(self):
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64', lod_level=0)
if version_compare(paddle.__version__, "1.6"):
self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
else:
self.seq_len_used = self.seq_len
self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
if self.add_crf:
unpad_feature = fluid.layers.sequence_unpad(
......
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import classification
# coding:utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provide nets for text classification
"""
import paddle
import paddle.fluid as fluid
def bilstm(token_embeddings, hid_dim=128, hid_dim2=96):
    """
    bilstm net

    Projects the token embeddings with two separate fc layers, runs a forward
    and a reverse dynamic_lstm over them, takes the last step of each output
    sequence, concatenates both along axis 1 and applies a tanh fc layer.

    Args:
        token_embeddings(Variable): token-level feature consumed by the
            sequence layers (presumably an un-padded LoD sequence; confirm
            against the caller).
        hid_dim(int): the fc projections and both dynamic_lstm layers use
            `hid_dim * 4` as their size. Default 128.
        hid_dim2(int): size of the final fc layer. Default 96.

    Returns:
        Variable: output of the final fc layer.
    """
    lstm_size = hid_dim * 4
    fc0 = fluid.layers.fc(input=token_embeddings, size=lstm_size)
    rfc0 = fluid.layers.fc(input=token_embeddings, size=lstm_size)
    # The cell states returned by dynamic_lstm are not needed.
    lstm_h, _ = fluid.layers.dynamic_lstm(
        input=fc0, size=lstm_size, is_reverse=False)
    rlstm_h, _ = fluid.layers.dynamic_lstm(
        input=rfc0, size=lstm_size, is_reverse=True)
    lstm_last = fluid.layers.sequence_last_step(input=lstm_h)
    rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h)
    # The two tanh ops that used to be built here were never consumed (the
    # concat below has always read the raw last steps), so they were dropped;
    # the returned value is unchanged.
    # NOTE(review): the sibling nets apply tanh before their fc layer - confirm
    # whether the tanh outputs were meant to be concatenated instead.
    # NOTE(review): for the reverse LSTM the last step may only have seen the
    # final token; sequence_first_step might be intended - confirm.

    # concat layer
    lstm_concat = fluid.layers.concat(input=[lstm_last, rlstm_last], axis=1)
    # full connect layer
    fc = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh')
    return fc
def bow(token_embeddings, hid_dim=128, hid_dim2=96):
    """
    bow net

    Sum-pools the token embeddings over the sequence, applies tanh and feeds
    the result through two tanh fc layers of size `hid_dim` and `hid_dim2`.

    Returns:
        Variable: output of the second fc layer.
    """
    layers = fluid.layers
    # bag-of-words: sum over the sequence, then squash
    pooled = layers.sequence_pool(input=token_embeddings, pool_type='sum')
    squashed = layers.tanh(pooled)
    # two stacked fully connected layers
    hidden = layers.fc(input=squashed, size=hid_dim, act="tanh")
    return layers.fc(input=hidden, size=hid_dim2, act="tanh")
def cnn(token_embeddings, hid_dim=128, win_size=3):
    """
    cnn net

    Runs a sequence convolution with `hid_dim` filters of width `win_size`
    (tanh activation, max pooling) and feeds the result through an fc layer
    of size `hid_dim` without activation.

    Returns:
        Variable: output of the fc layer.
    """
    conv_pool_args = dict(
        input=token_embeddings,
        num_filters=hid_dim,
        filter_size=win_size,
        act="tanh",
        pool_type="max")
    pooled = fluid.nets.sequence_conv_pool(**conv_pool_args)
    # fully connected layer on top of the pooled convolution
    return fluid.layers.fc(input=pooled, size=hid_dim)
def dpcnn(token_embeddings,
          hid_dim=128,
          channel_size=250,
          emb_dim=1024,
          blocks=6):
    """
    deepcnn net

    Builds a region embedding with a (3, emb_dim) convolution, adds one
    residual convolution block, then repeats `blocks` times: pool with
    size (3, 1) and stride (2, 1), followed by a residual convolution block.
    The result is globally pooled, squeezed and fed through a tanh fc layer
    of size `hid_dim`.

    Args:
        token_embeddings(Variable): token-level feature; a singleton axis is
            inserted at position 1 before the convolutions (presumably the
            padded [batch, seq_len, emb_size] tensor; confirm against caller).
        hid_dim(int): size of the final fc layer. Default 128.
        channel_size(int): number of filters of every convolution. Default 250.
        emb_dim(int): width of the region-embedding kernel. Default 1024.
        blocks(int): number of pool + residual block repetitions. Default 6.

    Returns:
        Variable: output of the final fc layer.
    """
    layers = fluid.layers

    def _relu_conv(x):
        # relu followed by a (3, 1) convolution
        activated = layers.relu(x)
        return layers.conv2d(activated, channel_size, (3, 1), padding=(1, 0))

    def _block(x):
        # two stacked relu + convolution stages
        return _relu_conv(_relu_conv(x))

    expanded = layers.unsqueeze(token_embeddings, axes=[1])
    region_embedding = layers.conv2d(
        expanded, channel_size, (3, emb_dim), padding=(1, 0))
    conv_features = _block(region_embedding) + region_embedding

    # repeated pooling + residual convolution blocks
    for _ in range(blocks):
        pooled = layers.pool2d(
            conv_features,
            pool_size=(3, 1),
            pool_stride=(2, 1),
            pool_padding=(1, 0))
        conv_features = pooled + _block(pooled)

    global_feature = layers.pool2d(conv_features, global_pooling=True)
    flat_feature = layers.squeeze(global_feature, axes=[2, 3])

    # fully connected layer
    return layers.fc(input=flat_feature, size=hid_dim, act="tanh")
def gru(token_embeddings, hid_dim=128, hid_dim2=96):
    """
    gru net

    Projects the token embeddings to `hid_dim * 3`, runs a forward
    dynamic_gru of size `hid_dim`, max-pools over the sequence, applies tanh
    and feeds the result through a tanh fc layer of size `hid_dim2`.

    Returns:
        Variable: output of the final fc layer.
    """
    layers = fluid.layers
    projected = layers.fc(input=token_embeddings, size=hid_dim * 3)
    hidden_seq = layers.dynamic_gru(
        input=projected, size=hid_dim, is_reverse=False)
    # max pooling over the sequence, then squash
    pooled = layers.sequence_pool(input=hidden_seq, pool_type='max')
    squashed = layers.tanh(pooled)
    return layers.fc(input=squashed, size=hid_dim2, act='tanh')
def lstm(token_embeddings, hid_dim=128, hid_dim2=96):
    """
    lstm net

    Projects the token embeddings to `hid_dim * 4`, runs a forward
    dynamic_lstm of the same size, max-pools the hidden sequence, applies
    tanh and feeds the result through a tanh fc layer of size `hid_dim2`.

    Returns:
        Variable: output of the final fc layer.
    """
    layers = fluid.layers
    lstm_size = hid_dim * 4
    projected = layers.fc(input=token_embeddings, size=lstm_size)
    # only the hidden sequence is used, the cell state is discarded
    hidden_seq, _cell = layers.dynamic_lstm(
        input=projected, size=lstm_size, is_reverse=False)
    # max pooling over the sequence, then squash
    pooled = layers.sequence_pool(input=hidden_seq, pool_type='max')
    squashed = layers.tanh(pooled)
    # fully connected layer
    return layers.fc(input=squashed, size=hid_dim2, act='tanh')
......@@ -65,7 +65,6 @@ class BaseNLPReader(BaseReader):
logger.warning(
"use_task_id has been de discarded since PaddleHub v1.4.0, it's no necessary to feed task_ids now."
)
self.task_id = 0
self.Record_With_Label_Id = namedtuple(
'Record',
......@@ -272,11 +271,12 @@ class ClassifyReader(BaseNLPReader):
batch_text_type_ids = [record.text_type_ids for record in batch_records]
batch_position_ids = [record.position_ids for record in batch_records]
padded_token_ids, input_mask = pad_batch_data(
padded_token_ids, input_mask, batch_seq_lens = pad_batch_data(
batch_token_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id,
return_input_mask=True)
return_input_mask=True,
return_seq_lens=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids,
max_seq_len=self.max_seq_len,
......@@ -286,36 +286,16 @@ class ClassifyReader(BaseNLPReader):
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_seq_lens
]
if phase != "predict":
batch_labels = [record.label_id for record in batch_records]
batch_labels = np.array(batch_labels).astype("int64").reshape(
[-1, 1])
return_list += [batch_labels]
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_labels
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids, batch_labels
]
else:
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids
]
return return_list
......@@ -369,40 +349,20 @@ class SequenceLabelReader(BaseNLPReader):
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask
]
if phase != "predict":
batch_label_ids = [record.label_id for record in batch_records]
padded_label_ids = pad_batch_data(
batch_label_ids,
max_seq_len=self.max_seq_len,
pad_idx=len(self.label_map) - 1)
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_label_ids, batch_seq_lens
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids, padded_label_ids,
batch_seq_lens
]
return_list += [padded_label_ids, batch_seq_lens]
else:
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_seq_lens
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids, batch_seq_lens
]
return_list += [batch_seq_lens]
return return_list
......@@ -514,37 +474,18 @@ class MultiLabelClassifyReader(BaseNLPReader):
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask
]
if phase != "predict":
batch_labels_ids = [record.label_id for record in batch_records]
num_label = len(self.dataset.get_labels())
batch_labels = np.array(batch_labels_ids).astype("int64").reshape(
[-1, num_label])
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_labels
]
return_list += [batch_labels]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids, batch_labels
]
else:
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids
]
return return_list
def _convert_example_to_record(self,
......@@ -634,37 +575,17 @@ class RegressionReader(BaseNLPReader):
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask
]
if phase != "predict":
batch_labels = [record.label_id for record in batch_records]
# the only diff with ClassifyReader: astype("float32")
batch_labels = np.array(batch_labels).astype("float32").reshape(
[-1, 1])
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_labels
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids, batch_labels
]
else:
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids
]
return_list += [batch_labels]
return return_list
......@@ -831,6 +752,10 @@ class ReadingComprehensionReader(BaseNLPReader):
pad_idx=self.pad_id,
max_seq_len=self.max_seq_len)
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_unique_ids
]
if phase != "predict":
batch_start_position = [
record.start_position for record in batch_records
......@@ -843,33 +768,8 @@ class ReadingComprehensionReader(BaseNLPReader):
batch_end_position = np.array(batch_end_position).astype(
"int64").reshape([-1, 1])
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_unique_ids, batch_start_position,
batch_end_position
]
return_list += [batch_start_position, batch_end_position]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids, batch_unique_ids,
batch_start_position, batch_end_position
]
else:
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_unique_ids
]
if self.use_task_id:
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, padded_task_ids, batch_unique_ids
]
return return_list
def _prepare_batch_data(self, records, batch_size, phase=None):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册