提交 b9ad9775 编写于 作者: B BinLong

Merge branch 'develop' of github.com:PaddlePaddle/PaddleHub into develop

......@@ -36,6 +36,8 @@ $ pip install --upgrade paddlepaddle
--checkpoint_dir: 模型保存路径,PaddleHub会自动保存验证集上表现最好的模型。默认为paddlehub_finetune_ckpt
--dataset: 使用什么数据集进行finetune, 脚本支持的数据集分别是{flowers/dogcat/stanforddogs/indoor67/food101}。默认为flowers
--use_gpu: 是否使用GPU进行训练,如果机器支持GPU且安装了GPU版本的PaddlePaddle,我们建议您打开这个开关。默认关闭
--use_data_parallel: 是否使用数据并行,打开该开关时,会将数据分散到不同的卡上进行训练(CPU下会分布到不同线程)。默认关闭
--use_pyreader: 是否使用pyreader进行数据喂入。默认关闭
```
## 进行预测
......@@ -51,6 +53,7 @@ $ pip install --upgrade paddlepaddle
--checkpoint_dir: 模型保存路径,PaddleHub会自动保存验证集上表现最好的模型。默认为paddlehub_finetune_ckpt
--dataset: 使用什么数据集进行finetune, 脚本支持的数据集分别是{flowers/dogcat}。默认为flowers
--use_gpu: 是否使用GPU进行预测,如果本机支持GPU且安装了GPU版本的PaddlePaddle,我们建议您打开这个开关。默认关闭
--use_pyreader: 是否使用pyreader进行数据喂入。默认关闭
```
`注意`:进行预测时,所选择的module,checkpoint_dir,dataset必须和finetune所用的一样
#coding:utf-8
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
......@@ -9,11 +10,13 @@ import numpy as np
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=bool, default=True, help="Whether use GPU for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="resnet50", help="Module used as feature extractor.")
parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
# yapf: enable.
module_map = {
......@@ -56,6 +59,8 @@ def finetune(args):
feed_list = [img.name]
config = hub.RunConfig(
use_data_parallel=args.use_data_parallel,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
......
#coding:utf-8
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
......@@ -8,11 +9,12 @@ import numpy as np
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--use_gpu", type=bool, default=False, help="Whether use GPU for predict.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for predict.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="resnet50", help="Module used as a feature extractor.")
parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
# yapf: enable.
module_map = {
......@@ -56,6 +58,7 @@ def predict(args):
config = hub.RunConfig(
use_data_parallel=False,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
enable_memory_optim=False,
......
......@@ -19,10 +19,10 @@ if __name__ == "__main__":
results = lac.lexical_analysis(data=inputs)
for result in results:
if six.PY2:
print(json.dumps(
result['word'], encoding="utf8", ensure_ascii=False))
print(json.dumps(
result['tag'], encoding="utf8", ensure_ascii=False))
print(
json.dumps(result['word'], encoding="utf8", ensure_ascii=False))
print(
json.dumps(result['tag'], encoding="utf8", ensure_ascii=False))
else:
print(result['word'])
print(result['tag'])
......@@ -21,7 +21,7 @@ if __name__ == "__main__":
results[index]["text"] = text
for index, result in enumerate(results):
if six.PY2:
print(json.dumps(
results[index], encoding="utf8", ensure_ascii=False))
print(
json.dumps(results[index], encoding="utf8", ensure_ascii=False))
else:
print(results[index])
......@@ -33,15 +33,16 @@ from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# loading Paddlehub ERNIE pretrained model
module = hub.Module(name="ernie")
input_dict, output_dict, program = module.context(
max_seq_len=args.max_seq_len)
inputs, outputs, program = module.context(max_seq_len=args.max_seq_len)
# Sentence labeling dataset reader
dataset = hub.dataset.MSRA_NER()
......@@ -53,70 +54,67 @@ if __name__ == '__main__':
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
with fluid.program_guard(program):
# Use "sequence_outputs" for token-level output.
sequence_output = output_dict["sequence_output"]
# Define a classification finetune task by PaddleHub's API
seq_label_task = hub.create_seq_label_task(
feature=sequence_output,
num_classes=dataset.num_labels,
max_seq_len=args.max_seq_len)
# Construct transfer learning network
# Use "sequence_output" for token-level output.
sequence_output = outputs["sequence_output"]
# Set up the feed list for the data feeder
# Must feed all the tensors that ERNIE's module needs
# Compared to the classification task, we need to add the seq_len tensor to the feed list
feed_list = [
input_dict["input_ids"].name, input_dict["position_ids"].name,
input_dict["segment_ids"].name, input_dict["input_mask"].name,
seq_label_task.variable('label').name,
seq_label_task.variable('seq_len').name
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
fetch_list = [
seq_label_task.variable("labels").name,
seq_label_task.variable("infers").name,
seq_label_task.variable("seq_len").name
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=False,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
enable_memory_optim=False,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a sequence labeling finetune task by PaddleHub's API
seq_label_task = hub.SequenceLabelTask(
data_reader=reader,
feature=sequence_output,
feed_list=feed_list,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels,
config=config)
# test data
data = [
["我们变而以书会友,以书结缘,把欧美、港台流行的食品类图谱、画册、工具书汇集一堂。"],
["为了跟踪国际最新食品工艺、流行趋势,大量搜集海外专业书刊资料是提高技艺的捷径。"],
["其中线装古籍逾千册;民国出版物几百种;珍本四册、稀见本四百余册,出版时间跨越三百余年。"],
["有的古木交柯,春机荣欣,从诗人句中得之,而入画中,观之令人心驰。"],
["不过重在晋趣,略增明人气息,妙在集古有道、不露痕迹罢了。"],
]
# classification probability tensor
probs = seq_label_task.variable("probs")
# load best model checkpoint
fluid.io.load_persistables(exe, args.checkpoint_dir)
inference_program = program.clone(for_test=True)
results = seq_label_task.predict(data=data)
# calculate the num of label from probs variable shape
num_labels = seq_label_task.variable("probs").shape[1]
for num_batch, batch_results in enumerate(results):
infers = batch_results[0].reshape([-1]).astype(np.int32).tolist()
np_lens = batch_results[1]
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
test_reader = reader.data_generator(phase='test', shuffle=False)
test_examples = dataset.get_test_examples()
total_label, total_infer, total_correct = 0.0, 0.0, 0.0
for index, batch in enumerate(test_reader()):
np_labels, np_infers, np_lens = exe.run(
feed=data_feeder.feed(batch),
fetch_list=fetch_list,
program=inference_program)
label_num, infer_num, correct_num = chunk_eval(
np_labels, np_infers, np_lens, num_labels)
for index, np_len in enumerate(np_lens):
labels = infers[index * args.max_seq_len:(index + 1) *
args.max_seq_len]
total_infer += infer_num
total_label += label_num
total_correct += correct_num
labels = np_labels.reshape([-1]).astype(np.int32).tolist()
label_str = ""
count = 0
for label_val in labels:
label_str += inv_label_map[label_val]
count += 1
if count == np_lens:
if count == np_len:
break
print("%s\tpredict=%s" % (test_examples[index], label_str))
precision, recall, f1 = calculate_f1(total_label, total_infer,
total_correct)
print("F1-Score=%f, precision=%f, recall=%f " % (f1, precision, recall))
# Drop the label results of CLS and SEP Token
print(
"%s\tpredict=%s" %
(data[num_batch * args.batch_size + index][0], label_str[1:-1]))
export CUDA_VISIBLE_DEVICES=0
CKPT_DIR="./ckpt_sequence_label/best_model"
CKPT_DIR="./ckpt_sequence_label"
python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 --use_gpu True
......@@ -7,4 +7,6 @@ python -u sequence_label.py \
--num_epoch 3 \
--checkpoint_dir $CKPT_DIR \
--max_seq_len 256 \
--learning_rate 5e-5
--learning_rate 5e-5 \
--use_pyreader True \
--use_data_parallel True
......@@ -30,40 +30,34 @@ parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# Step1: load Paddlehub ERNIE pretrained model
# Load Paddlehub ERNIE pretrained model
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use SequenceLabelReader to read dataset
# Download dataset and use SequenceLabelReader to read dataset
dataset = hub.dataset.MSRA_NER()
reader = hub.reader.SequenceLabelReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
# Step3: construct transfer learning network
# Construct transfer learning network
# Use "sequence_output" for token-level output.
sequence_output = outputs["sequence_output"]
# Define a sequence labeling finetune task by PaddleHub's API
seq_label_task = hub.create_seq_label_task(
feature=sequence_output,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels)
# Setup feed list for data feeder
# Must feed all the tensor of ERNIE's module need
# Compared to classification task, we need add seq_len tensor to feedlist
feed_list = [
inputs["input_ids"].name, inputs["position_ids"].name,
inputs["segment_ids"].name, inputs["input_mask"].name,
seq_label_task.variable('label').name,
seq_label_task.variable('seq_len').name
inputs["segment_ids"].name, inputs["input_mask"].name
]
# Select a finetune strategy
......@@ -75,16 +69,23 @@ if __name__ == '__main__':
# Setup runing config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=args.use_data_parallel,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=strategy)
# Finetune and evaluate model by PaddleHub's API
# will finish training, evaluation, testing, save model automatically
hub.finetune_and_eval(
task=seq_label_task,
# Define a sequence labeling finetune task by PaddleHub's API
seq_label_task = hub.SequenceLabelTask(
data_reader=reader,
feature=sequence_output,
feed_list=feed_list,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels,
config=config)
# Finetune and evaluate model by PaddleHub's API
# will finish training, evaluation, testing, save model automatically
seq_label_task.finetune_and_eval()
......@@ -31,16 +31,17 @@ import paddlehub as hub
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# loading Paddlehub ERNIE pretrained model
module = hub.Module(name="ernie")
input_dict, output_dict, program = module.context(
max_seq_len=args.max_seq_len)
inputs, outputs, program = module.context(max_seq_len=args.max_seq_len)
# Sentence classification dataset reader
dataset = hub.dataset.ChnSentiCorp()
......@@ -51,46 +52,55 @@ if __name__ == '__main__':
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
with fluid.program_guard(program):
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
pooled_output = output_dict["pooled_output"]
# Define a classfication finetune task by PaddleHub's API
cls_task = hub.create_text_cls_task(
feature=pooled_output, num_classes=dataset.num_labels)
# Construct transfer learning network
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]
# Setup feed list for data feeder
# Must feed all the tensor of ERNIE's module need
feed_list = [
input_dict["input_ids"].name, input_dict["position_ids"].name,
input_dict["segment_ids"].name, input_dict["input_mask"].name,
cls_task.variable('label').name
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# classification probability tensor
probs = cls_task.variable("probs")
# Setup runing config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=False,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
enable_memory_optim=False,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
pred = fluid.layers.argmax(probs, axis=1)
# load best model checkpoint
fluid.io.load_persistables(exe, args.checkpoint_dir)
# Define a classification finetune task by PaddleHub's API
cls_task = hub.TextClassifierTask(
data_reader=reader,
feature=pooled_output,
feed_list=feed_list,
num_classes=dataset.num_labels,
config=config)
inference_program = program.clone(for_test=True)
# Data to be predicted
data = [
["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"], ["交通方便;环境很好;服务态度很好 房间较小"],
[
"还稍微重了点,可能是硬盘大的原故,还要再轻半斤就好了。其他要进一步验证。贴的几种膜气泡较多,用不了多久就要更换了,屏幕膜稍好点,但比没有要强多了。建议配赠几张膜让用用户自己贴。"
],
[
"前台接待太差,酒店有A B楼之分,本人check-in后,前台未告诉B楼在何处,并且B楼无明显指示;房间太小,根本不像4星级设施,下次不会再选择入住此店啦"
], ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"]
]
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
test_reader = reader.data_generator(phase='test', shuffle=False)
test_examples = dataset.get_test_examples()
total = 0
correct = 0
for index, batch in enumerate(test_reader()):
pred_v = exe.run(
feed=data_feeder.feed(batch),
fetch_list=[pred.name],
program=inference_program)
total += 1
if (pred_v[0][0] == int(test_examples[index].label)):
correct += 1
acc = 1.0 * correct / total
print("%s\tpredict=%s" % (test_examples[index], pred_v[0][0]))
print("accuracy = %f" % acc)
index = 0
results = cls_task.predict(data=data)
for batch_result in results:
# get predict index
batch_result = np.argmax(batch_result, axis=2)[0]
for result in batch_result:
print("%s\tpredict=%s" % (data[index][0], result))
index += 1
export CUDA_VISIBLE_DEVICES=1
export CUDA_VISIBLE_DEVICES=0
# User can select chnsenticorp, nlpcc_dbqa, lcqmc for different task
DATASET="chnsenticorp"
......@@ -16,4 +16,6 @@ python -u text_classifier.py \
--learning_rate=5e-5 \
--weight_decay=0.01 \
--max_seq_len=128 \
--num_epoch=3
--num_epoch=3 \
--use_pyreader=True \
--use_data_parallel=True \
export CUDA_VISIBLE_DEVICES=0
CKPT_DIR="./ckpt_chnsenticorp/best_model"
CKPT_DIR="./ckpt_chnsenticorp"
python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 --use_gpu False
......@@ -32,17 +32,19 @@ parser.add_argument("--data_dir", type=str, default=None, help="Path to training
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# Step1: load Paddlehub ERNIE pretrained model
# Load Paddlehub ERNIE pretrained model
module = hub.Module(name="ernie")
# module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Step2: Download dataset and use ClassifyReader to read dataset
# Download dataset and use ClassifyReader to read dataset
dataset = None
if args.dataset.lower() == "chnsenticorp":
dataset = hub.dataset.ChnSentiCorp()
......@@ -58,39 +60,44 @@ if __name__ == '__main__':
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
# Step3: construct transfer learning network
# Construct transfer learning network
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]
# Define a classfication finetune task by PaddleHub's API
cls_task = hub.create_text_cls_task(
feature=pooled_output, num_classes=dataset.num_labels)
# Setup feed list for data feeder
# Must feed all the tensor of ERNIE's module need
feed_list = [
inputs["input_ids"].name, inputs["position_ids"].name,
inputs["segment_ids"].name, inputs["input_mask"].name,
cls_task.variable('label').name
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Step4: Select finetune strategy, setup config and finetune
# Select finetune strategy, setup config and finetune
strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay,
learning_rate=args.learning_rate,
lr_scheduler="linear_decay",
)
lr_scheduler="linear_decay")
# Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=args.use_data_parallel,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=strategy)
# Define a classification finetune task by PaddleHub's API
cls_task = hub.TextClassifierTask(
data_reader=reader,
feature=pooled_output,
feed_list=feed_list,
num_classes=dataset.num_labels,
config=config)
# Finetune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically
hub.finetune_and_eval(
task=cls_task, data_reader=reader, feed_list=feed_list, config=config)
cls_task.finetune_and_eval()
......@@ -50,7 +50,7 @@ class ClearCommand(BaseCommand):
def __init__(self, name):
super(ClearCommand, self).__init__(name)
self.show_in_help = True
self.description = "Clear all cache data."
self.description = "Clear all cached data."
def cache_dir(self):
return CACHE_HOME
......
......@@ -110,8 +110,17 @@ class BasicTask(object):
# run config
self.config = config if config else RunConfig()
self.place, self.device_count = hub.common.get_running_device_info(
self.config)
self.place = self.places[0]
self.device_count = len(self.places)
if self.config.batch_size < self.device_count:
logger.warning(
"Batch size({}) is less than the count of devices({}), which is not allowed in current Paddle versions"
.format(self.config.batch_size, self.device_count))
logger.warning("Batch size automatically adjusted to {}".format(
self.device_count))
self.config._batch_size = self.device_count
self.exe = fluid.Executor(place=self.place)
self.build_strategy = fluid.BuildStrategy()
if self.config.enable_memory_optim:
......@@ -239,6 +248,12 @@ class BasicTask(object):
self.exe.run(self.env.startup_program)
self._build_env_end_event()
# Places (devices) used by this task: the result of
# fluid.framework.cuda_places() when self.config.use_cuda is truthy,
# otherwise the result of fluid.framework.cpu_places().
@property
def places(self):
if self.config.use_cuda:
return fluid.framework.cuda_places()
return fluid.framework.cpu_places()
# True only when the task's current phase (self.phase) is "train".
@property
def is_train_phase(self):
return self.phase in ["train"]
......@@ -481,6 +496,9 @@ class BasicTask(object):
period_run_states = []
for run_step, batch in enumerate(self.reader(), start=1):
if self.config.use_data_parallel and len(batch) < self.device_count:
continue
step_run_state = RunState(len(self.fetch_list))
step_run_state.run_step = 1
num_batch_examples = len(batch)
......@@ -554,10 +572,10 @@ class BasicTask(object):
class ClassifierTask(BasicTask):
def __init__(self,
data_reader,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None):
......@@ -662,10 +680,10 @@ ImageClassifierTask = ClassifierTask
class TextClassifierTask(ClassifierTask):
def __init__(self,
data_reader,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None):
......@@ -711,8 +729,8 @@ class SequenceLabelTask(BasicTask):
feature,
max_seq_len,
num_classes,
data_reader,
feed_list,
data_reader,
startup_program=None,
config=None,
):
......@@ -743,6 +761,14 @@ class SequenceLabelTask(BasicTask):
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
ret_infers = fluid.layers.assign(self.ret_infers)
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64')
seq_len = fluid.layers.assign(self.seq_len)
logits = self.logits
logits = fluid.layers.flatten(logits, axis=2)
logits = fluid.layers.softmax(logits)
......@@ -761,13 +787,8 @@ class SequenceLabelTask(BasicTask):
return loss
def _add_metrics(self):
ret_labels = fluid.layers.reshape(x=self.label, shape=[-1, 1])
ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64')
seq_len = fluid.layers.assign(self.seq_len)
return [ret_labels, ret_infers, seq_len]
self.ret_labels = fluid.layers.reshape(x=self.label, shape=[-1, 1])
return [self.ret_labels, self.ret_infers, self.seq_len]
def _build_env_end_event(self):
with self.log_writer.mode(self.phase) as logw:
......@@ -834,4 +855,14 @@ class SequenceLabelTask(BasicTask):
feed_list = [varname for varname in self._base_feed_list]
if self.is_train_phase or self.is_test_phase:
feed_list += [self.label.name, self.seq_len.name]
else:
feed_list += [self.seq_len.name]
return feed_list
# Names of the variables to fetch, chosen by the current phase:
#   - train or test: the name of every entry in self.metrics, followed by
#     the loss variable's name;
#   - predict: the names of self.ret_infers and self.seq_len;
#   - any other phase: the name of self.output only.
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [metric.name for metric in self.metrics] + [self.loss.name]
elif self.is_predict_phase:
return [self.ret_infers.name] + [self.seq_len.name]
return [self.output.name]
......@@ -463,13 +463,22 @@ class Module(object):
with fluid.program_guard(program):
result = []
index = 0
if "PADDLEHUB_CUDA_ENABLE" in os.environ:
place = fluid.CUDAPlace(0)
else:
place = fluid.CPUPlace()
if "PADDLEHUB_BATCH_SIZE" in os.environ:
batch_size = os.environ["PADDLEHUB_BATCH_SIZE"]
else:
batch_size = 1
exe = fluid.Executor(place=place)
data = self.processor.preprocess(
sign_name=sign_name, data_dict=data)
data_format = self.processor.data_format(sign_name=sign_name)
reader, feeder = _get_reader_and_feeder(data_format, data, place)
reader = paddle.batch(reader, batch_size=2)
reader = paddle.batch(reader, batch_size=batch_size)
for batch in reader():
data_out = exe.run(
feed=feeder.feed(batch),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册