Commit 2620edc3, authored by wuzewu

Refactoring the task class

Parent 5ee30a94
@@ -8,7 +8,7 @@ import numpy as np
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=bool, default=False, help="Whether use GPU for fine-tuning.")
parser.add_argument("--use_gpu", type=bool, default=True, help="Whether use GPU for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="resnet50", help="Module used as feature extractor.")
@@ -50,11 +50,9 @@ def finetune(args):
dataset=dataset)
feature_map = output_dict["feature_map"]
task = hub.create_img_cls_task(
feature=feature_map, num_classes=dataset.num_labels)
img = input_dict["image"]
feed_list = [img.name, task.variable('label').name]
feed_list = [img.name]
config = hub.RunConfig(
use_cuda=args.use_gpu,
@@ -64,8 +62,13 @@ def finetune(args):
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
hub.finetune_and_eval(
task, feed_list=feed_list, data_reader=data_reader, config=config)
task = hub.ImageClassifierTask(
data_reader=data_reader,
feed_list=feed_list,
feature=feature_map,
num_classes=dataset.num_labels,
config=config)
task.finetune_and_eval()
if __name__ == "__main__":
......
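Taken together, the hunks above replace the old `hub.create_img_cls_task` / `hub.finetune_and_eval(...)` pair with a task object that owns its reader, feed list and run config. A minimal sketch of the new fine-tuning flow, reconstructed from the lines shown here plus the script's argument defaults (the `module.context` call and the exact `RunConfig` fields sit outside the visible hunks, so treat those details as assumptions):

import paddlehub as hub

# Feature extractor published on PaddleHub (the script's default module).
module = hub.Module(name="resnet50")
input_dict, output_dict, program = module.context(trainable=True)

# Dataset and reader matched to the module's expected input statistics.
dataset = hub.dataset.Flowers()
data_reader = hub.reader.ImageClassificationReader(
    image_width=module.get_expected_image_width(),
    image_height=module.get_expected_image_height(),
    images_mean=module.get_pretrained_images_mean(),
    images_std=module.get_pretrained_images_std(),
    dataset=dataset)

feature_map = output_dict["feature_map"]
# The label variable is no longer fed explicitly; the task adds it itself.
feed_list = [input_dict["image"].name]

config = hub.RunConfig(
    use_cuda=False,
    num_epoch=1,
    batch_size=16,
    checkpoint_dir="paddlehub_finetune_ckpt",
    strategy=hub.finetune.strategy.DefaultFinetuneStrategy())

# The task now bundles reader, feed list, classification head and config ...
task = hub.ImageClassifierTask(
    data_reader=data_reader,
    feed_list=feed_list,
    feature=feature_map,
    num_classes=dataset.num_labels,
    config=config)

# ... so fine-tuning plus evaluation becomes a single method call.
task.finetune_and_eval()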
@@ -9,6 +9,7 @@ import numpy as np
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--use_gpu", type=bool, default=False, help="Whether use GPU for predict.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="resnet50", help="Module used as a feature extractor.")
parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.")
# yapf: enable.
@@ -24,6 +25,8 @@ module_map = {
def predict(args):
module = hub.Module(name=args.module)
input_dict, output_dict, program = module.context(trainable=True)
if args.dataset.lower() == "flowers":
dataset = hub.dataset.Flowers()
@@ -38,45 +41,43 @@ def predict(args):
else:
raise ValueError("%s dataset is not defined" % args.dataset)
label_map = dataset.label_dict()
num_labels = len(label_map)
module = hub.Module(name=args.module)
input_dict, output_dict, program = module.context()
data_reader = hub.reader.ImageClassificationReader(
image_width=module.get_expected_image_width(),
image_height=module.get_expected_image_height(),
images_mean=module.get_pretrained_images_mean(),
images_std=module.get_pretrained_images_std(),
dataset=None)
dataset=dataset)
img = input_dict["image"]
feature_map = output_dict["feature_map"]
task = hub.create_img_cls_task(feature=feature_map, num_classes=num_labels)
img = input_dict["image"]
feed_list = [img.name]
with fluid.program_guard(task.inference_program()):
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
pretrained_model_dir = os.path.join(args.checkpoint_dir, "best_model")
if not os.path.exists(pretrained_model_dir):
hub.logger.error(
"pretrained model dir %s didn't exist" % pretrained_model_dir)
exit(1)
fluid.io.load_persistables(exe, pretrained_model_dir)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
data = ["test/test_img_roses.jpg", "test/test_img_daisy.jpg"]
config = hub.RunConfig(
use_cuda=args.use_gpu,
batch_size=args.batch_size,
enable_memory_optim=False,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
predict_reader = data_reader.data_generator(
phase="predict", batch_size=1, data=data)
for index, batch in enumerate(predict_reader()):
result, = exe.run(
feed=feeder.feed(batch), fetch_list=[task.variable('probs')])
predict_result = label_map[np.argsort(result[0])[::-1][0]]
print("input %i is %s, and the predict result is %s" %
(index, data[index], predict_result))
task = hub.ClassifierTask(
data_reader=data_reader,
feed_list=feed_list,
feature=feature_map,
num_classes=dataset.num_labels,
config=config)
data = ["./test/test_img_daisy.jpg", "./test/test_img_roses.jpg"]
label_map = dataset.label_dict()
for result in task.predict(data=data):
result = np.argmax(result, axis=2)
index = 0
for batch in result:
for predict_result in batch:
index += 1
predict_result = label_map[predict_result]
print("input %i is %s, and the predict result is %s" %
(index, data[index - 1], predict_result))
if __name__ == "__main__":
......
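On the prediction side, the hand-rolled Executor/DataFeeder loop disappears in favour of the same task object plus `task.predict`. A condensed sketch of the new inference tail, assuming `dataset`, `data_reader`, `feature_map`, `feed_list` and `config` are built exactly as in the hunk above, with `config.checkpoint_dir` pointing at the fine-tuned checkpoint:

import numpy as np
import paddlehub as hub

# Reuses dataset / data_reader / feature_map / feed_list / config from the hunk above.
task = hub.ClassifierTask(
    data_reader=data_reader,
    feed_list=feed_list,
    feature=feature_map,
    num_classes=dataset.num_labels,
    config=config)

data = ["./test/test_img_daisy.jpg", "./test/test_img_roses.jpg"]
label_map = dataset.label_dict()

index = 0
for result in task.predict(data=data):
    # Each yielded result carries per-example class probabilities;
    # argmax over the last axis turns them into label ids.
    result = np.argmax(result, axis=2)
    for batch in result:
        for predict_result in batch:
            index += 1
            print("input %i is %s, and the predict result is %s" %
                  (index, data[index - 1], label_map[predict_result]))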
@@ -38,11 +38,10 @@ from .module.manager import default_module_manager
from .io.type import DataType
from .finetune.task import Task
from .finetune.task import create_seq_label_task
from .finetune.task import create_text_cls_task
from .finetune.task import create_img_cls_task
from .finetune.finetune import finetune_and_eval
from .finetune.task import ClassifierTask
from .finetune.task import TextClassifierTask
from .finetune.task import ImageClassifierTask
from .finetune.task import SequenceLabelTask
from .finetune.config import RunConfig
from .finetune.strategy import AdamWeightDecayStrategy
from .finetune.strategy import DefaultStrategy
......
@@ -26,7 +26,11 @@ from paddlehub.common.logger import logger
CKPT_FILE_NAME = "ckpt.meta"
def load_checkpoint(checkpoint_dir, exe):
def load_checkpoint(checkpoint_dir,
exe,
main_program=fluid.default_main_program(),
startup_program=fluid.default_startup_program()):
ckpt_meta_path = os.path.join(checkpoint_dir, CKPT_FILE_NAME)
logger.info("Try loading checkpoint from {}".format(ckpt_meta_path))
if os.path.exists(ckpt_meta_path):
@@ -34,7 +38,7 @@ def load_checkpoint(checkpoint_dir, exe):
with open(ckpt_meta_path, "rb") as f:
ckpt.ParseFromString(f.read())
fluid.io.load_persistables(exe, ckpt.latest_model_dir)
fluid.io.load_persistables(exe, ckpt.latest_model_dir, main_program)
logger.info("PaddleHub model checkpoint loaded. current_epoch={}, "
"global_step={}".format(ckpt.current_epoch,
@@ -47,18 +51,24 @@ def load_checkpoint(checkpoint_dir, exe):
logger.info(
"PaddleHub model checkpoint not found, start training from scratch..."
)
exe.run(fluid.default_startup_program())
exe.run(startup_program)
return current_epoch, global_step
def save_checkpoint(checkpoint_dir, current_epoch, global_step, exe):
def save_checkpoint(checkpoint_dir,
current_epoch,
global_step,
exe,
main_program=fluid.default_main_program()):
ckpt_meta_path = os.path.join(checkpoint_dir, CKPT_FILE_NAME)
ckpt = checkpoint_pb2.CheckPoint()
model_saved_dir = os.path.join(checkpoint_dir, "step_%d" % global_step)
logger.info("Saving model checkpoint to {}".format(model_saved_dir))
fluid.io.save_persistables(exe, dirname=model_saved_dir)
fluid.io.save_persistables(
exe, dirname=model_saved_dir, main_program=main_program)
ckpt.current_epoch = current_epoch
ckpt.global_step = global_step
......
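`load_checkpoint` and `save_checkpoint` no longer operate implicitly on `fluid.default_main_program()`; a caller can now pass in the programs it built, which matters once every task constructs its own. A minimal sketch of calling them with explicit programs (the Program objects and checkpoint path are placeholders, not values from this commit):

import paddle.fluid as fluid
from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint

# Placeholder programs standing in for whatever a task actually builds.
main_program = fluid.Program()
startup_program = fluid.Program()

place = fluid.CPUPlace()
exe = fluid.Executor(place)

# Restore (or initialize) against the task's own programs rather than the globals.
current_epoch, global_step = load_checkpoint(
    "paddlehub_finetune_ckpt",
    exe,
    main_program=main_program,
    startup_program=startup_program)

# ... run training steps on main_program ...

# Persist the parameters of that same program.
save_checkpoint(
    checkpoint_dir="paddlehub_finetune_ckpt",
    current_epoch=current_epoch,
    global_step=global_step,
    exe=exe,
    main_program=main_program)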
@@ -25,95 +25,6 @@ from paddlehub.common.logger import logger
import paddlehub as hub
def evaluate_cls_task(task, data_reader, feed_list, phase="test", config=None):
logger.info("Evaluation on {} dataset start".format(phase))
test_program = task.test_program()
main_program = task.main_program()
loss = task.variable("loss")
accuracy = task.variable("accuracy")
batch_size = config.batch_size
place, dev_count = hub.common.get_running_device_info(config)
exe = fluid.Executor(place=place)
with fluid.program_guard(test_program):
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
num_eval_examples = acc_sum = loss_sum = 0
test_reader = data_reader.data_generator(
batch_size=batch_size, phase=phase)
eval_time_begin = time.time()
eval_step = 0
for batch in test_reader():
num_batch_examples = len(batch)
eval_step += 1
loss_v, accuracy_v = exe.run(
feed=data_feeder.feed(batch),
fetch_list=[loss.name, accuracy.name])
num_eval_examples += num_batch_examples
if num_eval_examples % 10000 == 0:
logger.info("{} examples evaluated.".format(num_eval_examples))
acc_sum += accuracy_v * num_batch_examples
loss_sum += loss_v * num_batch_examples
eval_time_used = time.time() - eval_time_begin
avg_loss = loss_sum / num_eval_examples
avg_acc = acc_sum / num_eval_examples
eval_speed = eval_step / eval_time_used
logger.info(
"[%s dataset evaluation result] loss=%.5f acc=%.5f [step/sec: %.2f]" %
(phase, avg_loss, avg_acc, eval_speed))
return avg_loss, avg_acc, eval_speed
def evaluate_seq_label_task(task,
data_reader,
feed_list,
phase="test",
config=None):
fetch_list = [
task.variable("labels").name,
task.variable("infers").name,
task.variable("seq_len").name,
task.variable("loss").name
]
logger.info("Evaluation on {} dataset start".format(phase))
test_program = task.test_program()
batch_size = config.batch_size
place, dev_count = hub.common.get_running_device_info(config)
exe = fluid.Executor(place=place)
# calculate the num of label from probs variable shape
num_labels = task.variable("probs").shape[1]
with fluid.program_guard(test_program):
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
num_eval_examples = acc_sum = loss_sum = 0
test_reader = data_reader.data_generator(
batch_size=batch_size, phase=phase)
eval_time_begin = time.time()
eval_step = 0
total_label, total_infer, total_correct = 0.0, 0.0, 0.0
for batch in test_reader():
num_batch_examples = len(batch)
eval_step += 1
np_labels, np_infers, np_lens, _ = exe.run(
feed=data_feeder.feed(batch), fetch_list=fetch_list)
label_num, infer_num, correct_num = chunk_eval(
np_labels, np_infers, np_lens, num_labels, dev_count)
total_infer += infer_num
total_label += label_num
total_correct += correct_num
precision, recall, f1 = calculate_f1(total_label, total_infer,
total_correct)
eval_time_used = time.time() - eval_time_begin
eval_speed = eval_step / eval_time_used
logger.info(
"[%s evaluation] F1-Score=%f, precision=%f, recall=%f [step/sec: %.2f]"
% (phase, f1, precision, recall, eval_speed))
return f1, precision, recall
# Sequence label evaluation functions
def chunk_eval(np_labels, np_infers, np_lens, tag_num, dev_count=1):
def extract_bio_chunk(seq):
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import paddle
import paddle.fluid as fluid
import numpy as np
from visualdl import LogWriter
from paddlehub.common.logger import logger
from paddlehub.common.utils import mkdir
from paddlehub.finetune.config import RunConfig
from paddlehub.finetune.strategy import AdamWeightDecayStrategy, DefaultStrategy
from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint
from paddlehub.finetune.evaluate import evaluate_cls_task, evaluate_seq_label_task
import paddlehub as hub
def _do_memory_optimization(task, config):
if config.enable_memory_optim:
logger.info("Memory optimization start...")
task_var_name = task.metric_variable_names()
logger.info(
"Skip memory optimization on variables: {}".format(task_var_name))
optimize_time_begin = time.time()
fluid.memory_optimize(
input_program=fluid.default_main_program(),
# skip memory optimization on task metric variables
skip_opt_set=task_var_name)
time_used = time.time() - optimize_time_begin
logger.info("Memory optimization done! Time elapsed %f sec" % time_used)
# lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
# program=task.main_program(), batch_size=config.batch_size)
# logger.info("Theoretical memory usage in training: %.2f - %.2f %s" %
# (lower_mem, upper_mem, unit)),
def _finetune_seq_label_task(task,
data_reader,
feed_list,
config=None,
do_eval=False):
"""
Finetune sequence labeling task, evaluate metric is F1, precision and recall
"""
main_program = task.main_program()
startup_program = task.startup_program()
loss = task.variable("loss")
seq_len = task.variable("seq_len")
num_epoch = config.num_epoch
batch_size = config.batch_size
log_writer = LogWriter(
os.path.join(config.checkpoint_dir, "vdllog"), sync_cycle=1)
place, dev_count = hub.common.get_running_device_info(config)
with fluid.program_guard(main_program, startup_program):
exe = fluid.Executor(place=place)
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
# Select strategy
if isinstance(config.strategy, hub.AdamWeightDecayStrategy):
scheduled_lr = config.strategy.execute(loss, main_program,
data_reader, config)
elif isinstance(config.strategy, hub.DefaultStrategy):
config.strategy.execute(loss)
#TODO: add more finetune strategy
_do_memory_optimization(task, config)
# Try to restore model training checkpoint
current_epoch, global_step = load_checkpoint(config.checkpoint_dir, exe)
best_eval_f1 = 0.0
train_time_used = 0
logger.info("PaddleHub finetune start")
exe.run(fluid.default_startup_program())
# add visualdl scalar
with log_writer.mode("train") as logw:
train_loss_scalar = logw.scalar(tag="Loss [train]")
with log_writer.mode("evaluate") as logw:
eval_f1_scalar = logw.scalar(tag="F1 [eval]")
eval_precision_scalar = logw.scalar(tag="Precision [eval]")
eval_recall_scalar = logw.scalar(tag="Recall [eval]")
# Finetune loop
for epoch in range(current_epoch, num_epoch + 1):
train_reader = data_reader.data_generator(
batch_size=batch_size, phase='train')
num_trained_examples = loss_sum = 0
for batch in train_reader():
num_batch_examples = len(batch)
train_time_begin = time.time()
loss_v = exe.run(
feed=data_feeder.feed(batch), fetch_list=[loss.name])
train_time_used += time.time() - train_time_begin
global_step += 1
num_trained_examples += num_batch_examples
loss_sum += loss_v[0] * num_batch_examples
# log fintune status
if global_step % config.log_interval == 0:
avg_loss = loss_sum / num_trained_examples
speed = config.log_interval / train_time_used
logger.info("step %d: loss=%.5f [step/sec: %.2f]" %
(global_step, avg_loss, speed))
train_loss_scalar.add_record(global_step, avg_loss)
train_time_used = 0
num_trained_examples = 0
loss_sum = 0
if config.save_ckpt_interval and global_step % config.save_ckpt_interval == 0:
# NOTE: current saved checkpoint machanism is not completed,
# it can't restore correct dataset training status
save_checkpoint(
checkpoint_dir=config.checkpoint_dir,
current_epoch=epoch,
global_step=global_step,
exe=exe)
if do_eval and global_step % config.eval_interval == 0:
f1, precision, recall = evaluate_seq_label_task(
task,
data_reader,
feed_list,
phase="dev",
config=config)
eval_f1_scalar.add_record(global_step, f1)
eval_precision_scalar.add_record(global_step, precision)
eval_recall_scalar.add_record(global_step, recall)
if f1 > best_eval_f1:
best_eval_f1 = f1
model_saved_dir = os.path.join(config.checkpoint_dir,
"best_model")
logger.info("best model saved to %s [best F1=%.5f]" %
(model_saved_dir, best_eval_f1))
fluid.io.save_persistables(exe, dirname=model_saved_dir)
# NOTE: current saved checkpoint machanism is not completed, it can't
# resotre dataset training status
save_checkpoint(
checkpoint_dir=config.checkpoint_dir,
current_epoch=num_epoch + 1,
global_step=global_step,
exe=exe)
# Final evaluation
if do_eval:
evaluate_seq_label_task(
task, data_reader, feed_list, phase="dev", config=config)
evaluate_seq_label_task(
task, data_reader, feed_list, phase="test", config=config)
logger.info("PaddleHub finetune finished.")
def _finetune_cls_task(task, data_reader, feed_list, config=None,
do_eval=False):
main_program = task.main_program()
startup_program = task.startup_program()
loss = task.variable("loss")
accuracy = task.variable("accuracy")
num_epoch = config.num_epoch
batch_size = config.batch_size
log_writer = LogWriter(
os.path.join(config.checkpoint_dir, "vdllog"), sync_cycle=1)
place, dev_count = hub.common.get_running_device_info(config)
with fluid.program_guard(main_program, startup_program):
exe = fluid.Executor(place=place)
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
# select strategy
if isinstance(config.strategy, hub.AdamWeightDecayStrategy):
scheduled_lr = config.strategy.execute(loss, main_program,
data_reader, config)
elif isinstance(config.strategy, hub.DefaultStrategy):
config.strategy.execute(loss)
#TODO: add more finetune strategy
_do_memory_optimization(task, config)
# Try to restore model training checkpoint
current_epoch, global_step = load_checkpoint(config.checkpoint_dir, exe)
best_eval_acc = 0.0
train_time_used = 0
logger.info("PaddleHub finetune start")
# add visualdl scalar
with log_writer.mode("train") as logw:
train_loss_scalar = logw.scalar(tag="Loss [train]")
train_acc_scalar = logw.scalar(tag="Accuracy [train]")
with log_writer.mode("evaluate") as logw:
eval_loss_scalar = logw.scalar(tag="Loss [eval]")
eval_acc_scalar = logw.scalar(tag="Accuracy [eval]")
exe.run(fluid.default_startup_program())
# Finetune loop
for epoch in range(current_epoch, num_epoch + 1):
train_reader = data_reader.data_generator(
batch_size=batch_size, phase='train')
num_trained_examples = acc_sum = loss_sum = 0
for batch in train_reader():
num_batch_examples = len(batch)
train_time_begin = time.time()
loss_v, accuracy_v = exe.run(
feed=data_feeder.feed(batch),
fetch_list=[loss.name, accuracy.name],
return_numpy=False)
loss_v = np.array(loss_v)
accuracy_v = np.array(accuracy_v)
train_time_used += time.time() - train_time_begin
global_step += 1
num_trained_examples += num_batch_examples
acc_sum += accuracy_v * num_batch_examples
loss_sum += loss_v * num_batch_examples
# log fintune status
if global_step % config.log_interval == 0:
avg_loss = loss_sum / num_trained_examples
avg_acc = acc_sum / num_trained_examples
speed = config.log_interval / train_time_used
logger.info("step %d: loss=%.5f acc=%.5f [step/sec: %.2f]" %
(global_step, avg_loss, avg_acc, speed))
# record visualdl log
train_loss_scalar.add_record(global_step, avg_loss)
train_acc_scalar.add_record(global_step, avg_acc)
train_time_used = 0
num_trained_examples = acc_sum = loss_sum = 0
if config.save_ckpt_interval and global_step % config.save_ckpt_interval == 0:
# NOTE: current saved checkpoint machanism is not completed,
# it can't restore dataset training status
save_checkpoint(
checkpoint_dir=config.checkpoint_dir,
current_epoch=epoch,
global_step=global_step,
exe=exe)
if do_eval and global_step % config.eval_interval == 0:
eval_loss, eval_acc, eval_perf = evaluate_cls_task(
task,
data_reader,
feed_list,
phase="val",
config=config)
eval_loss_scalar.add_record(global_step, eval_loss)
eval_acc_scalar.add_record(global_step, eval_acc)
if eval_acc > best_eval_acc:
best_eval_acc = eval_acc
model_saved_dir = os.path.join(config.checkpoint_dir,
"best_model")
logger.info(
"best model saved to %s [best accuracy=%.5f]" %
(model_saved_dir, best_eval_acc))
fluid.io.save_persistables(exe, dirname=model_saved_dir)
# NOTE: current saved checkpoint machanism is not completed, it can't
# resotre dataset training status
save_checkpoint(
checkpoint_dir=config.checkpoint_dir,
current_epoch=num_epoch + 1,
global_step=global_step,
exe=exe)
# Final evaluation
if do_eval:
evaluate_cls_task(
task, data_reader, feed_list, phase="dev", config=config)
evaluate_cls_task(
task, data_reader, feed_list, phase="test", config=config)
logger.info("PaddleHub finetune finished.")
def finetune_and_eval(task, data_reader, feed_list, config=None):
if config is None:
config = RunConfig()
if not os.path.exists(config.checkpoint_dir):
mkdir(config.checkpoint_dir)
if task.task_type == "sequence_labeling":
_finetune_seq_label_task(
task, data_reader, feed_list, config, do_eval=True)
elif task.task_type == "image_classification" or task.task_type == "text_classification":
_finetune_cls_task(task, data_reader, feed_list, config, do_eval=True)
@@ -74,7 +74,7 @@ class DefaultStrategy(object):
self.optimizer = fluid.optimizer.Adam(
learning_rate=self.learning_rate)
def execute(self, loss):
def execute(self, loss, data_reader, config):
if self.optimizer is not None:
self.optimizer.minimize(loss)
else:
@@ -114,7 +114,8 @@ class AdamWeightDecayStrategy(DefaultStrategy):
def weight_decay(self):
return self._weight_decay
def execute(self, loss, main_program, data_reader, config):
def execute(self, loss, data_reader, config):
main_program = loss.block.program
# calculate wamrup step
dev_count = self._get_dev_count(config)
data_reader.data_generator(
@@ -158,7 +159,7 @@ class DefaultFinetuneStrategy(DefaultStrategy):
self._optimizer_name = optimizer_name
self.regularization_coeff = regularization_coeff
def execute(self, loss):
def execute(self, loss, data_reader, config):
# get pretrained parameters
program = loss.block.program
global_block = program.global_block()
@@ -187,7 +188,7 @@ class L2SPFinetuneStrategy(DefaultStrategy):
self._optimizer_name = optimizer_name
self.regularization_coeff = regularization_coeff
def execute(self, loss):
def execute(self, loss, data_reader, config):
# get pretrained parameters
program = loss.block.program
global_block = program.global_block()
......
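After this change all strategies share a single `execute(self, loss, data_reader, config)` signature and recover the program from `loss.block.program` when they need it, instead of receiving `main_program` as an argument. A sketch of what a user-defined strategy could look like against the new interface (the class name and learning rate are made up for illustration):

import paddle.fluid as fluid
from paddlehub.finetune.strategy import DefaultStrategy


class ConstantLRStrategy(DefaultStrategy):
    """Hypothetical custom strategy: plain SGD with a fixed learning rate."""

    def execute(self, loss, data_reader, config):
        # data_reader and config are now always available, so a warmup/decay
        # strategy could derive its schedule from them, as AdamWeightDecayStrategy does.
        # The program, if needed, is recoverable via loss.block.program.
        optimizer = fluid.optimizer.SGD(learning_rate=0.001)
        optimizer.minimize(loss)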
This diff is collapsed.