Commit f04836a1 authored by W wuzewu

update finetune method

Parent 34855d8e
@@ -29,5 +29,7 @@ FinetuneConfig = collections.namedtuple(
 'max_seq_len', # for bert
 'weight_decay', # for bert
 'warmup_proportion', # for bert
-'in_tokens' # for bert
+'in_tokens', # for bert
+'strategy',
+'with_memory_optimization'
 ])
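For orientation, a minimal sketch of how the extended FinetuneConfig might be constructed after this change. It assumes the fields elided above this hunk are exactly the ones finetune.py reads from the config; every concrete value (and the "default" strategy string) is an illustrative assumption, not something taken from this commit.

from paddle_hub.finetune.config import FinetuneConfig

# All values below are placeholders; 'strategy' and 'with_memory_optimization'
# are the two fields added by this commit.
config = FinetuneConfig(
    log_interval=10,
    eval_interval=100,
    save_ckpt_interval=200,
    use_cuda=True,
    checkpoint_dir="./bert_cls_ckpt",
    learning_rate=5e-5,
    num_epoch=3,
    batch_size=32,
    max_seq_len=128,  # for bert
    weight_decay=0.01,  # for bert
    warmup_proportion=0.1,  # for bert
    in_tokens=False,  # for bert
    strategy="default",
    with_memory_optimization=True)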
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,146 +12,160 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import paddle.fluid as fluid
import time
import numpy as np
import multiprocessing
from paddle_hub.finetune.optimization import bert_optimization
from paddle_hub.finetune.config import FinetuneConfig
import paddle
import paddle.fluid as fluid
from paddle_hub.tools.logger import logger
def finetune_and_eval(task, feed_list, data_processor, config=None):
# environment setup
if config.use_cuda:
place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
exe = fluid.Executor(place)
# hub.finetune_and_eval start here
# TODO: simplify
loss = task.variable("loss")
probs = task.variable("probs")
accuracy = task.variable("accuracy")
num_example = task.variable("num_example")
def optimizer_config_for_strategy(strategy, parameters, data_processor,
dev_count):
# basic configuration
learning_rate = 1e-4
optimizer = fluid.optimizer.Adam(learning_rate)
regularizer = fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)
num_train_examples = data_processor.get_num_examples(phase='train')
if config.in_tokens:
max_train_steps = config.num_epoch * num_train_examples // (
config.batch_size // config.max_seq_len) // dev_count
else:
max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
return optimizer
warmup_steps = int(max_train_steps * config.warmup_proportion)
# obtain main program from Task class
train_program = task.main_program()
def _finetune_model(task,
data_processor,
feed_list,
config=None,
eval_model=False):
main_program = task.main_program()
startup_program = task.startup_program()
# clone test program before optimize
test_program = train_program.clone(for_test=True)
bert_optimization(loss, warmup_steps, max_train_steps, config.learning_rate,
train_program, config.weight_decay)
# memory optimization
fluid.memory_optimize(
input_program=train_program,
skip_opt_set=[
# skip task graph variable memory optimization
loss.name,
probs.name,
accuracy.name,
num_example.name
])
exe.run(startup_program)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
# Training block
# prepare training dataset
total_loss, total_acc, total_num_example = [], [], []
step = 0
time_begin = time.time()
train_time_used = 0.0
for epoch in range(1, config.num_epoch + 1):
print("Epoch {}".format(epoch))
train_data_generator = data_processor.data_generator(
batch_size=config.batch_size, phase='train', shuffle=False)
for example in train_data_generator():
step += 1
train_time_begin = time.time()
np_loss, np_acc, np_num_example = exe.run(
program=train_program,
feed=feeder.feed([example]),
fetch_list=[loss, accuracy, num_example])
train_time_used += time.time() - train_time_begin
# Statistics block
total_loss.extend(np_loss * np_num_example)
total_acc.extend(np_acc * np_num_example)
total_num_example.extend(np_num_example)
if step % config.log_interval == 0:
# get training progress
accum_num_example = np.sum(total_num_example)
print(
"step {}: loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".format(
step,
np.sum(total_loss) / accum_num_example,
np.sum(total_acc) / accum_num_example,
config.log_interval / train_time_used))
# reset statistic variables
total_loss, total_acc, total_num_example = [], [], []
train_time_used = 0.0
# Evaluation block
if step % config.eval_interval == 0:
test_data_generator = data_processor.data_generator(
batch_size=config.batch_size, phase='test', shuffle=False)
dev_data_generator = data_processor.data_generator(
batch_size=config.batch_size, phase='dev', shuffle=False)
evaluate(task, test_program, exe, feeder, dev_data_generator)
evaluate(task, test_program, exe, feeder, test_data_generator)
# Save model checkpoint
if step % config.save_ckpt_interval == 0:
save_checkpoint(exe, train_program, step, config.checkpoint_dir)
# final evaluation on the test set
test_data_generator = data_processor.data_generator(
batch_size=config.batch_size, phase='test', shuffle=False)
evaluate(task, test_program, exe, feeder, test_data_generator)
def save_checkpoint(exe, train_program, step, ckpt_dir):
# TODO: add a global step variable so checkpoints can be restored, as in TensorFlow
ckpt_step_dir = os.path.join(ckpt_dir, "step_{}".format(step))
fluid.io.save_persistables(exe, ckpt_step_dir, train_program)
def evaluate(task, test_program, exe, feeder, data_generator):
loss = task.variable("loss")
probs = task.variable("probs")
accuracy = task.variable("accuracy")
num_example = task.variable("num_example")
total_loss, total_acc, total_num_example = [], [], []
eval_step = 0
eval_time_begin = time.time()
for example in data_generator():
eval_step += 1
np_loss, np_acc, np_num_example = exe.run(
program=test_program,
feed=feeder.feed([example]),
fetch_list=[loss, accuracy, num_example])
total_loss.extend(np_loss * np_num_example)
total_acc.extend(np_acc * np_num_example)
total_num_example.extend(np_num_example)
eval_time_used = time.time() - eval_time_begin
accum_num_example = np.sum(total_num_example)
print("[evaluation] loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".format(
np.sum(total_loss) / accum_num_example,
np.sum(total_acc) / accum_num_example, eval_step / eval_time_used))
epoch = config.num_epoch
batch_size = config.batch_size
learning_rate = config.learning_rate
use_cuda = config.use_cuda
strategy = config.strategy
with_memory_optimization = config.with_memory_optimization
checkpoint_dir = config.checkpoint_dir
with fluid.program_guard(main_program, startup_program):
if use_cuda:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
optimizer = optimizer_config_for_strategy(
strategy=strategy,
parameters=None,
data_processor=data_processor,
dev_count=dev_count)
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
exe = fluid.Executor(place=place)
optimizer.minimize(loss)
if with_memory_optimization:
logger.info("Memory optimize start")
fluid.memory_optimize(
input_program=fluid.default_main_program(),
skip_opt_set=[
# skip task graph variable memory optimization
loss.name,
accuracy.name
])
logger.info("Memory optimize end")
# initialize all parameters
exe.run(fluid.default_startup_program())
step = 0
logger.info("Finetune start")
train_time_begin = time.time()
for index in range(epoch):
train_reader = paddle.batch(
data_processor.data_generator(phase='train'),
batch_size=batch_size)
size = accuracy_sum = loss_sum = 0
for batch in train_reader():
loss_v, accuracy_v = exe.run(
feed=data_feeder.feed(batch),
fetch_list=[loss.name, accuracy.name])
step += 1
size += len(batch)
accuracy_sum += accuracy_v * len(batch)
loss_sum += loss_v * len(batch)
if step % config.log_interval == 0:
train_time_used = time.time() - train_time_begin
# report steps per second, matching the log label below
perf = config.log_interval / train_time_used
train_time_begin = time.time()
logger.info(
"step %d: loss=%.5f acc=%.5f [step/sec: %.2f]" %
(step, loss_sum / size, accuracy_sum / size, perf))
size = accuracy_sum = loss_sum = 0
if step % config.save_ckpt_interval == 0:
model_save_dir = os.path.join(
checkpoint_dir, "model_parameters_in_step%d" % step)
fluid.io.save_persistables(exe, dirname=model_save_dir)
if eval_model and step % config.eval_interval == 0:
eval(task, data_processor, feed_list, config)
# final evaluation before finishing
if eval_model:
eval(task, data_processor, feed_list, config)
logger.info("Finetune end")
def save_model_and_checkpoint(task, save_dir):
pass
def finetune_and_eval(
task,
data_processor,
feed_list,
config=None,
):
_finetune_model(task, data_processor, feed_list, config, eval_model=True)
def finetune(task, data_processor, feed_list, config=None):
_finetune_model(task, data_processor, feed_list, config, eval_model=False)
def eval(task, data_processor, feed_list, config=None):
inference_program = task.inference_program()
main_program = task.main_program()
loss = task.variable("loss")
accuracy = task.variable("accuracy")
use_cuda = config.use_cuda
batch_size = config.batch_size
logger.info("[Evaluation] start")
with fluid.program_guard(inference_program):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
exe = fluid.Executor(place=place)
size = accuracy_sum = loss_sum = 0
test_reader = paddle.batch(
data_processor.data_generator(phase='test'), batch_size=batch_size)
eval_time_begin = time.time()
for index, batch in enumerate(test_reader()):
loss_v, accuracy_v = exe.run(
feed=data_feeder.feed(batch), fetch_list=[loss.name, accuracy.name])
size += len(batch)
accuracy_sum += accuracy_v * len(batch)
loss_sum += loss_v * len(batch)
eval_time_used = time.time() - eval_time_begin
# enumerate() is zero-based, so index + 1 is the number of batches; report batches per second
perf = (index + 1) / eval_time_used
logger.info("[Evaluation] loss=%.5f acc=%.5f [step/sec: %.2f]" %
(loss_sum / size, accuracy_sum / size, perf))
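Both _finetune_model and eval above keep example-weighted running sums of loss and accuracy and print a throughput figure every logging interval. The snippet below is a self-contained sketch of just that bookkeeping in plain Python, fed with fabricated (loss, accuracy, batch size) tuples; it illustrates the logging pattern rather than reproducing code from this commit.

import time


def run_with_logging(batches, log_interval=10):
    # Mirrors the bookkeeping in _finetune_model: per-example weighted averages
    # that reset after every log_interval steps, plus a steps-per-second figure.
    step = 0
    size = accuracy_sum = loss_sum = 0
    time_begin = time.time()
    for batch_loss, batch_acc, batch_len in batches:
        step += 1
        size += batch_len
        loss_sum += batch_loss * batch_len  # weight metrics by batch size
        accuracy_sum += batch_acc * batch_len
        if step % log_interval == 0:
            time_used = max(time.time() - time_begin, 1e-6)  # guard against a zero timer delta
            print("step %d: loss=%.5f acc=%.5f [step/sec: %.2f]" %
                  (step, loss_sum / size, accuracy_sum / size,
                   log_interval / time_used))
            size = accuracy_sum = loss_sum = 0  # reset running statistics
            time_begin = time.time()


# hypothetical usage with fake (loss, accuracy, batch_size) tuples
run_with_logging([(0.7, 0.55, 32), (0.6, 0.60, 32), (0.5, 0.68, 32)], log_interval=3)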