diff --git a/paddle_hub/finetune/config.py b/paddle_hub/finetune/config.py index 755d1b49fb402b3c84175b6b89149f5491033f46..374d26e7cd517bbfa7cce24e151a9139edd9b7ec 100644 --- a/paddle_hub/finetune/config.py +++ b/paddle_hub/finetune/config.py @@ -29,5 +29,7 @@ FinetuneConfig = collections.namedtuple( 'max_seq_len', # for bert 'weight_decay', # for bert 'warmup_proportion', # for bert - 'in_tokens' # for bert + 'in_tokens', # for bert + 'strategy', + 'with_memory_optimization' ]) diff --git a/paddle_hub/finetune/finetune.py b/paddle_hub/finetune/finetune.py index db1c99b2513bcd86b3ef082ce9f566977a89d21c..2bb0909eb9b793d954c7d59b547d981176704531 100644 --- a/paddle_hub/finetune/finetune.py +++ b/paddle_hub/finetune/finetune.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" # you may not use this file except in compliance with the License. @@ -12,146 +12,160 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import os -import paddle.fluid as fluid import time -import numpy as np -import multiprocessing -from paddle_hub.finetune.optimization import bert_optimization -from paddle_hub.finetune.config import FinetuneConfig +import paddle +import paddle.fluid as fluid +from paddle_hub.tools.logger import logger -def finetune_and_eval(task, feed_list, data_processor, config=None): - # environment setup - if config.use_cuda: - place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0'))) - dev_count = fluid.core.get_cuda_device_count() - else: - place = fluid.CPUPlace() - dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - exe = fluid.Executor(place) - # hub.finetune_and_eval start here - #TODO: to simplify - loss = task.variable("loss") - probs = task.variable("probs") - accuracy = task.variable("accuracy") - num_example = task.variable("num_example") +def optimizer_config_for_strategy(strategy, parameters, data_processor, + dev_count): + # basic configuration + learning_rate = 1e-4 + optimizer = fluid.optimizer.Adam(learning_rate) + regularizer = fluid.regularizer.L2DecayRegularizer( + regularization_coeff=1e-4) - num_train_examples = data_processor.get_num_examples(phase='train') - if config.in_tokens: - max_train_steps = config.num_epoch * num_train_examples // ( - config.batch_size // config.max_seq_len) // dev_count - else: - max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count + return optimizer - warmup_steps = int(max_train_steps * config.warmup_proportion) - # obtain main program from Task class - train_program = task.main_program() +def _finetune_model(task, + data_processor, + feed_list, + config=None, + eval_model=False): + main_program = task.main_program() startup_program = task.startup_program() - # clone test program before optimize - test_program = train_program.clone(for_test=True) - - bert_optimization(loss, warmup_steps, max_train_steps, config.learning_rate, - train_program, config.weight_decay) - - # memory optimization - fluid.memory_optimize( - input_program=train_program, - skip_opt_set=[ - # skip task graph variable memory optimization - loss.name, - probs.name, - accuracy.name, - num_example.name - ]) - - exe.run(startup_program) - feeder = fluid.DataFeeder(feed_list=feed_list, place=place) - - # Traning block - # prepare training dataset - total_loss, total_acc, total_num_example = [], [], [] - step = 0 - time_begin = time.time() - train_time_used = 0.0 - for epoch in range(1, config.num_epoch + 1): - print("Epoch {}".format(epoch)) - train_data_generator = data_processor.data_generator( - batch_size=config.batch_size, phase='train', shuffle=False) - for example in train_data_generator(): - step += 1 - train_time_begin = time.time() - np_loss, np_acc, np_num_example = exe.run( - program=train_program, - feed=feeder.feed([example]), - fetch_list=[loss, accuracy, num_example]) - train_time_used += time.time() - train_time_begin - - # Statistic Block - total_loss.extend(np_loss * np_num_example) - total_acc.extend(np_acc * np_num_example) - total_num_example.extend(np_num_example) - if step % config.log_interval == 0: - # get training progress - accum_num_example = np.sum(total_num_example) - print( - "step {}: loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".format( - step, - np.sum(total_loss) / accum_num_example, - np.sum(total_acc) / accum_num_example, - config.log_interval / train_time_used)) - # reset statistic variables - total_loss, total_acc, total_num_example = [], [], [] - train_time_used = 0.0 - - # Evaluation block - if step % config.eval_interval == 0: - test_data_generator = data_processor.data_generator( - batch_size=config.batch_size, phase='test', shuffle=False) - dev_data_generator = data_processor.data_generator( - batch_size=config.batch_size, phase='dev', shuffle=False) - evaluate(task, test_program, exe, feeder, dev_data_generator) - evaluate(task, test_program, exe, feeder, test_data_generator) - - # Save model checkpoint - if step % config.save_ckpt_interval == 0: - save_checkpoint(exe, train_program, step, config.checkpoint_dir) - - # finish final evaluation on testset - test_data_generator = data_processor.data_generator( - batch_size=config.batch_size, phase='test', shuffle=False) - evaluate(task, test_program, exe, feeder, test_data_generator) - - -def save_checkpoint(exe, train_program, step, ckpt_dir): - #TODO: add global step variable for restore checkpoint like tensorflow - ckpt_step_dir = os.path.join(ckpt_dir, "step_{}".format(step)) - fluid.io.save_persistables(exe, ckpt_step_dir, train_program) - - -def evaluate(task, test_program, exe, feeder, data_generator): loss = task.variable("loss") - probs = task.variable("probs") accuracy = task.variable("accuracy") - num_example = task.variable("num_example") - - total_loss, total_acc, total_num_example = [], [], [] - eval_step = 0 - eval_time_begin = time.time() - for example in data_generator(): - eval_step += 1 - np_loss, np_acc, np_num_example = exe.run( - program=test_program, - feed=feeder.feed([example]), - fetch_list=[loss, accuracy, num_example]) - total_loss.extend(np_loss * np_num_example) - total_acc.extend(np_acc * np_num_example) - total_num_example.extend(np_num_example) - eval_time_used = time.time() - eval_time_begin - accum_num_example = np.sum(total_num_example) - print("[evaluation] loss={:.5f} acc={:.5f} [step/sec: {:.2f}]".format( - np.sum(total_loss) / accum_num_example, - np.sum(total_acc) / accum_num_example, eval_step / eval_time_used)) + + epoch = config.num_epoch + batch_size = config.batch_size + learning_rate = config.learning_rate + use_cuda = config.use_cuda + batch_size = config.batch_size + strategy = config.strategy + with_memory_optimization = config.with_memory_optimization + checkpoint_dir = config.checkpoint_dir + + with fluid.program_guard(main_program, startup_program): + + if use_cuda: + place = fluid.CUDAPlace(0) + dev_count = fluid.core.get_cuda_device_count() + else: + place = fluid.CPUPlace() + dev_count = int( + os.environ.get('CPU_NUM', multiprocessing.cpu_count())) + + optimizer = optimizer_config_for_strategy( + strategy=strategy, + parameters=None, + data_processor=data_processor, + dev_count=dev_count) + data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place) + exe = fluid.Executor(place=place) + optimizer.minimize(loss) + + if with_memory_optimization: + logger.info("Memory optimize start") + fluid.memory_optimize( + input_program=fluid.default_main_program(), + skip_opt_set=[ + # skip task graph variable memory optimization + loss.name, + accuracy.name + ]) + logger.info("Memory optimize end") + + # initilize all parameters + exe.run(fluid.default_startup_program()) + step = 0 + logger.info("Finetune start") + train_time_begin = time.time() + for index in range(epoch): + train_reader = paddle.batch( + data_processor.data_generator(phase='train'), + batch_size=batch_size) + size = accuracy_sum = loss_sum = 0 + for batch in train_reader(): + loss_v, accuracy_v = exe.run( + feed=data_feeder.feed(batch), + fetch_list=[loss.name, accuracy.name]) + step += 1 + size += len(batch) + accuracy_sum += accuracy_v * len(batch) + loss_sum += loss_v * len(batch) + + if step % config.log_interval == 0: + train_time_used = time.time() - train_time_begin + perf = train_time_used / config.log_interval + train_time_begin = time.time() + logger.info( + "step %d: loss=%.5f acc=%.5f [step/sec: %.2f]" % + (step, loss_sum / size, accuracy_sum / size, perf)) + size = accuracy_sum = loss_sum = 0 + + if step % config.save_ckpt_interval == 0: + model_save_dir = os.path.join( + checkpoint_dir, "model_parameters_in_step%d" % step) + fluid.io.save_persistables(exe, dirname=model_save_dir) + + if eval_model and step % config.eval_interval == 0: + eval(task, data_processor, feed_list, config) + # eval before end + if eval_model: + eval(task, data_processor, feed_list, config) + logger.info("Finetune end") + + +def save_model_and_checkpoint(task, save_dir): + pass + + +def finetune_and_eval( + task, + data_processor, + feed_list, + config=None, +): + _finetune_model(task, data_processor, feed_list, config, eval_model=True) + + +def finetune(task, data_processor, feed_list, config=None): + _finetune_model(task, data_processor, feed_list, config, eval_model=False) + + +def eval(task, data_processor, feed_list, config=None): + inference_program = task.inference_program() + main_program = task.main_program() + loss = task.variable("loss") + accuracy = task.variable("accuracy") + use_cuda = config.use_cuda + batch_size = config.batch_size + logger.info("[Evaluation] start") + with fluid.program_guard(inference_program): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place) + exe = fluid.Executor(place=place) + size = accuracy_sum = loss_sum = 0 + test_reader = paddle.batch( + data_processor.data_generator(phase='test'), batch_size=batch_size) + eval_time_begin = time.time() + for index, batch in enumerate(test_reader()): + loss_v, accuracy_v, = exe.run( + feed=data_feeder.feed(batch), fetch_list=[loss, accuracy.name]) + size += len(batch) + accuracy_sum += accuracy_v * len(batch) + loss_sum += loss_v * len(batch) + eval_time_used = time.time() - eval_time_begin + perf = eval_time_used / index + logger.info("[Evaluation] loss=%.5f acc=%.5f [step/sec: %.2f]" % + (loss_sum / size, accuracy_sum / size, perf))