# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import shutil

from paddlehub.commands.base_command import BaseCommand, ENTRY
from paddlehub.autofinetune.autoft import PSHE2
from paddlehub.autofinetune.autoft import HAZero
from paddlehub.autofinetune.evaluator import FullTrailEvaluator
from paddlehub.autofinetune.evaluator import PopulationBasedEvaluator


class AutoFineTuneCommand(BaseCommand):
    name = "autofinetune"

    def __init__(self, name):
        super(AutoFineTuneCommand, self).__init__(name)
        self.show_in_help = True
        self.name = name
        self.description = "PaddleHub helps to finetune a task by searching hyperparameters automatically."
        self.parser = argparse.ArgumentParser(
            description=self.__class__.__doc__,
            prog='%s %s ' % (ENTRY, self.name),
            usage='%(prog)s',
            add_help=False)
        self.module = None

    def add_params_file_arg(self):
        self.arg_params_to_be_searched_group.add_argument(
            "--param_file",
            type=str,
            default=None,
            required=True,
            help="Hyperparameters to be searched, in YAML format. The number of hyperparameters to be searched must be greater than 1."
        )

    def add_autoft_config_arg(self):
        self.arg_config_group.add_argument(
            "--popsize", type=int, default=5, help="Population size")
        self.arg_config_group.add_argument(
            "--gpu",
            type=str,
            default="0",
            required=True,
            help="Comma-separated list of GPU devices to be used")
        self.arg_config_group.add_argument(
            "--round", type=int, default=10, help="Number of search rounds")
        self.arg_config_group.add_argument(
            "--output_dir",
            type=str,
            default=None,
            help="Directory to save model checkpoints")
        self.arg_config_group.add_argument(
            "--evaluator",
            type=str,
            default="populationbased",
            help="Choices: fulltrail or populationbased.")
        self.arg_config_group.add_argument(
            "--tuning_strategy",
            type=str,
            default="pshe2",
            help="Choices: HAZero or PSHE2.")
        self.arg_config_group.add_argument(
            'opts',
            help='See utils/config.py for all options',
            default=None,
            nargs=argparse.REMAINDER)

    def convert_to_other_options(self, config_list):
        # Options arrive as alternating key/value tokens, so an odd count
        # means a key is missing its value (or vice versa).
        if len(config_list) % 2 != 0:
            raise ValueError(
                "Command options for the finetuned task are malformed! Please check them: {}"
                .format(config_list))
        options_str = ""
        for key, value in zip(config_list[0::2], config_list[1::2]):
            options_str += "--" + key + "=" + value + " "
        return options_str

    def execute(self, argv):
        if not argv:
            print("ERROR: Please specify the Python script to be finetuned.\n")
            self.help()
            return False
        self.finetunee_script = argv[0]

        self.parser.prog = '%s %s %s' % (ENTRY, self.name,
                                         self.finetunee_script)
        self.arg_params_to_be_searched_group = self.parser.add_argument_group(
            title="Input options",
            description="Hyperparameters to be searched.")
        self.arg_config_group = self.parser.add_argument_group(
            title="Autofinetune config options",
            description="Autofinetune configuration for controlling autofinetune behavior, not required"
        )
        self.arg_finetuned_task_group = self.parser.add_argument_group(
            title="Finetuned task config options",
            description="Finetuned task configuration for controlling finetuned task behavior, not required"
        )
        self.add_params_file_arg()
        self.add_autoft_config_arg()

        if not argv[1:]:
            self.help()
            return False

        self.args = self.parser.parse_args(argv[1:])
        options_str = ""
        if self.args.opts is not None:
            options_str = self.convert_to_other_options(self.args.opts)

        # Parse the comma-separated GPU list, e.g. "0,1" -> [0, 1].
        device_ids = self.args.gpu.strip().split(",")
        device_ids = [int(device_id) for device_id in device_ids]

        if self.args.evaluator.lower() == "fulltrail":
            evaluator = FullTrailEvaluator(
                self.args.param_file,
                self.finetunee_script,
                options_str=options_str)
        elif self.args.evaluator.lower() == "populationbased":
            evaluator = PopulationBasedEvaluator(
                self.args.param_file,
                self.finetunee_script,
                options_str=options_str)
        else:
            raise ValueError(
                "The evaluator %s is not defined!" % self.args.evaluator)

        if self.args.tuning_strategy.lower() == "hazero":
            autoft = HAZero(
                evaluator,
                cudas=device_ids,
                popsize=self.args.popsize,
                output_dir=self.args.output_dir)
        elif self.args.tuning_strategy.lower() == "pshe2":
            autoft = PSHE2(
                evaluator,
                cudas=device_ids,
                popsize=self.args.popsize,
                output_dir=self.args.output_dir)
        else:
            raise ValueError("The tuning strategy %s is not defined!" %
                             self.args.tuning_strategy)

        # Run search rounds until the strategy converges or the round
        # budget is exhausted. Each round's trials are stored under
        # <output_dir>/round<N>.
        run_round_cnt = 0
        solutions_modeldirs = {}
        print("PaddleHub Autofinetune starts.")
        while (not autoft.is_stop()) and run_round_cnt < self.args.round:
            print("PaddleHub Autofinetune starts round %s." % run_round_cnt)
            output_dir = autoft._output_dir + "/round" + str(run_round_cnt)
            res = autoft.step(output_dir)
            solutions_modeldirs.update(res)
            evaluator.new_round()
            run_round_cnt = run_round_cnt + 1
        print("PaddleHub Autofinetune ends.")

        # Broadcast so every MPI worker agrees on the best hyperparameters.
        best_hparams_origin = autoft.get_best_hparams()
        best_hparams_origin = autoft.mpi.bcast(best_hparams_origin)

        with open(autoft._output_dir + "/log_file.txt", "w") as f:
            best_hparams = evaluator.convert_params(best_hparams_origin)
            print("The final best hyperparameters:")
            f.write("The final best hyperparameters:\n")
            for index, hparam_name in enumerate(autoft.hparams_name_list):
                print("%s=%s" % (hparam_name, best_hparams[index]))
                f.write(hparam_name + "\t:\t" + str(best_hparams[index]) +
                        "\n")

            best_hparams_dir, best_hparams_rank = solutions_modeldirs[tuple(
                best_hparams_origin)]

            print("The final best eval score is %s." %
                  autoft.get_best_eval_value())

            if autoft.mpi.multi_machine:
                print("The final best model parameters are saved as " +
                      autoft._output_dir + "/best_model on rank " +
                      str(best_hparams_rank) + " .")
            else:
                print("The final best model parameters are saved as " +
                      autoft._output_dir + "/best_model .")
            f.write("The final best eval score is %s.\n" %
                    autoft.get_best_eval_value())

            # Only the rank that produced the best trial holds its
            # checkpoint, so only that rank copies it to best_model.
            best_model_dir = autoft._output_dir + "/best_model"
            if autoft.mpi.rank == best_hparams_rank:
                shutil.copytree(best_hparams_dir, best_model_dir)

            if autoft.mpi.multi_machine:
                f.write("The final best model parameters are saved as "
                        "./best_model on rank " + str(best_hparams_rank) +
                        " .\n")
                f.write("\t".join(autoft.hparams_name_list) +
                        "\tsaved_params_dir\trank\n")
            else:
                f.write("The final best model parameters are saved as "
                        "./best_model .\n")
                f.write("\t".join(autoft.hparams_name_list) +
                        "\tsaved_params_dir\n")

            print(
                "The related information about the hyperparameters searched is saved as %s/log_file.txt ."
                % autoft._output_dir)

            for solution, modeldir in solutions_modeldirs.items():
                param = evaluator.convert_params(solution)
                param = [str(p) for p in param]
                if autoft.mpi.multi_machine:
                    f.write("\t".join(param) + "\t" + modeldir[0] + "\t" +
                            str(modeldir[1]) + "\n")
                else:
                    f.write("\t".join(param) + "\t" + modeldir[0] + "\n")

        return True


command = AutoFineTuneCommand.instance()
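# A minimal usage sketch of this command from the shell. Assumptions are
# flagged: "train.py" and "hparam.yaml" are hypothetical user files, and the
# flag names come from add_params_file_arg()/add_autoft_config_arg() above.
#
#   $ hub autofinetune train.py --param_file=hparam.yaml --gpu=0,1 \
#         --popsize=5 --round=10 --output_dir=./autoft_output \
#         --evaluator=populationbased --tuning_strategy=pshe2
#
# Any trailing "opts" tokens (e.g. "epochs 5") are forwarded to train.py as
# "--epochs=5" by convert_to_other_options().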