diff --git a/paddlehub/autofinetune/autoft.py b/paddlehub/autofinetune/autoft.py
index 2ab1a857c16e01d926e53065ed5cc829a75e490d..ef5d2d6e9492969ce5b2d15787b5f0ea01dad7f4 100644
--- a/paddlehub/autofinetune/autoft.py
+++ b/paddlehub/autofinetune/autoft.py
@@ -164,15 +164,15 @@ class BaseTuningStrategy(object):
         params_cudas_dirs = []
         solution_results = []
         cnt = 0
-        solutions_ckptdirs = {}
+        solutions_modeldirs = {}
         mkdir(output_dir)
         for idx, solution in enumerate(solutions):
             cuda = self.is_cuda_free["free"][0]
-            ckptdir = output_dir + "/ckpt-" + str(idx)
+            modeldir = output_dir + "/model-" + str(idx) + "/"
             log_file = output_dir + "/log-" + str(idx) + ".info"
-            params_cudas_dirs.append([solution, cuda, ckptdir, log_file])
-            solutions_ckptdirs[tuple(solution)] = ckptdir
+            params_cudas_dirs.append([solution, cuda, modeldir, log_file])
+            solutions_modeldirs[tuple(solution)] = modeldir
             self.is_cuda_free["free"].remove(cuda)
             self.is_cuda_free["busy"].append(cuda)
             if len(params_cudas_dirs
@@ -190,7 +190,7 @@ class BaseTuningStrategy(object):
 
         self.feedback(solutions, solution_results)
 
-        return solutions_ckptdirs
+        return solutions_modeldirs
 
 
 class HAZero(BaseTuningStrategy):
diff --git a/paddlehub/autofinetune/evaluator.py b/paddlehub/autofinetune/evaluator.py
index 1a1fb24f415fd5f2c222c9390646177e6c9b57d2..eec1dd3952be92cc6d3adfeaef9334ec5266f389 100644
--- a/paddlehub/autofinetune/evaluator.py
+++ b/paddlehub/autofinetune/evaluator.py
@@ -36,11 +36,12 @@ else:
 
 
 class BaseEvaluator(object):
-    def __init__(self, params_file, finetunee_script):
+    def __init__(self, params_file, finetunee_script, options_str=""):
         with io.open(params_file, 'r', encoding='utf8') as f:
             self.params = yaml.safe_load(f)
         self.finetunee_script = finetunee_script
         self.model_rewards = {}
+        self.options_str = options_str
 
     def get_init_params(self):
         init_params = []
@@ -108,13 +109,14 @@ class BaseEvaluator(object):
 
 
 class FullTrailEvaluator(BaseEvaluator):
-    def __init__(self, params_file, finetunee_script):
-        super(FullTrailEvaluator, self).__init__(params_file, finetunee_script)
+    def __init__(self, params_file, finetunee_script, options_str=""):
+        super(FullTrailEvaluator, self).__init__(
+            params_file, finetunee_script, options_str=options_str)
 
     def run(self, *args):
         params = args[0][0]
         num_cuda = args[0][1]
-        ckpt_dir = args[0][2]
+        saved_params_dir = args[0][2]
         log_file = args[0][3]
         params = self.convert_params(params)
         if not self.is_valid_params(params):
@@ -125,12 +127,11 @@ class FullTrailEvaluator(BaseEvaluator):
         f.close()
 
         if is_windows():
-            run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
-                (num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
+            run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
+                (num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
         else:
-            run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
-                (num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
-
+            run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
+                (num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
         try:
             os.system(run_cmd)
             with open(log_file, "r") as f:
@@ -142,20 +143,21 @@ class FullTrailEvaluator(BaseEvaluator):
                 % param_str.replace("--", ""))
             eval_result = 0.0
         reward = self.get_reward(eval_result)
-        self.model_rewards[ckpt_dir] = reward
+        self.model_rewards[saved_params_dir] = reward
         return reward
 
 
 class ModelBasedEvaluator(BaseEvaluator):
-    def __init__(self, params_file, finetunee_script):
-        super(ModelBasedEvaluator, self).__init__(params_file, finetunee_script)
-        self.half_best_model_ckpt = []
+    def __init__(self, params_file, finetunee_script, options_str=""):
+        super(ModelBasedEvaluator, self).__init__(
+            params_file, finetunee_script, options_str=options_str)
+        self.half_best_model_path = []
         self.run_count = 0
 
     def run(self, *args):
         params = args[0][0]
         num_cuda = args[0][1]
-        ckpt_dir = args[0][2]
+        saved_params_dir = args[0][2]
         log_file = args[0][3]
         params = self.convert_params(params)
         if not self.is_valid_params(params):
@@ -165,22 +167,23 @@ class ModelBasedEvaluator(BaseEvaluator):
         f = open(log_file, "w")
         f.close()
 
-        if len(self.half_best_model_ckpt) > 0:
-            model_path = self.half_best_model_ckpt[self.run_count % len(
-                self.half_best_model_ckpt)] + "/best_model"
+        if len(self.half_best_model_path) > 0:
+            model_path = self.half_best_model_path[self.run_count % len(
+                self.half_best_model_path)]
             if is_windows():
-                run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
-                    (num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
+                run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --epochs=1 --model_path %s --saved_params_dir=%s %s %s >%s 2>&1" % \
+                    (num_cuda, self.finetunee_script, model_path, saved_params_dir, param_str, self.options_str, log_file)
             else:
-                run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
-                    (num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
+                run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --epochs=1 --model_path %s --saved_params_dir=%s %s %s >%s 2>&1" % \
+                    (num_cuda, self.finetunee_script, model_path, saved_params_dir, param_str, self.options_str, log_file)
+
         else:
             if is_windows():
-                run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
-                    (num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
+                run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
+                    (num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
             else:
-                run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
-                    (num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
+                run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
+                    (num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
 
         self.run_count += 1
         try:
@@ -194,7 +197,7 @@ class ModelBasedEvaluator(BaseEvaluator):
                 % param_str.replace("--", ""))
             eval_result = 0.0
         reward = self.get_reward(eval_result)
-        self.model_rewards[ckpt_dir] = reward
+        self.model_rewards[saved_params_dir] = reward
         return reward
 
     def new_round(self):
@@ -202,7 +205,7 @@ class ModelBasedEvaluator(BaseEvaluator):
         half_size = int(len(self.model_rewards) / 2)
         if half_size < 1:
             half_size = 1
-        self.half_best_model_ckpt = list({
+        self.half_best_model_path = list({
             key
             for key in sorted(
                 self.model_rewards, key=self.model_rewards.get, reverse=False)
diff --git a/paddlehub/commands/autofinetune.py b/paddlehub/commands/autofinetune.py
index f684ce69a05c7101e5e6ec482037a2f11b83da7b..bed2229f9856c99c8a2d484bd205d5bc54158e67 100644
--- a/paddlehub/commands/autofinetune.py
+++ b/paddlehub/commands/autofinetune.py
@@ -91,6 +91,22 @@ class AutoFineTuneCommand(BaseCommand):
             type=str,
             default="HAZero",
             help="Choices: HAZero or PSHE2.")
+        self.arg_config_group.add_argument(
+            'opts',
+            help='See utils/config.py for all options',
+            default=None,
+            nargs=argparse.REMAINDER)
+
+    def convert_to_other_options(self, config_list):
+        if len(config_list) % 2 != 0:
+            raise ValueError(
+                "Command for finetuned task options config format error! Please check it: {}"
+                .format(config_list))
+        options_str = ""
+        for key, value in zip(config_list[0::2], config_list[1::2]):
+            options_str += "--" + key + "=" + value + " "
+        print(options_str)
+        return options_str
 
     def execute(self, argv):
         if not argv:
@@ -109,6 +125,11 @@ class AutoFineTuneCommand(BaseCommand):
             description=
             "Autofintune configuration for controlling autofinetune behavior, not required"
         )
+        self.arg_finetuned_task_group = self.parser.add_argument_group(
+            title="Finetuned task config options",
+            description=
+            "Finetuned task configuration for controlling finetuned task behavior, not required"
+        )
 
         self.add_params_file_arg()
         self.add_autoft_config_arg()
@@ -118,12 +139,20 @@ class AutoFineTuneCommand(BaseCommand):
             return False
 
         self.args = self.parser.parse_args(argv[1:])
+        options_str = ""
+        if self.args.opts is not None:
+            options_str = self.convert_to_other_options(self.args.opts)
+
         if self.args.evaluate_choice.lower() == "fulltrail":
-            evaluator = FullTrailEvaluator(self.args.param_file,
-                                           self.fintunee_script)
+            evaluator = FullTrailEvaluator(
+                self.args.param_file,
+                self.fintunee_script,
+                options_str=options_str)
         elif self.args.evaluate_choice.lower() == "modelbased":
-            evaluator = ModelBasedEvaluator(self.args.param_file,
-                                            self.fintunee_script)
+            evaluator = ModelBasedEvaluator(
+                self.args.param_file,
+                self.fintunee_script,
+                options_str=options_str)
         else:
             raise ValueError(
                 "The evaluate %s is not defined!" % self.args.evaluate_choice)
@@ -145,13 +174,13 @@ class AutoFineTuneCommand(BaseCommand):
                 self.args.tuning_strategy)
 
         run_round_cnt = 0
-        solutions_ckptdirs = {}
+        solutions_modeldirs = {}
         print("PaddleHub Autofinetune starts.")
         while (not autoft.is_stop()) and run_round_cnt < self.args.round:
             print("PaddleHub Autofinetune starts round at %s." % run_round_cnt)
             output_dir = autoft._output_dir + "/round" + str(run_round_cnt)
             res = autoft.step(output_dir)
-            solutions_ckptdirs.update(res)
+            solutions_modeldirs.update(res)
             evaluator.new_round()
             run_round_cnt = run_round_cnt + 1
         print("PaddleHub Autofinetune ends.")
@@ -164,17 +193,15 @@ class AutoFineTuneCommand(BaseCommand):
                 print("%s=%s" % (hparam_name, best_hparams[index]))
                 f.write(hparam_name + "\t:\t" + str(best_hparams[index]) + "\n")
             f.write("\n\n\n")
-            f.write("\t".join(autoft.hparams_name_list) + "\toutput_dir\n\n")
-            logger.info(
-                "The checkpont directory of programs ran with hyperparamemters searched are saved as log_file.txt ."
-            )
+            f.write("\t".join(autoft.hparams_name_list) +
+                    "\tsaved_params_dir\n\n")
             print(
                 "The checkpont directory of programs ran with hyperparamemters searched are saved as log_file.txt ."
             )
-            for solution, ckptdir in solutions_ckptdirs.items():
+            for solution, modeldir in solutions_modeldirs.items():
                 param = evaluator.convert_params(solution)
                 param = [str(p) for p in param]
-                f.write("\t".join(param) + "\t" + ckptdir + "\n\n")
+                f.write("\t".join(param) + "\t" + modeldir + "\n\n")
         return True
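
Reviewer note, not part of the patch: a minimal sketch of how the new `opts` passthrough is meant to be consumed end to end. Only `--saved_params_dir`, `--model_path`, `--epochs`, and the key/value pairing logic come from this diff; the option names `max_seq_len` and `warmup_prop` are hypothetical stand-ins for whatever flags a finetunee script defines.

```python
import argparse


def convert_to_other_options(config_list):
    # Mirrors AutoFineTuneCommand.convert_to_other_options above: pair up the
    # trailing `opts` tokens and render them as "--key=value " options.
    if len(config_list) % 2 != 0:
        raise ValueError(
            "Options must be key/value pairs, got: {}".format(config_list))
    return "".join("--{}={} ".format(k, v)
                   for k, v in zip(config_list[0::2], config_list[1::2]))


# Everything after the recognized autofinetune flags lands in `opts`
# (argparse.REMAINDER) and is appended to every run_cmd the evaluators build:
options_str = convert_to_other_options(
    ["max_seq_len", "128", "warmup_prop", "0.1"])
print(options_str)  # --max_seq_len=128 --warmup_prop=0.1

# A finetunee script then only needs matching argparse flags.
# --saved_params_dir replaces the old --checkpoint_dir, and --model_path /
# --epochs=1 are what ModelBasedEvaluator adds when warm-starting from one of
# the previous round's best models.
parser = argparse.ArgumentParser()
parser.add_argument("--saved_params_dir", type=str, default="")
parser.add_argument("--model_path", type=str, default="")
parser.add_argument("--epochs", type=int, default=3)
parser.add_argument("--max_seq_len", type=int, default=128)    # hypothetical
parser.add_argument("--warmup_prop", type=float, default=0.1)  # hypothetical
args = parser.parse_args(options_str.split())
print(args.max_seq_len, args.warmup_prop)  # 128 0.1
```

Because the tokens are forwarded verbatim, the finetunee script stays the single source of truth for what each option means; the autofinetune command only checks that they arrive in pairs.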