# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from multiprocessing.pool import ThreadPool

import cma
import copy
import math
import numpy as np
import os
import six
import time
from tb_paddle import SummaryWriter

from paddlehub.common.utils import mkdir
from paddlehub.autofinetune.evaluator import REWARD_SUM, TMP_HOME
from paddlehub.autofinetune.mpi_helper import MPIHelper

if six.PY3:
    INF = math.inf
else:
    INF = float("inf")


class BaseTuningStrategy(object):
    def __init__(
            self,
            evaluator,
            cudas=["0"],
            popsize=5,
            output_dir=None,
    ):
        self._num_thread = len(cudas)
        self._popsize = popsize
        self.cudas = cudas
        self.is_cuda_free = {"free": [], "busy": []}
        # copy the list so GPU scheduling in step() does not mutate the
        # caller's (possibly shared default) argument
        self.is_cuda_free["free"] = list(cudas)
        self._round = 0
        self.evaluator = evaluator
        self.init_input = evaluator.get_init_params()
        self.num_hparam = len(self.init_input)
        self.best_hparams_all_pop = []
        self.best_reward_all_pop = INF
        # build each row independently: "[[0] * n] * popsize" would alias
        # every row to the same list, so writing one entry would write to
        # all populations at once
        self.current_hparams = [[0] * self.num_hparam
                                for _ in range(self._popsize)]
        self.hparams_name_list = [
            param["name"] for param in evaluator.params['param_list']
        ]

        if output_dir is None:
            now = int(time.time())
            time_str = time.strftime("%Y%m%d%H%M%S", time.localtime(now))
            self._output_dir = "output_" + time_str
        else:
            self._output_dir = output_dir

        # records the information for the whole auto finetune run
        self.writer = SummaryWriter(logdir=self._output_dir + '/visualization')
        # one writer per population member, across all rounds
        self.writer_pop_trails = []
        for i in range(self.popsize):
            writer_pop_trail = SummaryWriter(
                logdir=self._output_dir + '/visualization/pop_{}'.format(i))
            self.writer_pop_trails.append(writer_pop_trail)

        # for running in parallel over MPI
        self.mpi = MPIHelper()
        if self.mpi.multi_machine:
            print("Autofinetune multimachine mode: running on {}".format(
                self.mpi.gather(self.mpi.name)))

    @property
    def thread(self):
        return self._num_thread

    @property
    def popsize(self):
        return self._popsize

    @property
    def output_dir(self):
        return self._output_dir

    @property
    def round(self):
        return self._round

    def set_output_dir(self, output_dir=None):
        # return the given directory if provided, otherwise the default one
        if output_dir is None:
            output_dir = self._output_dir
        return output_dir
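    # Every hyperparameter is tuned in a normalized interval [-1, 1]: see
    # the 'bounds' option passed to CMA-ES in HAZero and the two-sided
    # update branches in PSHE2.set_random_hparam()/small_perturb() below.
    # evaluator.get_init_params() supplies the normalized starting point,
    # and evaluator.convert_params() maps a normalized vector back to the
    # real hyperparameter values when trials are launched and reported.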
    def is_stop(self):
        return False

    def get_current_hparams(self):
        return self.current_hparams

    def feedback(self, params_list, reward_list):
        raise NotImplementedError

    def get_best_hparams(self):
        return self.best_hparams_all_pop

    def get_best_eval_value(self):
        # rewards are minimized internally, so the best eval value is
        # recovered as REWARD_SUM minus the smallest reward seen so far
        return REWARD_SUM - self.best_reward_all_pop

    def step(self, output_dir):
        solutions = self.get_current_hparams()

        params_cudas_dirs = []
        solution_results = []
        solutions_modeldirs = {}
        mkdir(output_dir)

        solutions = self.mpi.bcast(solutions)

        # keep only the slice of solutions assigned to this MPI rank
        range_start, range_end = self.mpi.split_range(len(solutions))
        my_solutions = solutions[range_start:range_end]

        for idx, solution in enumerate(my_solutions):
            cuda = self.is_cuda_free["free"][0]
            modeldir = output_dir + "/model-" + str(idx) + "/"
            log_file = output_dir + "/log-" + str(idx) + ".info"
            params_cudas_dirs.append([solution, cuda, modeldir, log_file])
            solutions_modeldirs[tuple(solution)] = (modeldir, self.mpi.rank)
            self.is_cuda_free["free"].remove(cuda)
            self.is_cuda_free["busy"].append(cuda)
            # launch a batch of trials once every free GPU has been assigned
            # a solution, or when the last solution has been queued
            if (len(params_cudas_dirs) == self.thread
                    or idx == len(my_solutions) - 1):
                tp = ThreadPool(len(params_cudas_dirs))
                solution_results += tp.map(self.evaluator.run,
                                           params_cudas_dirs)
                tp.close()
                tp.join()
                for param_cuda in params_cudas_dirs:
                    self.is_cuda_free["free"].append(param_cuda[1])
                    self.is_cuda_free["busy"].remove(param_cuda[1])
                params_cudas_dirs = []

        all_solution_results = self.mpi.gather(solution_results)
        if self.mpi.rank == 0:
            # only rank 0 needs to feed the results back to the strategy
            all_solution_results = [y for x in all_solution_results for y in x]
            self.feedback(solutions, all_solution_results)

        # remove the tmp.txt which records the eval results of the trials
        tmp_file = os.path.join(TMP_HOME, "tmp.txt")
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

        # collect the solutions_modeldirs mapping from every rank
        collected_solutions_modeldirs = self.mpi.allgather(solutions_modeldirs)
        return_dict = {}
        for i in collected_solutions_modeldirs:
            return_dict.update(i)

        return return_dict

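# A strategy is driven round by round: the caller asks for the current
# population, step() schedules one trial per solution across the free GPUs
# (and across machines under MPI), and feedback() updates the strategy's
# state from the rewards. A minimal driver sketch, assuming an `evaluator`
# built from paddlehub.autofinetune.evaluator and a hypothetical
# `max_round` budget (illustrative only):
#
#     strategy = HAZero(evaluator, cudas=["0", "1"], popsize=4)
#     run_round = 0
#     while not strategy.is_stop() and run_round < max_round:
#         output_dir = strategy.output_dir + "/round" + str(run_round)
#         strategy.step(output_dir)  # runs trials; rank 0 calls feedback()
#         run_round += 1
#     print(strategy.get_best_hparams())
#     print(strategy.get_best_eval_value())

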
class HAZero(BaseTuningStrategy):
    def __init__(
            self,
            evaluator,
            cudas=["0"],
            popsize=1,
            output_dir=None,
            sigma=0.2,
    ):
        super(HAZero, self).__init__(evaluator, cudas, popsize, output_dir)

        self._sigma = sigma

        self.evolution_strategy = cma.CMAEvolutionStrategy(
            self.init_input, sigma, {
                'popsize': self.popsize,
                'bounds': [-1, 1],
                'AdaptSigma': True,
                'verb_disp': 1,
                'verb_time': True,
            })

    @property
    def sigma(self):
        return self._sigma

    def get_current_hparams(self):
        return self.evolution_strategy.ask()

    def is_stop(self):
        return self.evolution_strategy.stop()

    def feedback(self, params_list, reward_list):
        self._round = self._round + 1

        local_min_reward = min(reward_list)
        local_min_reward_index = reward_list.index(local_min_reward)
        local_hparams = self.evaluator.convert_params(
            params_list[local_min_reward_index])
        print("The local best eval value in the %s-th round is %s." %
              (self._round - 1, REWARD_SUM - local_min_reward))
        print("The local best hyperparameters are:")
        for index, hparam_name in enumerate(self.hparams_name_list):
            print("%s=%s" % (hparam_name, local_hparams[index]))

        if local_min_reward <= self.best_reward_all_pop:
            self.best_reward_all_pop = local_min_reward
            self.best_hparams_all_pop = params_list[local_min_reward_index]

        best_hparams = self.evaluator.convert_params(self.best_hparams_all_pop)
        for index, name in enumerate(self.hparams_name_list):
            self.writer.add_scalar(
                tag="hyperparameter_tuning/" + name,
                scalar_value=best_hparams[index],
                global_step=self.round)
        self.writer.add_scalar(
            tag="hyperparameter_tuning/best_eval_value",
            scalar_value=self.get_best_eval_value(),
            global_step=self.round)

        for pop_num in range(self.popsize):
            params = self.evaluator.convert_params(params_list[pop_num])
            for index, name in enumerate(self.hparams_name_list):
                self.writer_pop_trails[pop_num].add_scalar(
                    tag="population_transformation/" + name,
                    scalar_value=params[index],
                    global_step=self.round)
            self.writer_pop_trails[pop_num].add_scalar(
                tag="population_transformation/eval_value",
                scalar_value=(REWARD_SUM - reward_list[pop_num]),
                global_step=self.round)

        self.evolution_strategy.tell(params_list, reward_list)
        self.evolution_strategy.disp()

    def get_best_hparams(self):
        return list(self.evolution_strategy.result.xbest)

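# HAZero above is a thin wrapper around cma's ask/tell interface:
# get_current_hparams() forwards to ask(), and feedback() logs the round
# before handing the rewards to tell(). A standalone sketch of that
# interface on a toy quadratic objective (illustrative only):
#
#     import cma
#     es = cma.CMAEvolutionStrategy([0.0, 0.0], 0.2,
#                                   {'popsize': 5, 'bounds': [-1, 1]})
#     while not es.stop():
#         candidates = es.ask()                  # like get_current_hparams()
#         rewards = [sum(x * x for x in c) for c in candidates]
#         es.tell(candidates, rewards)           # like feedback()
#     print(es.result.xbest)                     # like get_best_hparams()

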
class PSHE2(BaseTuningStrategy):
    def __init__(
            self,
            evaluator,
            cudas=["0"],
            popsize=1,
            output_dir=None,
            alpha=0.5,
            epsilon=0.2,
    ):
        super(PSHE2, self).__init__(evaluator, cudas, popsize, output_dir)

        self._alpha = alpha
        self._epsilon = epsilon

        # build each row independently to avoid aliasing all rows together
        self.best_hparams_per_pop = [[0] * self.num_hparam
                                     for _ in range(self._popsize)]
        self.best_reward_per_pop = [INF] * self._popsize
        self.momentums = [[0] * self.num_hparam
                          for _ in range(self._popsize)]

        for i in range(self.popsize):
            self.current_hparams[i] = self.set_random_hparam()

    @property
    def alpha(self):
        return self._alpha

    @property
    def epsilon(self):
        return self._epsilon

    def set_random_hparam(self):
        # draw a random solution around the initial input, staying in [-1, 1]
        solut = [0] * self.num_hparam
        for i in range(self.num_hparam):
            ratio = (np.random.random_sample() - 0.5) * 2.0
            if ratio >= 0:
                solut[i] = ((1.0 - self.init_input[i]) * ratio +
                            self.init_input[i])
            else:
                solut[i] = ((self.init_input[i] + 1.0) * ratio +
                            self.init_input[i])
        return solut

    def small_perturb(self):
        # nudge every value by at most epsilon of its distance to the
        # [-1, 1] boundary, in a random direction
        for i in range(self.popsize):
            for j in range(self.num_hparam):
                ratio = (np.random.random_sample() - 0.5) * 2.0
                if ratio >= 0:
                    self.current_hparams[i][j] += (
                        1.0 - self.current_hparams[i][j]) * ratio * self.epsilon
                else:
                    self.current_hparams[i][j] += (
                        self.current_hparams[i][j] + 1.0) * ratio * self.epsilon

    def estimate_pop_gradients(self):
        # build rows independently; "[[0] * n] * popsize" would alias them
        gradients = [[0] * self.num_hparam for _ in range(self.popsize)]
        for i in range(self.popsize):
            for j in range(self.num_hparam):
                gradients[i][j] = (self.current_hparams[i][j] -
                                   self.best_hparams_all_pop[j])
        return gradients

    def estimate_local_gradients(self):
        gradients = [[0] * self.num_hparam for _ in range(self.popsize)]
        for i in range(self.popsize):
            for j in range(self.num_hparam):
                gradients[i][j] = (self.current_hparams[i][j] -
                                   self.best_hparams_per_pop[i][j])
        return gradients

    def estimate_momentum(self):
        pop_grads = self.estimate_pop_gradients()
        local_grads = self.estimate_local_gradients()
        for i in range(self.popsize):
            for j in range(self.num_hparam):
                self.momentums[i][j] = (
                    (1 - 3.0 * self.alpha / self.round) * self.momentums[i][j]
                    - self.alpha * local_grads[i][j]
                    - self.alpha * pop_grads[i][j])

    def is_stop(self):
        return False

    def feedback(self, params_list, reward_list):
        self._round = self._round + 1

        local_min_reward = min(reward_list)
        local_min_reward_index = reward_list.index(local_min_reward)
        local_hparams = self.evaluator.convert_params(
            params_list[local_min_reward_index])
        print("The local best eval value in the %s-th round is %s." %
              (self._round - 1, REWARD_SUM - local_min_reward))
        print("The local best hyperparameters are:")
        for index, hparam_name in enumerate(self.hparams_name_list):
            print("%s=%s" % (hparam_name, local_hparams[index]))

        for i in range(self.popsize):
            if reward_list[i] <= self.best_reward_per_pop[i]:
                self.best_hparams_per_pop[i] = copy.deepcopy(
                    self.current_hparams[i])
                self.best_reward_per_pop[i] = copy.deepcopy(reward_list[i])
        if local_min_reward <= self.best_reward_all_pop:
            self.best_reward_all_pop = local_min_reward
            self.best_hparams_all_pop = copy.deepcopy(
                params_list[local_min_reward_index])

        best_hparams = self.evaluator.convert_params(self.best_hparams_all_pop)
        for index, name in enumerate(self.hparams_name_list):
            self.writer.add_scalar(
                tag="hyperparameter_tuning/" + name,
                scalar_value=best_hparams[index],
                global_step=self.round)
        self.writer.add_scalar(
            tag="hyperparameter_tuning/best_eval_value",
            scalar_value=self.get_best_eval_value(),
            global_step=self.round)

        for pop_num in range(self.popsize):
            params = self.evaluator.convert_params(params_list[pop_num])
            for index, name in enumerate(self.hparams_name_list):
                self.writer_pop_trails[pop_num].add_scalar(
                    tag="population_transformation/" + name,
                    scalar_value=params[index],
                    global_step=self.round)
            self.writer_pop_trails[pop_num].add_scalar(
                tag="population_transformation/eval_value",
                scalar_value=(REWARD_SUM - reward_list[pop_num]),
                global_step=self.round)

        self.estimate_momentum()
        for i in range(self.popsize):
            for j in range(len(self.init_input)):
                self.current_hparams[i][j] = (
                    self.current_hparams[i][j] +
                    self.alpha * self.momentums[i][j])
        self.small_perturb()
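
# PSHE2's per-round update, for population member i and hyperparameter j
# (rewards are minimized, so "best" means the smallest reward seen so far):
#
#     local_grad[i][j] = x[i][j] - best_hparams_per_pop[i][j]
#     pop_grad[i][j]   = x[i][j] - best_hparams_all_pop[j]
#     m[i][j] = (1 - 3 * alpha / round) * m[i][j]
#               - alpha * local_grad[i][j] - alpha * pop_grad[i][j]
#     x[i][j] = x[i][j] + alpha * m[i][j]
#
# followed by small_perturb(), which nudges each value by a random fraction
# (at most epsilon) of its distance to the [-1, 1] boundary to keep the
# population diverse.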