# coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from multiprocessing.pool import ThreadPool import copy import json import math import numpy as np import six import time from paddlehub.common.logger import logger from paddlehub.common.utils import mkdir if six.PY3: INF = math.inf else: INF = float("inf") class PSHE2(object): def __init__( self, evaluator, cudas=["0"], popsize=5, output_dir=None, alpha=0.5, epsilon=0.2, ): self._num_thread = len(cudas) self._popsize = popsize self._alpha = alpha self._epsilon = epsilon self._iteration = 0 self.cudas = cudas self.is_cuda_free = {"free": [], "busy": []} self.is_cuda_free["free"] = cudas self.evaluator = evaluator self.init_input = evaluator.get_init_params() self.num_hparm = len(self.init_input) self.best_hparams_per_pop = [[0] * self.num_hparm] * self._popsize self.best_reward_per_pop = [INF] * self._popsize self.momentums = [[0] * self.num_hparm] * self._popsize self.best_hparms_all_pop = [] self.best_reward_all_pop = INF self.current_hparams = [[0] * self.num_hparm] * self._popsize for i in range(self.popsize): self.current_hparams[i] = self.randomSolution() if output_dir is None: now = int(time.time()) time_str = time.strftime("%Y%m%d%H%M%S", time.localtime(now)) self._output_dir = "output_" + time_str else: self._output_dir = output_dir @property def thread(self): return self._num_thread @property def popsize(self): return self._popsize @property def alpha(self): return self._alpha @property def epsilon(self): return self._epsilon @property def output_dir(self): return self._output_dir @property def iteration(self): return self._iteration def set_output_dir(self, output_dir=None): if output_dir is not None: output_dir = output_dir else: output_dir = self._output_dir return output_dir def randomSolution(self): solut = [0] * self.num_hparm for i in range(self.num_hparm): ratio = (np.random.random_sample() - 0.5) * 2.0 if ratio >= 0: solut[i] = ( 1.0 - self.init_input[i]) * ratio + self.init_input[i] else: solut[i] = ( self.init_input[i] + 1.0) * ratio + self.init_input[i] return solut def smallPeturb(self): for i in range(self.popsize): for j in range(self.num_hparm): ratio = (np.random.random_sample() - 0.5) * 2.0 if ratio >= 0: self.current_hparams[i][j] = ( 1.0 - self.current_hparams[i][j] ) * ratio * self.epsilon + self.current_hparams[i][j] else: self.current_hparams[i][j] = ( self.current_hparams[i][j] + 1.0) * ratio * self.epsilon + self.current_hparams[i][j] def estimatePopGradients(self): gradients = [[0] * self.num_hparm] * self.popsize for i in range(self.popsize): for j in range(self.num_hparm): gradients[i][j] = self.current_hparams[i][ j] - self.best_hparms_all_pop[j] return gradients def estimateLocalGradients(self): gradients = [[0] * self.num_hparm] * self.popsize for i in range(self.popsize): for j in range(self.num_hparm): gradients[i][j] = self.current_hparams[i][ j] - self.best_hparams_per_pop[i][j] return gradients def estimateMomemtum(self): popGrads = self.estimatePopGradients() localGrads = self.estimateLocalGradients() for i in range(self.popsize): for j in range(self.num_hparm): self.momentums[i][j] = ( 1 - 3.0 * self.alpha / self.iteration ) * self.momentums[i][j] - self.alpha * localGrads[i][ j] - self.alpha * popGrads[i][j] def is_stop(self): return False def solutions(self): return self.current_hparams def feedback(self, params_list, reward_list): self._iteration = self._iteration + 1 for i in range(self.popsize): if reward_list[i] < self.best_reward_per_pop[i]: self.best_hparams_per_pop[i] = copy.deepcopy( self.current_hparams[i]) self.best_reward_per_pop[i] = reward_list[i] if reward_list[i] < self.best_reward_all_pop: self.best_hparms_all_pop = self.current_hparams[i] self.best_reward_all_pop = reward_list[i] self.estimateMomemtum() for i in range(self.popsize): for j in range(len(self.init_input)): self.current_hparams[i][j] = self.current_hparams[i][ j] + self.alpha * self.momentums[i][j] self.smallPeturb() def optimal_solution(self): return self.best_hparms_all_pop def step(self, output_dir): solutions = self.solutions() params_cudas_dirs = [] solution_results = [] cnt = 0 solutions_ckptdirs = {} mkdir(output_dir) for idx, solution in enumerate(solutions): cuda = self.is_cuda_free["free"][0] ckptdir = output_dir + "/ckpt-" + str(idx) log_file = output_dir + "/log-" + str(idx) + ".info" params_cudas_dirs.append([solution, cuda, ckptdir, log_file]) solutions_ckptdirs[tuple(solution)] = ckptdir self.is_cuda_free["free"].remove(cuda) self.is_cuda_free["busy"].append(cuda) if len(params_cudas_dirs) == self.thread or cnt == int( self.popsize / self.thread): tp = ThreadPool(len(params_cudas_dirs)) solution_results += tp.map(self.evaluator.run, params_cudas_dirs) cnt += 1 tp.close() tp.join() for param_cuda in params_cudas_dirs: self.is_cuda_free["free"].append(param_cuda[1]) self.is_cuda_free["busy"].remove(param_cuda[1]) params_cudas_dirs = [] self.feedback(solutions, solution_results) return solutions_ckptdirs