提交 6a386bb3 编写于 作者: S Steffy-zxf 提交者: wuzewu

Add autofinetune (#153)

* Add autoft (#127)

* add autofinetune

* update autofinetune readme
上级 cdb30bbb
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
import cma
import copy import copy
import json import json
import math import math
...@@ -20,8 +21,10 @@ import numpy as np ...@@ -20,8 +21,10 @@ import numpy as np
import six import six
import time import time
from tb_paddle import SummaryWriter
from paddlehub.common.logger import logger from paddlehub.common.logger import logger
from paddlehub.common.utils import mkdir from paddlehub.common.utils import mkdir
from paddlehub.autofinetune.evaluator import REWARD_SUM
if six.PY3: if six.PY3:
INF = math.inf INF = math.inf
...@@ -29,38 +32,30 @@ else: ...@@ -29,38 +32,30 @@ else:
INF = float("inf") INF = float("inf")
class PSHE2(object): class BaseTuningStrategy(object):
def __init__( def __init__(
self, self,
evaluator, evaluator,
cudas=["0"], cudas=["0"],
popsize=5, popsize=5,
output_dir=None, output_dir=None,
alpha=0.5,
epsilon=0.2,
): ):
self._num_thread = len(cudas) self._num_thread = len(cudas)
self._popsize = popsize self._popsize = popsize
self._alpha = alpha
self._epsilon = epsilon
self._iteration = 0
self.cudas = cudas self.cudas = cudas
self.is_cuda_free = {"free": [], "busy": []} self.is_cuda_free = {"free": [], "busy": []}
self.is_cuda_free["free"] = cudas self.is_cuda_free["free"] = cudas
self._round = 0
self.evaluator = evaluator self.evaluator = evaluator
self.init_input = evaluator.get_init_params() self.init_input = evaluator.get_init_params()
self.num_hparm = len(self.init_input) self.num_hparam = len(self.init_input)
self.best_hparams_all_pop = []
self.best_hparams_per_pop = [[0] * self.num_hparm] * self._popsize
self.best_reward_per_pop = [INF] * self._popsize
self.momentums = [[0] * self.num_hparm] * self._popsize
self.best_hparms_all_pop = []
self.best_reward_all_pop = INF self.best_reward_all_pop = INF
self.current_hparams = [[0] * self.num_hparm] * self._popsize self.current_hparams = [[0] * self.num_hparam] * self._popsize
for i in range(self.popsize): self.hparams_name_list = [
self.current_hparams[i] = self.randomSolution() param["name"] for param in evaluator.params['param_list']
]
if output_dir is None: if output_dir is None:
now = int(time.time()) now = int(time.time())
...@@ -68,6 +63,7 @@ class PSHE2(object): ...@@ -68,6 +63,7 @@ class PSHE2(object):
self._output_dir = "output_" + time_str self._output_dir = "output_" + time_str
else: else:
self._output_dir = output_dir self._output_dir = output_dir
self.writer = SummaryWriter(logdir=self._output_dir + '/tb_paddle')
@property @property
def thread(self): def thread(self):
...@@ -77,14 +73,6 @@ class PSHE2(object): ...@@ -77,14 +73,6 @@ class PSHE2(object):
def popsize(self): def popsize(self):
return self._popsize return self._popsize
@property
def alpha(self):
return self._alpha
@property
def epsilon(self):
return self._epsilon
@property @property
def output_dir(self): def output_dir(self):
return self._output_dir return self._output_dir
...@@ -93,6 +81,10 @@ class PSHE2(object): ...@@ -93,6 +81,10 @@ class PSHE2(object):
def iteration(self): def iteration(self):
return self._iteration return self._iteration
@property
def round(self):
return self._round
def set_output_dir(self, output_dir=None): def set_output_dir(self, output_dir=None):
if output_dir is not None: if output_dir is not None:
output_dir = output_dir output_dir = output_dir
...@@ -154,31 +146,20 @@ class PSHE2(object): ...@@ -154,31 +146,20 @@ class PSHE2(object):
def is_stop(self): def is_stop(self):
return False return False
def solutions(self): def get_current_hparams(self):
return self.current_hparams return self.current_hparams
def feedback(self, params_list, reward_list): def feedback(self, params_list, reward_list):
self._iteration = self._iteration + 1 return NotImplementedError
for i in range(self.popsize):
if reward_list[i] < self.best_reward_per_pop[i]: def get_best_hparams(self):
self.best_hparams_per_pop[i] = copy.deepcopy( return self.best_hparams_all_pop
self.current_hparams[i])
self.best_reward_per_pop[i] = reward_list[i]
if reward_list[i] < self.best_reward_all_pop:
self.best_hparms_all_pop = self.current_hparams[i]
self.best_reward_all_pop = reward_list[i]
self.estimateMomemtum()
for i in range(self.popsize):
for j in range(len(self.init_input)):
self.current_hparams[i][j] = self.current_hparams[i][
j] + self.alpha * self.momentums[i][j]
self.smallPeturb()
def optimal_solution(self): def get_best_eval_value(self):
return self.best_hparms_all_pop return REWARD_SUM - self.best_reward_all_pop
def step(self, output_dir): def step(self, output_dir):
solutions = self.solutions() solutions = self.get_current_hparams()
params_cudas_dirs = [] params_cudas_dirs = []
solution_results = [] solution_results = []
...@@ -209,3 +190,195 @@ class PSHE2(object): ...@@ -209,3 +190,195 @@ class PSHE2(object):
self.feedback(solutions, solution_results) self.feedback(solutions, solution_results)
return solutions_ckptdirs return solutions_ckptdirs
class HAZero(BaseTuningStrategy):
    """Hyperparameter tuning strategy backed by ``cma.CMAEvolutionStrategy``.

    Every round the CMA-ES optimizer is asked for ``popsize`` candidate
    hyperparameter vectors; the rewards measured for them are fed back with
    ``tell`` so the optimizer can adapt its sampling distribution.

    Args:
        evaluator: Object providing ``get_init_params()``, ``convert_params()``
            and a ``params['param_list']`` description of the hyperparameters.
        cudas: List of GPU card ids handed to the base strategy.
        popsize: Number of candidates sampled per round.
        output_dir: Directory for outputs; forwarded to the base strategy.
        sigma: Initial step size passed to the CMA-ES optimizer.
    """

    def __init__(
            self,
            evaluator,
            cudas=["0"],
            popsize=1,
            output_dir=None,
            sigma=0.2,
    ):
        super(HAZero, self).__init__(evaluator, cudas, popsize, output_dir)

        self._sigma = sigma

        # NOTE: the attribute name keeps its historical spelling
        # ("stratefy") because it is reachable from outside the class.
        self.evolution_stratefy = cma.CMAEvolutionStrategy(
            self.init_input, sigma, {
                'popsize': self.popsize,
                'bounds': [-1, 1],
                'AdaptSigma': True,
                'verb_disp': 1,
                'verb_time': 'True',
            })

    @property
    def sigma(self):
        """Initial CMA-ES step size given at construction time."""
        return self._sigma

    def get_current_hparams(self):
        """Sample and return a fresh population from the CMA-ES optimizer."""
        return self.evolution_stratefy.ask()

    def is_stop(self):
        """Return the result of the CMA-ES optimizer's ``stop()`` check."""
        return self.evolution_stratefy.stop()

    def feedback(self, params_list, reward_list):
        """Record the rewards of one round and update the optimizer.

        Args:
            params_list: Candidate hyperparameter vectors that were evaluated
                (as returned by ``get_current_hparams``).
            reward_list: Reward of each candidate, in the same order; smaller
                is better.
        """
        self._round = self._round + 1

        local_min_reward = min(reward_list)
        local_min_reward_index = reward_list.index(local_min_reward)
        local_hparams = self.evaluator.convert_params(
            params_list[local_min_reward_index])
        print("The local best eval value in the %s-th round is %s." %
              (self._round - 1, REWARD_SUM - local_min_reward))
        print("The local best hyperparameters are as:")
        for index, hparam_name in enumerate(self.hparams_name_list):
            print("%s=%s" % (hparam_name, local_hparams[index]))

        # Track the global best from the candidates that were actually
        # evaluated. ``self.current_hparams`` is never refreshed by this
        # strategy (candidates come straight from ``ask()``), so reading it
        # here would record the base class placeholder instead of the real
        # best candidate. ``list(...)`` takes a private copy.
        for candidate, reward in zip(params_list, reward_list):
            if reward < self.best_reward_all_pop:
                self.best_hparams_all_pop = list(candidate)
                self.best_reward_all_pop = reward

        best_hparams = self.evaluator.convert_params(self.best_hparams_all_pop)
        for index, name in enumerate(self.hparams_name_list):
            self.writer.add_scalar(
                tag="hyperparameter tuning/" + name,
                scalar_value=best_hparams[index],
                global_step=self.round)
        self.writer.add_scalar(
            tag="hyperparameter tuning/best_eval_value",
            scalar_value=self.get_best_eval_value(),
            global_step=self.round)

        self.evolution_stratefy.tell(params_list, reward_list)
        self.evolution_stratefy.disp()

    def get_best_hparams(self):
        """Return the optimizer's best solution (``result.xbest``) as a list."""
        return list(self.evolution_stratefy.result.xbest)
class PSHE2(BaseTuningStrategy):
    """Particle-swarm style hyperparameter tuning strategy.

    Each of the ``popsize`` particles is one hyperparameter vector. After
    every round a particle is moved along a momentum term built from its
    distance to its own best position and to the best position found by the
    whole population, and is then randomly perturbed.

    Args:
        evaluator: Object providing ``get_init_params()``, ``convert_params()``
            and a ``params['param_list']`` description of the hyperparameters.
        cudas: List of GPU card ids handed to the base strategy.
        popsize: Number of particles.
        output_dir: Directory for outputs; forwarded to the base strategy.
        alpha: Weight of the gradient terms in the momentum update and of the
            momentum in the position update.
        epsilon: Scale of the random perturbation applied after each update.
    """

    def __init__(
            self,
            evaluator,
            cudas=["0"],
            popsize=1,
            output_dir=None,
            alpha=0.5,
            epsilon=0.2,
    ):
        super(PSHE2, self).__init__(evaluator, cudas, popsize, output_dir)

        self._alpha = alpha
        self._epsilon = epsilon

        # Build every row separately: ``[[0] * n] * m`` would make all rows
        # refer to the same list, so an element-wise update of one particle
        # would silently overwrite all the others.
        self.best_hparams_per_pop = [[0] * self.num_hparam
                                     for _ in range(self._popsize)]
        self.best_reward_per_pop = [INF] * self._popsize
        self.momentums = [[0] * self.num_hparam for _ in range(self._popsize)]
        self.current_hparams = [
            self.set_random_hparam() for _ in range(self.popsize)
        ]

    @property
    def alpha(self):
        """Weight used in the momentum and position updates."""
        return self._alpha

    @property
    def epsilon(self):
        """Scale of the random perturbation."""
        return self._epsilon

    def set_random_hparam(self):
        """Return a random hyperparameter vector around the initial values.

        For every dimension a ratio is drawn from [-1, 1); a non-negative
        ratio moves the initial value towards 1.0, a negative one towards
        -1.0, proportionally to the remaining distance.
        """
        solut = [0] * self.num_hparam
        for i in range(self.num_hparam):
            ratio = (np.random.random_sample() - 0.5) * 2.0
            if ratio >= 0:
                solut[i] = (
                    1.0 - self.init_input[i]) * ratio + self.init_input[i]
            else:
                solut[i] = (
                    self.init_input[i] + 1.0) * ratio + self.init_input[i]
        return solut

    def small_peturb(self):
        """Randomly perturb every particle in place, scaled by ``epsilon``."""
        for i in range(self.popsize):
            for j in range(self.num_hparam):
                ratio = (np.random.random_sample() - 0.5) * 2.0
                if ratio >= 0:
                    self.current_hparams[i][j] = (
                        1.0 - self.current_hparams[i][j]
                    ) * ratio * self.epsilon + self.current_hparams[i][j]
                else:
                    self.current_hparams[i][j] = (
                        self.current_hparams[i][j] +
                        1.0) * ratio * self.epsilon + self.current_hparams[i][j]

    def estimate_popgradients(self):
        """Return, per particle, its offset from the population-wide best."""
        return [[
            self.current_hparams[i][j] - self.best_hparams_all_pop[j]
            for j in range(self.num_hparam)
        ] for i in range(self.popsize)]

    def estimate_local_gradients(self):
        """Return, per particle, its offset from that particle's own best."""
        return [[
            self.current_hparams[i][j] - self.best_hparams_per_pop[i][j]
            for j in range(self.num_hparam)
        ] for i in range(self.popsize)]

    def estimate_momemtum(self):
        """Update ``self.momentums`` in place from both gradient estimates.

        Must be called after ``self._round`` has been incremented, since the
        damping factor divides by the current round number.
        """
        popGrads = self.estimate_popgradients()
        localGrads = self.estimate_local_gradients()
        for i in range(self.popsize):
            for j in range(self.num_hparam):
                self.momentums[i][j] = (
                    1 - 3.0 * self.alpha / self.round
                ) * self.momentums[i][j] - self.alpha * localGrads[i][
                    j] - self.alpha * popGrads[i][j]

    def is_stop(self):
        """This strategy has no stopping criterion of its own."""
        return False

    def feedback(self, params_list, reward_list):
        """Record the rewards of one round and move the particles.

        Args:
            params_list: Hyperparameter vectors that were evaluated.
            reward_list: Reward of each vector, in the same order; smaller
                is better.
        """
        self._round = self._round + 1

        local_min_reward = min(reward_list)
        local_min_reward_index = reward_list.index(local_min_reward)
        local_hparams = self.evaluator.convert_params(
            params_list[local_min_reward_index])
        print("The local best eval value in the %s-th round is %s." %
              (self._round - 1, REWARD_SUM - local_min_reward))
        print("The local best hyperparameters are as:")
        for index, hparam_name in enumerate(self.hparams_name_list):
            print("%s=%s" % (hparam_name, local_hparams[index]))

        for i in range(self.popsize):
            if reward_list[i] < self.best_reward_per_pop[i]:
                self.best_hparams_per_pop[i] = copy.deepcopy(
                    self.current_hparams[i])
                self.best_reward_per_pop[i] = reward_list[i]
            if reward_list[i] < self.best_reward_all_pop:
                # Store a copy: the particle itself is modified in place
                # below, which must not alter the recorded best position.
                self.best_hparams_all_pop = copy.deepcopy(
                    self.current_hparams[i])
                self.best_reward_all_pop = reward_list[i]

        best_hparams = self.evaluator.convert_params(self.best_hparams_all_pop)
        for index, name in enumerate(self.hparams_name_list):
            self.writer.add_scalar(
                tag="hyperparameter tuning/" + name,
                scalar_value=best_hparams[index],
                global_step=self.round)
        self.writer.add_scalar(
            tag="hyperparameter tuning/best_eval_value",
            scalar_value=self.get_best_eval_value(),
            global_step=self.round)

        self.estimate_momemtum()
        for i in range(self.popsize):
            for j in range(self.num_hparam):
                self.current_hparams[i][j] = self.current_hparams[i][
                    j] + self.alpha * self.momentums[i][j]
        self.small_peturb()
...@@ -40,6 +40,7 @@ class BaseEvaluator(object): ...@@ -40,6 +40,7 @@ class BaseEvaluator(object):
with io.open(params_file, 'r', encoding='utf8') as f: with io.open(params_file, 'r', encoding='utf8') as f:
self.params = yaml.safe_load(f) self.params = yaml.safe_load(f)
self.finetunee_script = finetunee_script self.finetunee_script = finetunee_script
self.model_rewards = {}
def get_init_params(self): def get_init_params(self):
init_params = [] init_params = []
...@@ -134,7 +135,7 @@ class FullTrailEvaluator(BaseEvaluator): ...@@ -134,7 +135,7 @@ class FullTrailEvaluator(BaseEvaluator):
os.system(run_cmd) os.system(run_cmd)
with open(log_file, "r") as f: with open(log_file, "r") as f:
lines = f.readlines() lines = f.readlines()
eval_result = lines[-1] eval_result = float(lines[-1])
except: except:
print( print(
"WARNING: Program which was ran with hyperparameters as %s was crashed!" "WARNING: Program which was ran with hyperparameters as %s was crashed!"
...@@ -148,7 +149,6 @@ class FullTrailEvaluator(BaseEvaluator): ...@@ -148,7 +149,6 @@ class FullTrailEvaluator(BaseEvaluator):
class ModelBasedEvaluator(BaseEvaluator): class ModelBasedEvaluator(BaseEvaluator):
def __init__(self, params_file, finetunee_script): def __init__(self, params_file, finetunee_script):
super(ModelBasedEvaluator, self).__init__(params_file, finetunee_script) super(ModelBasedEvaluator, self).__init__(params_file, finetunee_script)
self.model_rewards = {}
self.half_best_model_ckpt = [] self.half_best_model_ckpt = []
self.run_count = 0 self.run_count = 0
...@@ -187,7 +187,7 @@ class ModelBasedEvaluator(BaseEvaluator): ...@@ -187,7 +187,7 @@ class ModelBasedEvaluator(BaseEvaluator):
os.system(run_cmd) os.system(run_cmd)
with open(log_file, "r") as f: with open(log_file, "r") as f:
lines = f.readlines() lines = f.readlines()
eval_result = lines[-1] eval_result = float(lines[-1])
except: except:
print( print(
"WARNING: Program which was ran with hyperparameters as %s was crashed!" "WARNING: Program which was ran with hyperparameters as %s was crashed!"
...@@ -198,7 +198,7 @@ class ModelBasedEvaluator(BaseEvaluator): ...@@ -198,7 +198,7 @@ class ModelBasedEvaluator(BaseEvaluator):
return reward return reward
def new_round(self): def new_round(self):
"""update self.half_best_model""" """update half_best_model"""
half_size = int(len(self.model_rewards) / 2) half_size = int(len(self.model_rewards) / 2)
if half_size < 1: if half_size < 1:
half_size = 1 half_size = 1
......
...@@ -31,6 +31,7 @@ import numpy as np ...@@ -31,6 +31,7 @@ import numpy as np
from paddlehub.commands.base_command import BaseCommand, ENTRY from paddlehub.commands.base_command import BaseCommand, ENTRY
from paddlehub.common.arg_helper import add_argument, print_arguments from paddlehub.common.arg_helper import add_argument, print_arguments
from paddlehub.autofinetune.autoft import PSHE2 from paddlehub.autofinetune.autoft import PSHE2
from paddlehub.autofinetune.autoft import HAZero
from paddlehub.autofinetune.evaluator import FullTrailEvaluator from paddlehub.autofinetune.evaluator import FullTrailEvaluator
from paddlehub.autofinetune.evaluator import ModelBasedEvaluator from paddlehub.autofinetune.evaluator import ModelBasedEvaluator
from paddlehub.common.logger import logger from paddlehub.common.logger import logger
...@@ -71,6 +72,7 @@ class AutoFineTuneCommand(BaseCommand): ...@@ -71,6 +72,7 @@ class AutoFineTuneCommand(BaseCommand):
"--cuda", "--cuda",
type=ast.literal_eval, type=ast.literal_eval,
default=['0'], default=['0'],
required=True,
help="The list of gpu devices to be used") help="The list of gpu devices to be used")
self.arg_config_group.add_argument( self.arg_config_group.add_argument(
"--round", type=int, default=10, help="Number of searches") "--round", type=int, default=10, help="Number of searches")
...@@ -84,6 +86,11 @@ class AutoFineTuneCommand(BaseCommand): ...@@ -84,6 +86,11 @@ class AutoFineTuneCommand(BaseCommand):
type=str, type=str,
default="fulltrail", default="fulltrail",
help="Choices: fulltrail or modelbased.") help="Choices: fulltrail or modelbased.")
self.arg_config_group.add_argument(
"--tuning_strategy",
type=str,
default="HAZero",
help="Choices: HAZero or PSHE2.")
def execute(self, argv): def execute(self, argv):
if not argv: if not argv:
...@@ -121,11 +128,21 @@ class AutoFineTuneCommand(BaseCommand): ...@@ -121,11 +128,21 @@ class AutoFineTuneCommand(BaseCommand):
raise ValueError( raise ValueError(
"The evaluate %s is not defined!" % self.args.evaluate_choice) "The evaluate %s is not defined!" % self.args.evaluate_choice)
autoft = PSHE2( if self.args.tuning_strategy.lower() == "hazero":
evaluator, autoft = HAZero(
cudas=self.args.cuda, evaluator,
popsize=self.args.popsize, cudas=self.args.cuda,
output_dir=self.args.output_dir) popsize=self.args.popsize,
output_dir=self.args.output_dir)
elif self.args.tuning_strategy.lower() == "pshe2":
autoft = PSHE2(
evaluator,
cudas=self.args.cuda,
popsize=self.args.popsize,
output_dir=self.args.output_dir)
else:
raise ValueError("The tuning strategy %s is not defined!" %
self.args.tuning_strategy)
run_round_cnt = 0 run_round_cnt = 0
solutions_ckptdirs = {} solutions_ckptdirs = {}
...@@ -138,23 +155,21 @@ class AutoFineTuneCommand(BaseCommand): ...@@ -138,23 +155,21 @@ class AutoFineTuneCommand(BaseCommand):
evaluator.new_round() evaluator.new_round()
run_round_cnt = run_round_cnt + 1 run_round_cnt = run_round_cnt + 1
print("PaddleHub Autofinetune ends.") print("PaddleHub Autofinetune ends.")
with open("./log_file.txt", "w") as f: with open("./log_file.txt", "w") as f:
best_choice = evaluator.convert_params(autoft.optimal_solution()) best_hparams = evaluator.convert_params(autoft.get_best_hparams())
print("The best hyperparameters:") print("The final best hyperparameters:")
f.write("The best hyperparameters:\n") f.write("The final best hyperparameters:\n")
param_name = [] for index, hparam_name in enumerate(autoft.hparams_name_list):
for idx, param in enumerate(evaluator.params["param_list"]): print("%s=%s" % (hparam_name, best_hparams[index]))
param_name.append(param["name"]) f.write(hparam_name + "\t:\t" + str(best_hparams[index]) + "\n")
f.write(param["name"] + "\t:\t" + str(best_choice[idx]) + "\n")
print("%s : %s" % (param["name"], best_choice[idx]))
f.write("\n\n\n") f.write("\n\n\n")
f.write("\t".join(param_name) + "\toutput_dir\n\n") f.write("\t".join(autoft.hparams_name_list) + "\toutput_dir\n\n")
logger.info( logger.info(
"The checkpont directory of programs ran with paramemters searched are saved as log_file.txt ." "The checkpont directory of programs ran with hyperparamemters searched are saved as log_file.txt ."
) )
print( print(
"The checkpont directory of programs ran with paramemters searched are saved as log_file.txt ." "The checkpont directory of programs ran with hyperparamemters searched are saved as log_file.txt ."
) )
for solution, ckptdir in solutions_ckptdirs.items(): for solution, ckptdir in solutions_ckptdirs.items():
param = evaluator.convert_params(solution) param = evaluator.convert_params(solution)
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 利用PaddleHub Auto Fine-tune进行自动超参搜索\n",
"\n",
"## 一、简介\n",
"\n",
"机器学习训练模型的过程中自然少不了调参。模型的参数可分成两类:参数与超参数,前者是模型通过自身的训练学习得到的参数数据;后者则需要通过人工经验设置(如学习率、dropout_rate、batch_size等),以提高模型训练的效果。当前模型往往参数空间大,手动调参十分耗时,尝试成本高。PaddleHub Auto Fine-tune可以实现自动调整超参数。\n",
"\n",
"PaddleHub Auto Fine-tune提供两种搜索超参策略:\n",
"\n",
"* HAZero: 核心思想是通过对正态分布中协方差矩阵的调整来处理变量之间的依赖关系和scaling。算法基本可以分成以下三步: 采样产生新解;计算目标函数值;更新正态分布参数。调整参数的基本思路为,调整参数使得产生好解的概率逐渐增大。\n",
"\n",
"* PSHE2: 采用粒子群算法,最优超参数组合就是所求问题的解。要求得最优解,关键在于如何更新超参数组合,才能让算法更快更好地收敛到最优解。PSO算法根据超参数本身历史的最优,在一定随机扰动的情况下决定下一步的更新方向。\n",
"\n",
"\n",
"PaddleHub Auto Fine-tune提供两种超参评估策略:\n",
"\n",
"* FullTrail: 给定一组超参,利用这组超参从头开始Finetune一个新模型,之后在数据集dev部分评估这个模型\n",
"\n",
"* ModelBased: 给定一组超参,若这组超参来自第一轮搜索的超参,则从头开始Finetune一个新模型;若这组超参数不是来自第一轮搜索的超参数,则程序会加载前几轮已经Fine-tune完毕后保存的较好模型,基于这个模型,在当前的超参数组合下继续Finetune。这个Fine-tune完毕后保存的较好模型,评估方式是这个模型在数据集dev部分的效果。\n",
"\n",
"## 二、准备工作\n",
"\n",
"使用PaddleHub Auto Fine-tune必须准备两个文件,并且这两个文件需要按照指定的格式书写。这两个文件分别是需要Fine-tune的python脚本finetunee.py和需要搜索的超参数信息yaml文件hparam.yaml。\n",
"\n",
"以Fine-tune中文情感分类任务为例,我们展示如何利用PaddleHub Auto Finetune进行自动搜索超参。\n",
"\n",
"以下是待搜索超参数的yaml文件hparam.yaml,包含需要搜索的超参名字、类型、范围等信息。其中类型只支持float和int类型。\n",
"```\n",
"param_list:\n",
"- name : learning_rate\n",
" init_value : 0.001\n",
" type : float\n",
" lower_than : 0.05\n",
" greater_than : 0.000005\n",
"- name : weight_decay\n",
" init_value : 0.1\n",
" type : float\n",
" lower_than : 1\n",
" greater_than : 0.0\n",
"- name : batch_size\n",
" init_value : 32\n",
" type : int\n",
" lower_than : 40\n",
" greater_than : 30\n",
"- name : warmup_prop\n",
" init_value : 0.1\n",
" type : float\n",
" lower_than : 0.2\n",
" greater_than : 0.0\n",
"```\n",
"\n",
"**NOTE:** 该yaml文件的最外层级的key必须是param_list\n",
"\n",
"\n",
"以下是中文情感分类的finetunee.py\n",
"\n",
"```python\n",
"from __future__ import absolute_import\n",
"from __future__ import division\n",
"from __future__ import print_function\n",
"\n",
"import argparse\n",
"import ast\n",
"\n",
"import paddle.fluid as fluid\n",
"import paddlehub as hub\n",
"import os\n",
"from paddlehub.common.logger import logger\n",
"\n",
"# yapf: disable\n",
"parser = argparse.ArgumentParser(__doc__)\n",
"parser.add_argument(\"--epochs\", type=int, default=3, help=\"epochs.\")\n",
"parser.add_argument(\"--batch_size\", type=int, default=32, help=\"batch_size.\")\n",
"parser.add_argument(\"--learning_rate\", type=float, default=5e-5, help=\"learning_rate.\")\n",
"parser.add_argument(\"--warmup_prop\", type=float, default=0.1, help=\"warmup_prop.\")\n",
"parser.add_argument(\"--weight_decay\", type=float, default=0.01, help=\"weight_decay.\")\n",
"parser.add_argument(\"--max_seq_len\", type=int, default=128, help=\"Number of words of the longest seqence.\")\n",
"parser.add_argument(\"--checkpoint_dir\", type=str, default=None, help=\"Directory to model checkpoint\")\n",
"parser.add_argument(\"--model_path\", type=str, default=\"\", help=\"load model path\")\n",
"args = parser.parse_args()\n",
"# yapf: enable.\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
" # Load Paddlehub ERNIE pretrained model\n",
" module = hub.Module(name=\"ernie\")\n",
" inputs, outputs, program = module.context(\n",
" trainable=True, max_seq_len=args.max_seq_len)\n",
"\n",
" # Download dataset and use ClassifyReader to read dataset\n",
" dataset = hub.dataset.ChnSentiCorp()\n",
" metrics_choices = [\"acc\"]\n",
"\n",
" reader = hub.reader.ClassifyReader(\n",
" dataset=dataset,\n",
" vocab_path=module.get_vocab_path(),\n",
" max_seq_len=args.max_seq_len)\n",
"\n",
" # Construct transfer learning network\n",
" # Use \"pooled_output\" for classification tasks on an entire sentence.\n",
" pooled_output = outputs[\"pooled_output\"]\n",
"\n",
" # Setup feed list for data feeder\n",
" # Must feed all the tensor of ERNIE's module need\n",
" feed_list = [\n",
" inputs[\"input_ids\"].name,\n",
" inputs[\"position_ids\"].name,\n",
" inputs[\"segment_ids\"].name,\n",
" inputs[\"input_mask\"].name,\n",
" ]\n",
"\n",
" # Select finetune strategy, setup config and finetune\n",
" strategy = hub.AdamWeightDecayStrategy(\n",
" warmup_proportion=args.warmup_prop,\n",
" learning_rate=args.learning_rate,\n",
" weight_decay=args.weight_decay,\n",
" lr_scheduler=\"linear_decay\")\n",
"\n",
" # Setup runing config for PaddleHub Finetune API\n",
" config = hub.RunConfig(\n",
" checkpoint_dir=args.checkpoint_dir,\n",
" use_cuda=True,\n",
" num_epoch=args.epochs,\n",
" batch_size=args.batch_size,\n",
" enable_memory_optim=True,\n",
" strategy=strategy)\n",
"\n",
" # Define a classfication finetune task by PaddleHub's API\n",
" cls_task = hub.TextClassifierTask(\n",
" data_reader=reader,\n",
" feature=pooled_output,\n",
" feed_list=feed_list,\n",
" num_classes=dataset.num_labels,\n",
" config=config,\n",
" metrics_choices=metrics_choices)\n",
"\n",
" # Finetune and evaluate by PaddleHub's API\n",
" if args.model_path != \"\":\n",
" with cls_task.phase_guard(phase=\"train\"):\n",
" cls_task.init_if_necessary()\n",
" cls_task.load_parameters(args.model_path)\n",
" logger.info(\"PaddleHub has loaded model from %s\" % args.model_path)\n",
"\n",
" run_states = cls_task.finetune()\n",
" train_avg_score, train_avg_loss, train_run_speed = cls_task._calculate_metrics(run_states)\n",
"\n",
" run_states = cls_task.eval()\n",
" eval_avg_score, eval_avg_loss, eval_run_speed = cls_task._calculate_metrics(run_states)\n",
"\n",
"print(eval_avg_score[\"acc\"], end=\"\")\n",
"```\n",
"**Note**:以上是finetunee.py的写法。\n",
"> finetunee.py必须可以接收待搜索超参数选项参数, 并且待搜索超参数选项名字和yaml文件中的超参数名字保持一致.\n",
"\n",
"> finetunee.py必须有checkpoint_dir这个选项。\n",
"\n",
"> PaddleHub Auto Fine-tune超参评估策略选择为ModelBased,finetunee.py必须有model_path选项。\n",
"\n",
"> PaddleHub Auto Fine-tune搜索超参策略选择hazero时,必须提供两个以上的待搜索超参。\n",
"\n",
"> finetunee.py的最后一个输出必须是模型在数据集dev上的评价效果,同时以“”结束,如print(eval_avg_score[\"acc\"], end=\"\"). \n",
"\n",
"\n",
"\n",
"## 三、启动方式\n",
"\n",
"**确认安装PaddleHub版本在1.2.0以上。**\n",
"\n",
"通过以下命令方式:\n",
"```shell\n",
"$ OUTPUT=result/\n",
"$ hub autofinetune finetunee.py --param_file=hparam.yaml --cuda=['1','2'] --popsize=5 --round=10 \\\n",
"    --output_dir=${OUTPUT} --evaluate_choice=fulltrail --tuning_strategy=hazero\n",
"```\n",
"\n",
"其中,选项\n",
"\n",
"> `--param_file`: 需要搜索的超参数信息yaml文件\n",
"\n",
"> `--cuda`: 设置运行程序的可用GPU卡号,list类型,中间以逗号隔开,不能有空格,默认为[‘0’]\n",
"\n",
"> `--popsize`: 设置程序运行每轮产生的超参组合数,默认为5\n",
"\n",
"> `--round`: 设置程序运行的轮数,默认是10\n",
"\n",
"> `--output_dir`: 设置程序运行输出结果存放目录,可选,不指定该选项参数时,在当前运行路径下生成存放程序运行输出信息的文件夹\n",
"\n",
"> `--evaluate_choice`: 设置自动搜索超参的评价效果方式,可选fulltrail和modelbased, 默认为fulltrail\n",
"\n",
"> `--tuning_strategy`: 设置自动搜索超参策略,可选hazero和pshe2,默认为hazero\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册