Commit 9f64ef8e authored by S shenyuhan

fix ci

......@@ -37,8 +37,9 @@ PaddleHub is a pre-trained model management and transfer learning toolkit under the PaddlePaddle ecosystem
* Python==2.7 or Python>=3.5
* PaddlePaddle>=1.4.0
In addition to the dependencies above, PaddleHub's pre-trained models and built-in datasets are downloaded from a server, so please make sure the machine has network access
In addition to the dependencies above, PaddleHub's pre-trained models and built-in datasets are downloaded from a server, so please make sure the machine has network access. If the relevant datasets and pre-trained models already exist locally, PaddleHub can run offline.
**NOTE:** If errors occur when running PaddleHub offline, please upgrade PaddleHub to a version above 1.1.1.
Install via pip as follows:
```shell
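# the collapsed lines here presumably contain the standard install command (assumption based on PaddleHub's documented usage):
$ pip install paddlehub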
......@@ -102,9 +103,11 @@ For how PaddleHub performs transfer learning, see the [wiki tutorial](https://github.com/
For how to customize a transfer-learning task with PaddleHub, see the [wiki tutorial](https://github.com/PaddlePaddle/PaddleHub/wiki/PaddleHub:-%E8%87%AA%E5%AE%9A%E4%B9%89Task)
How to use PaddleHub's hyperparameter optimization: [autofinetune tutorial](https://github.com/PaddlePaddle/PaddleHub/blob/develop/tutorial/autofinetune.md)
For how to use PaddleHub's hyperparameter optimization, see the [autofinetune tutorial](https://github.com/PaddlePaddle/PaddleHub/blob/develop/tutorial/autofinetune.md)
How to use PaddleHub to compute text similarity "end to end": [word2vec tutorial](https://github.com/PaddlePaddle/PaddleHub/blob/develop/tutorial/sentence_sim.ipynb)
For how to use PaddleHub to compute text similarity "end to end", see the [word2vec tutorial](https://github.com/PaddlePaddle/PaddleHub/blob/develop/tutorial/sentence_sim.ipynb)
For how to fine-tune PaddleHub pre-trained models with the ULMFiT strategy, see [PaddleHub transfer learning and the ULMFiT fine-tuning strategy](https://github.com/PaddlePaddle/PaddleHub/blob/develop/tutorial/strategy_exp.md)
## FAQ
......
......@@ -14,6 +14,7 @@
* Added the **reading comprehension fine-tune task** and the **regression fine-tune task**
* Added multi-metric evaluation
* Optimized the predict interface
* The visualization tooling now supports tb_paddle
# PaddleHub v1.1.2
......@@ -69,7 +70,5 @@
Official release of the PaddleHub pre-trained model management tool, designed to help users manage models and carry out transfer learning more efficiently.
**Pre-trained model management**: the hub command line supports downloading, searching, and version management of pre-trained models in the PaddlePaddle ecosystem.
**One-line command-line usage**: without writing any code, pre-trained models can be used for prediction directly from the command line, making it quick to evaluate model quality. The current version supports the following models: lexical analysis LAC; sentiment analysis Senta; object detection SSD; image classification ResNet, MobileNet, NASNet, and more.
**Transfer learning**: provides a Finetune API built on pre-trained models, so transfer learning can be completed with only a few lines of code, covering BERT/ERNIE text classification, sequence labeling, image classification transfer, and more.
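A minimal sketch of the command-line workflow described above (the module name, input text, and flags are illustrative and may differ between PaddleHub versions):

```shell
# search for and install a pre-trained module (lac shown for illustration)
$ hub search lac
$ hub install lac
# run a prediction directly from the command line, no code required
$ hub run lac --input_text "今天是个好日子"
```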
......@@ -164,15 +164,15 @@ class BaseTuningStrategy(object):
params_cudas_dirs = []
solution_results = []
cnt = 0
solutions_ckptdirs = {}
solutions_modeldirs = {}
mkdir(output_dir)
for idx, solution in enumerate(solutions):
cuda = self.is_cuda_free["free"][0]
ckptdir = output_dir + "/ckpt-" + str(idx)
modeldir = output_dir + "/model-" + str(idx) + "/"
log_file = output_dir + "/log-" + str(idx) + ".info"
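# queue one work item per candidate solution: [solution, GPU id, model save dir, log file]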
params_cudas_dirs.append([solution, cuda, ckptdir, log_file])
solutions_ckptdirs[tuple(solution)] = ckptdir
params_cudas_dirs.append([solution, cuda, modeldir, log_file])
solutions_modeldirs[tuple(solution)] = modeldir
self.is_cuda_free["free"].remove(cuda)
self.is_cuda_free["busy"].append(cuda)
if len(params_cudas_dirs
......@@ -190,7 +190,7 @@ class BaseTuningStrategy(object):
self.feedback(solutions, solution_results)
return solutions_ckptdirs
return solutions_modeldirs
class HAZero(BaseTuningStrategy):
......
......@@ -36,11 +36,12 @@ else:
class BaseEvaluator(object):
def __init__(self, params_file, finetunee_script):
def __init__(self, params_file, finetunee_script, options_str=""):
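# options_str carries extra command-line options (for example "--max_seq_len=128 ")
# that run() appends verbatim to the fine-tune command it builds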
with io.open(params_file, 'r', encoding='utf8') as f:
self.params = yaml.safe_load(f)
self.finetunee_script = finetunee_script
self.model_rewards = {}
self.options_str = options_str
def get_init_params(self):
init_params = []
......@@ -108,13 +109,14 @@ class BaseEvaluator(object):
class FullTrailEvaluator(BaseEvaluator):
def __init__(self, params_file, finetunee_script):
super(FullTrailEvaluator, self).__init__(params_file, finetunee_script)
def __init__(self, params_file, finetunee_script, options_str=""):
super(FullTrailEvaluator, self).__init__(
params_file, finetunee_script, options_str=options_str)
def run(self, *args):
params = args[0][0]
num_cuda = args[0][1]
ckpt_dir = args[0][2]
saved_params_dir = args[0][2]
log_file = args[0][3]
params = self.convert_params(params)
if not self.is_valid_params(params):
......@@ -125,12 +127,11 @@ class FullTrailEvaluator(BaseEvaluator):
f.close()
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
try:
os.system(run_cmd)
with open(log_file, "r") as f:
......@@ -142,20 +143,21 @@ class FullTrailEvaluator(BaseEvaluator):
% param_str.replace("--", ""))
eval_result = 0.0
reward = self.get_reward(eval_result)
self.model_rewards[ckpt_dir] = reward
self.model_rewards[saved_params_dir] = reward
return reward
class ModelBasedEvaluator(BaseEvaluator):
def __init__(self, params_file, finetunee_script):
super(ModelBasedEvaluator, self).__init__(params_file, finetunee_script)
self.half_best_model_ckpt = []
def __init__(self, params_file, finetunee_script, options_str=""):
super(ModelBasedEvaluator, self).__init__(
params_file, finetunee_script, options_str=options_str)
self.half_best_model_path = []
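# saved-parameter paths selected from the previous round (the "best half" per model_rewards),
# reused cyclically as --model_path warm starts in run()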
self.run_count = 0
def run(self, *args):
params = args[0][0]
num_cuda = args[0][1]
ckpt_dir = args[0][2]
saved_params_dir = args[0][2]
log_file = args[0][3]
params = self.convert_params(params)
if not self.is_valid_params(params):
......@@ -165,22 +167,23 @@ class ModelBasedEvaluator(BaseEvaluator):
f = open(log_file, "w")
f.close()
if len(self.half_best_model_ckpt) > 0:
model_path = self.half_best_model_ckpt[self.run_count % len(
self.half_best_model_ckpt)] + "/best_model"
if len(self.half_best_model_path) > 0:
model_path = self.half_best_model_path[self.run_count % len(
self.half_best_model_path)]
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --epochs=1 --model_path %s --saved_params_dir=%s %s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, saved_params_dir, param_str, self.options_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --epochs=1 --model_path %s --saved_params_dir=%s %s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, saved_params_dir, param_str, self.options_str, log_file)
else:
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --saved_params_dir=%s %s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, saved_params_dir, param_str, self.options_str, log_file)
self.run_count += 1
try:
......@@ -194,7 +197,7 @@ class ModelBasedEvaluator(BaseEvaluator):
% param_str.replace("--", ""))
eval_result = 0.0
reward = self.get_reward(eval_result)
self.model_rewards[ckpt_dir] = reward
self.model_rewards[saved_params_dir] = reward
return reward
def new_round(self):
......@@ -202,7 +205,7 @@ class ModelBasedEvaluator(BaseEvaluator):
half_size = int(len(self.model_rewards) / 2)
if half_size < 1:
half_size = 1
self.half_best_model_ckpt = list({
self.half_best_model_path = list({
key
for key in sorted(
self.model_rewards, key=self.model_rewards.get, reverse=False)
......
......@@ -91,6 +91,21 @@ class AutoFineTuneCommand(BaseCommand):
type=str,
default="HAZero",
help="Choices: HAZero or PSHE2.")
self.arg_config_group.add_argument(
'opts',
help='See utils/config.py for all options',
default=None,
nargs=argparse.REMAINDER)
def convert_to_other_options(self, config_list):
if len(config_list) % 2 != 0:
raise ValueError(
"Command for finetuned task options config format error! Please check it: {}"
.format(config_list))
options_str = ""
for key, value in zip(config_list[0::2], config_list[1::2]):
options_str += "--" + key + "=" + value + " "
return options_str
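# Example with illustrative values: convert_to_other_options(["max_seq_len", "128"])
# returns "--max_seq_len=128 ", which is later handed to the evaluator as options_str.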
def execute(self, argv):
if not argv:
......@@ -109,6 +124,11 @@ class AutoFineTuneCommand(BaseCommand):
description=
"Autofintune configuration for controlling autofinetune behavior, not required"
)
self.arg_finetuned_task_group = self.parser.add_argument_group(
title="Finetuned task config options",
description=
"Finetuned task configuration for controlling finetuned task behavior, not required"
)
self.add_params_file_arg()
self.add_autoft_config_arg()
......@@ -118,12 +138,20 @@ class AutoFineTuneCommand(BaseCommand):
return False
self.args = self.parser.parse_args(argv[1:])
options_str = ""
if self.args.opts is not None:
options_str = self.convert_to_other_options(self.args.opts)
if self.args.evaluate_choice.lower() == "fulltrail":
evaluator = FullTrailEvaluator(self.args.param_file,
self.fintunee_script)
evaluator = FullTrailEvaluator(
self.args.param_file,
self.fintunee_script,
options_str=options_str)
elif self.args.evaluate_choice.lower() == "modelbased":
evaluator = ModelBasedEvaluator(self.args.param_file,
self.fintunee_script)
evaluator = ModelBasedEvaluator(
self.args.param_file,
self.fintunee_script,
options_str=options_str)
else:
raise ValueError(
"The evaluate %s is not defined!" % self.args.evaluate_choice)
......@@ -145,13 +173,13 @@ class AutoFineTuneCommand(BaseCommand):
self.args.tuning_strategy)
run_round_cnt = 0
solutions_ckptdirs = {}
solutions_modeldirs = {}
print("PaddleHub Autofinetune starts.")
while (not autoft.is_stop()) and run_round_cnt < self.args.round:
print("PaddleHub Autofinetune starts round at %s." % run_round_cnt)
output_dir = autoft._output_dir + "/round" + str(run_round_cnt)
res = autoft.step(output_dir)
solutions_ckptdirs.update(res)
solutions_modeldirs.update(res)
evaluator.new_round()
run_round_cnt = run_round_cnt + 1
print("PaddleHub Autofinetune ends.")
......@@ -164,17 +192,15 @@ class AutoFineTuneCommand(BaseCommand):
print("%s=%s" % (hparam_name, best_hparams[index]))
f.write(hparam_name + "\t:\t" + str(best_hparams[index]) + "\n")
f.write("\n\n\n")
f.write("\t".join(autoft.hparams_name_list) + "\toutput_dir\n\n")
logger.info(
"The checkpont directory of programs ran with hyperparamemters searched are saved as log_file.txt ."
)
f.write("\t".join(autoft.hparams_name_list) +
"\tsaved_params_dir\n\n")
print(
"The checkpont directory of programs ran with hyperparamemters searched are saved as log_file.txt ."
)
for solution, ckptdir in solutions_ckptdirs.items():
for solution, modeldir in solutions_modeldirs.items():
param = evaluator.convert_params(solution)
param = [str(p) for p in param]
f.write("\t".join(param) + "\t" + ckptdir + "\n\n")
f.write("\t".join(param) + "\t" + modeldir + "\n\n")
return True
......
......@@ -588,13 +588,13 @@ class BasicTask(object):
run_states = self._run(do_eval=do_eval)
self.env.current_epoch += 1
# Save checkpoint after finetune
self.save_checkpoint()
# Final evaluation
if self._base_data_reader.get_dev_examples() != []:
self.eval(phase="dev")
if self._base_data_reader.get_test_examples() != []:
self.eval(phase="test", load_best_model=True)
# Save checkpoint after finetune
self.save_checkpoint()
self._finetune_end_event(run_states)
return run_states
......
#for python2, you should use requirements_py2.txt
pre-commit
protobuf >= 3.1.0
yapf == 0.26.0
pyyaml
numpy
#[py2]numpy == 1.16.0
Pillow
six >= 1.10.0
chardet == 3.0.4
requests
pandas
#[py2]pandas == 0.24.0
flake8
tb-paddle
tb-nightly
......
pre-commit
protobuf >= 3.1.0
yapf == 0.26.0
pyyaml
numpy < 1.17.0
Pillow
six >= 1.10.0
chardet == 3.0.4
requests
pandas < 0.25.0
flake8
tb-paddle
tb-nightly
cma == 2.7.0
......@@ -36,7 +36,7 @@ REQUIRED_PACKAGES = [
]
if max_version < 3:
REQUIRED_PACKAGES += ["numpy == 1.16.0", "pandas == 0.24.0"]
REQUIRED_PACKAGES += ["numpy < 1.17.0", "pandas < 0.25.0"]
else:
REQUIRED_PACKAGES += ["numpy", "pandas"]
......
......@@ -11,6 +11,10 @@ PaddleHub Auto Fine-tune provides two hyperparameter optimization strategies:
* PSHE2: uses a particle swarm algorithm, in which the optimal hyperparameter combination is the solution being sought. Finding that optimum comes down to deciding how to update the hyperparameter combination so that the algorithm converges to the optimum faster and better. PSHE2 decides the next update direction from each hyperparameter's own historical best, together with a certain amount of random perturbation (a minimal sketch follows below).
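The following is a minimal, hedged sketch of a particle-swarm style update of the kind described above; it is an illustration only, not PaddleHub's actual PSHE2 implementation, and the function name, coefficients, and noise term are all assumptions:

```python
import numpy as np

def pso_like_step(position, velocity, personal_best, global_best,
                  inertia=0.5, c1=1.5, c2=1.5, noise=0.01):
    """Pull each particle toward its own best and the global best,
    then add a small random perturbation before taking the step."""
    r1 = np.random.rand(*position.shape)
    r2 = np.random.rand(*position.shape)
    velocity = (inertia * velocity
                + c1 * r1 * (personal_best - position)
                + c2 * r2 * (global_best - position)
                + noise * np.random.randn(*position.shape))
    return position + velocity, velocity
```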
PaddleHub Auto Fine-tune provides two hyperparameter evaluation strategies:
* FullTrail: given a set of hyperparameters, fine-tune a new model from scratch with them, then evaluate that model on the dev split of the dataset
......@@ -75,11 +79,21 @@ parser.add_argument("--warmup_prop", type=float, default=0.1, help="warmup_prop.
parser.add_argument("--weight_decay", type=float, default=0.01, help="weight_decay.")
parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest seqence.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--saved_params_dir", type=str, default="", help="Directory for saving model during ")
parser.add_argument("--model_path", type=str, default="", help="load model path")
args = parser.parse_args()
# yapf: enable.
def is_path_valid(path):
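# treat an empty path as invalid; otherwise make sure its parent directory exists before it is used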
if path == "":
return False
path = os.path.abspath(path)
dirname = os.path.dirname(path)
if not os.path.exists(dirname):
os.mkdir(dirname)
return True
if __name__ == '__main__':
# Load Paddlehub ERNIE pretrained model
module = hub.Module(name="ernie")
......@@ -133,7 +147,7 @@ if __name__ == '__main__':
config=config,
metrics_choices=metrics_choices)
# Finetune and evaluate by PaddleHub's API
# Load the model from the specified model path, if one was given
if args.model_path != "":
with cls_task.phase_guard(phase="train"):
cls_task.init_if_necessary()
......@@ -144,12 +158,17 @@ if __name__ == '__main__':
run_states = cls_task.eval()
eval_avg_score, eval_avg_loss, eval_run_speed = cls_task._calculate_metrics(run_states)
print(eval_avg_score["acc"], end="")
# Move ckpt/best_model to the defined saved parameters directory
if is_path_valid(args.saved_params_dir) and os.path.exists(config.checkpoint_dir+"/best_model/"):
shutil.copytree(config.checkpoint_dir+"/best_model/", args.saved_params_dir)
shutil.rmtree(config.checkpoint_dir)
print(eval_avg_score["acc"], end="")
```
**Note**: the above is how finetunee.py should be written.
> finetunee.py must accept the hyperparameters to be searched as command-line options, and the names of those options must match the hyperparameter names in the yaml file.
> finetunee.py must provide the checkpoint_dir option.
> finetunee.py must provide the saved_params_dir option.
> If the PaddleHub Auto Fine-tune evaluation strategy is ModelBased, finetunee.py must also provide the model_path option.
......@@ -167,7 +186,7 @@ print(eval_avg_score["acc"], end="")
```shell
$ OUTPUT=result/
$ hub autofinetune finetunee.py --param_file=hparam.yaml --cuda=['1','2'] --popsize=5 --round=10
$ --output_dir=${OUTPUT} --evaluate_choice=fulltrail --tuning_strategy=pshe2
--output_dir=${OUTPUT} --evaluate_choice=fulltrail --tuning_strategy=pshe2
```
Where the options are:
......@@ -202,8 +221,16 @@ $ tensorboard --logdir $OUTPUT/tb_paddle --host ${HOST_IP} --port ${PORT_NUM}
## 5. Miscellaneous
If, while using the Auto Fine-tune feature, the output contains a message such as:
1. If, while using the Auto Fine-tune feature, the output contains a message such as:
**WARNING:Program which was ran with hyperparameters as ... was crashed!**
first use the terminal output to determine in which round the message appeared (e.g. round 3), then check the log file log.info under ${OUTPUT}/round3/ for the specific cause of the error.
2. The PaddleHub AutoFinetune command line supports passing options of finetunee.py that are not being searched directly from the hub autofinetune launch command, such as the max_seq_len option in the example above; they can be passed as follows.
```shell
$ OUTPUT=result/
$ hub autofinetune finetunee.py --param_file=hparam.yaml --cuda=['1','2'] --popsize=5 --round=10
--output_dir=${OUTPUT} --evaluate_choice=fulltrail --tuning_strategy=pshe2 max_seq_len 128
```
This diff is collapsed.