提交 7c600b66 编写于 作者: K kinghuin 提交者: wuzewu

Merdev (#143)

* Add Reading Comprehension Taskwq

* Add Reading Comprehension Task

* Add Reading Comprehension Task

* dd Reading Comprehension Task

* Add the reading comprehension task

* add reading comprehension task

* Add reading comprehension

* Add reading comprehension task

* add reading comprehension task

* Add reading comprehension task

* Add reading comprehension task

* Fix tokenization for bert chn

* Add GLUE and XNLI, modify text-classification demo test=develop (#94)

* Add GLUE and XNLI, modify text-classification demo

* add hub_server rw lock

* Support GLUE (#108)

* Support GLUE

* Support MNLI_m and MNLI_mm

* restore checkpoint.py

* Modify for review

* Fix the bug whether module is valid or not

* Enhancetask (#122)

* accelerate predict

* Add autoft (#127)

* add autofinetune

* update paddlehub required libs

* Add copyright

* Split task and add regression demo  (#130)

* split task

* add regression demo

* implement CombinedStrategy (#128)

* implement CombinedStrategy

* optimize some details (#131)

* optimize some details

* optimize some details (#133)

* support cv_reader

* use logger instead of print

* add warning

* remove todo

* add gradual_unfreeze frz_blocks

* replace the visualization toolkit from visualdl to tb-paddle (#139)

* replace the logging toolkit from visualdl to tb-paddle.

* modified:   requirements.txt

* modified:   paddlehub/finetune/task/basic_task.py

* update autofinetune  (#135)

* update autofinetune (add modelbased evaluator)

* support cpu count

* fix ci
上级 e5cb716b
...@@ -10,13 +10,13 @@ import numpy as np ...@@ -10,13 +10,13 @@ import numpy as np
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.") parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="resnet50", help="Module used as feature extractor.") parser.add_argument("--module", type=str, default="resnet50", help="Module used as feature extractor.")
parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.") parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.") parser.add_argument("--use_pyreader", type=ast.literal_eval, default=True, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.")
# yapf: enable. # yapf: enable.
module_map = { module_map = {
......
""" Official evaluation script for v1.1 of the SQuAD dataset. """
from __future__ import print_function
from collections import Counter
import string
import re
import argparse
import json
import sys
def normalize_answer(s):
"""Lower text and remove punctuation, articles and extra whitespace."""
def remove_articles(text):
return re.sub(r'\b(a|an|the)\b', ' ', text)
def white_space_fix(text):
return ' '.join(text.split())
def remove_punc(text):
exclude = set(string.punctuation)
return ''.join(ch for ch in text if ch not in exclude)
def lower(text):
return text.lower()
return white_space_fix(remove_articles(remove_punc(lower(s))))
def f1_score(prediction, ground_truth):
prediction_tokens = normalize_answer(prediction).split()
ground_truth_tokens = normalize_answer(ground_truth).split()
common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
num_same = sum(common.values())
if num_same == 0:
return 0
precision = 1.0 * num_same / len(prediction_tokens)
recall = 1.0 * num_same / len(ground_truth_tokens)
f1 = (2 * precision * recall) / (precision + recall)
return f1
def exact_match_score(prediction, ground_truth):
return (normalize_answer(prediction) == normalize_answer(ground_truth))
def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
scores_for_ground_truths = []
for ground_truth in ground_truths:
score = metric_fn(prediction, ground_truth)
scores_for_ground_truths.append(score)
return max(scores_for_ground_truths)
def evaluate(dataset, predictions):
f1 = exact_match = total = 0
for article in dataset:
for paragraph in article['paragraphs']:
for qa in paragraph['qas']:
total += 1
if qa['id'] not in predictions:
message = 'Unanswered question ' + qa['id'] + \
' will receive score 0.'
print(message, file=sys.stderr)
continue
ground_truths = list(map(lambda x: x['text'], qa['answers']))
prediction = predictions[qa['id']]
exact_match += metric_max_over_ground_truths(
exact_match_score, prediction, ground_truths)
f1 += metric_max_over_ground_truths(f1_score, prediction,
ground_truths)
exact_match = 100.0 * exact_match / total
f1 = 100.0 * f1 / total
return {'exact_match': exact_match, 'f1': f1}
if __name__ == '__main__':
expected_version = '1.1'
parser = argparse.ArgumentParser(
description='Evaluation for SQuAD ' + expected_version)
parser.add_argument('dataset_file', help='Dataset file')
parser.add_argument('prediction_file', help='Prediction File')
args = parser.parse_args()
with open(args.dataset_file) as dataset_file:
dataset_json = json.load(dataset_file)
if (dataset_json['version'] != expected_version):
print(
'Evaluation expects v-' + expected_version +
', but got dataset with v-' + dataset_json['version'],
file=sys.stderr)
dataset = dataset_json['data']
print(args.prediction_file)
with open(args.prediction_file) as prediction_file:
predictions = json.load(prediction_file)
print(json.dumps(evaluate(dataset, predictions)))
"""Official evaluation script for SQuAD version 2.0.
In addition to basic functionality, we also compute additional statistics and
plot precision-recall curves if an additional na_prob.json file is provided.
This file is expected to map question ID's to the model's predicted probability
that a question is unanswerable.
"""
import argparse
import collections
import json
import numpy as np
import os
import re
import string
import sys
def make_qid_to_has_ans(dataset):
qid_to_has_ans = {}
for article in dataset:
for p in article['paragraphs']:
for qa in p['qas']:
qid_to_has_ans[qa['id']] = bool(qa['answers'])
return qid_to_has_ans
def normalize_answer(s):
"""Lower text and remove punctuation, articles and extra whitespace."""
def remove_articles(text):
regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
return re.sub(regex, ' ', text)
def white_space_fix(text):
return ' '.join(text.split())
def remove_punc(text):
exclude = set(string.punctuation)
return ''.join(ch for ch in text if ch not in exclude)
def lower(text):
return text.lower()
return white_space_fix(remove_articles(remove_punc(lower(s))))
def get_tokens(s):
if not s: return []
return normalize_answer(s).split()
def compute_exact(a_gold, a_pred):
return int(normalize_answer(a_gold) == normalize_answer(a_pred))
def compute_f1(a_gold, a_pred):
gold_toks = get_tokens(a_gold)
pred_toks = get_tokens(a_pred)
common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
num_same = sum(common.values())
if len(gold_toks) == 0 or len(pred_toks) == 0:
# If either is no-answer, then F1 is 1 if they agree, 0 otherwise
return int(gold_toks == pred_toks)
if num_same == 0:
return 0
precision = 1.0 * num_same / len(pred_toks)
recall = 1.0 * num_same / len(gold_toks)
f1 = (2 * precision * recall) / (precision + recall)
return f1
def get_raw_scores(dataset, preds):
exact_scores = {}
f1_scores = {}
for article in dataset:
for p in article['paragraphs']:
for qa in p['qas']:
qid = qa['id']
gold_answers = [
a['text'] for a in qa['answers']
if normalize_answer(a['text'])
]
if not gold_answers:
# For unanswerable questions, only correct answer is empty string
gold_answers = ['']
if qid not in preds:
print('Missing prediction for %s' % qid)
continue
a_pred = preds[qid]
# Take max over all gold answers
exact_scores[qid] = max(
compute_exact(a, a_pred) for a in gold_answers)
f1_scores[qid] = max(
compute_f1(a, a_pred) for a in gold_answers)
return exact_scores, f1_scores
def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thresh):
new_scores = {}
for qid, s in scores.items():
pred_na = na_probs[qid] > na_prob_thresh
if pred_na:
new_scores[qid] = float(not qid_to_has_ans[qid])
else:
new_scores[qid] = s
return new_scores
def make_eval_dict(exact_scores, f1_scores, qid_list=None):
if not qid_list:
total = len(exact_scores)
return collections.OrderedDict([
('exact', 100.0 * sum(exact_scores.values()) / total),
('f1', 100.0 * sum(f1_scores.values()) / total),
('total', total),
])
else:
total = len(qid_list)
return collections.OrderedDict([
('exact', 100.0 * sum(exact_scores[k] for k in qid_list) / total),
('f1', 100.0 * sum(f1_scores[k] for k in qid_list) / total),
('total', total),
])
def merge_eval(main_eval, new_eval, prefix):
for k in new_eval:
main_eval['%s_%s' % (prefix, k)] = new_eval[k]
def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs,
qid_to_has_ans):
best_exact, exact_thresh = find_best_thresh(preds, exact_raw, na_probs,
qid_to_has_ans)
best_f1, f1_thresh = find_best_thresh(preds, f1_raw, na_probs,
qid_to_has_ans)
main_eval['best_exact'] = best_exact
main_eval['best_exact_thresh'] = exact_thresh
main_eval['best_f1'] = best_f1
main_eval['best_f1_thresh'] = f1_thresh
def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k])
cur_score = num_no_ans
best_score = cur_score
best_thresh = 0.0
qid_list = sorted(na_probs, key=lambda k: na_probs[k])
for i, qid in enumerate(qid_list):
if qid not in scores:
continue
if qid_to_has_ans[qid]:
diff = scores[qid]
else:
if preds[qid]:
diff = -1
else:
diff = 0
cur_score += diff
if cur_score > best_score:
best_score = cur_score
best_thresh = na_probs[qid]
return 100.0 * best_score / len(scores), best_thresh
此差异已折叠。
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
import argparse
import ast
import paddle.fluid as fluid
import paddlehub as hub
hub.common.logger.logger.setLevel("INFO")
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=3e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=384, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=True, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.")
parser.add_argument("--version_2_with_negative", type=ast.literal_eval, default=False, help="If true, the SQuAD examples contain some that do not have an answer. If using squad v2.0, it should be set true.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# Load Paddlehub bert_uncased_L-12_H-768_A-12 pretrained model
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Download dataset and use ReadingComprehensionReader to read dataset
dataset = hub.dataset.SQUAD(
version_2_with_negative=args.version_2_with_negative)
reader = hub.reader.ReadingComprehensionReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_length=args.max_seq_len,
doc_stride=128,
max_query_length=64)
seq_output = outputs["sequence_output"]
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Select finetune strategy, setup config and finetune
strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay,
learning_rate=args.learning_rate,
warmup_proportion=args.warmup_proportion,
lr_scheduler="linear_decay")
# Setup runing config for PaddleHub Finetune API
config = hub.RunConfig(
log_interval=10,
use_pyreader=args.use_pyreader,
use_data_parallel=args.use_data_parallel,
save_ckpt_interval=1000,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
enable_memory_optim=True,
strategy=strategy)
# Define a reading comprehension finetune task by PaddleHub's API
reading_comprehension_task = hub.ReadingComprehensionTask(
data_reader=reader,
feature=seq_output,
feed_list=feed_list,
config=config)
# Finetune by PaddleHub's API
reading_comprehension_task.finetune()
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0,1
python -u reading_comprehension.py \
--batch_size=12 \
--use_gpu=True \
--checkpoint_dir="./ckpt_rc" \
--learning_rate=3e-5 \
--weight_decay=0.01 \
--warmup_proportion=0.1 \
--num_epoch=2 \
--max_seq_len=384 \
--use_pyreader=True \
--use_data_parallel=True \
--version_2_with_negative=False
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0
CKPT_DIR="./ckpt_rc"
RES_DIR="./result"
mkdir $RES_DIR
python -u predict.py \
--batch_size=12 \
--use_gpu=True \
--checkpoint_dir=${CKPT_DIR} \
--learning_rate=3e-5 \
--weight_decay=0.01 \
--warmup_proportion=0.1 \
--num_epoch=1 \
--max_seq_len=384 \
--use_pyreader=False \
--use_data_parallel=False \
--version_2_with_negative=False \
--result_dir=${RES_DIR}
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import numpy as np
import os
import time
import paddle
import paddle.fluid as fluid
import paddlehub as hub
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--dataset", type=str, default="STS-B", help="Directory to model checkpoint")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
dataset = None
metrics_choices = []
# Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "sts-b":
dataset = hub.dataset.GLUE("STS-B")
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
else:
raise ValueError("%s dataset is not defined" % args.dataset)
support_metrics = ["acc", "f1", "matthews"]
for metric in metrics_choices:
if metric not in support_metrics:
raise ValueError("\"%s\" metric is not defined" % metric)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
reader = hub.reader.RegressionReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
# Construct transfer learning network
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]
# Setup feed list for data feeder
# Must feed all the tensor of ERNIE's module need
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Setup runing config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=False,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
enable_memory_optim=False,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a regression finetune task by PaddleHub's API
reg_task = hub.RegressionTask(
data_reader=reader,
feature=pooled_output,
feed_list=feed_list,
config=config)
# Data to be prdicted
data = [[d.text_a, d.text_b] for d in dataset.get_predict_examples()]
index = 0
run_states = reg_task.predict(data=data)
results = [run_state.run_results for run_state in run_states]
if not os.path.exists("output"):
os.makedirs("output")
fout = open(os.path.join("output", "%s.tsv" % args.dataset.upper()), 'w')
fout.write("index\tprediction")
for batch_result in results:
for result in batch_result[0]:
if index < 3:
print("%s\t%s\tpredict=%.3f" % (data[index][0], data[index][1],
result[0]))
fout.write("\n%s\t%.3f" % (index, result[0]))
index += 1
fout.close()
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
import argparse
import ast
import paddle.fluid as fluid
import paddlehub as hub
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--dataset", type=str, default="STS-B", help="Directory to model checkpoint")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--data_dir", type=str, default=None, help="Path to training data.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
dataset = None
# Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "sts-b":
dataset = hub.dataset.GLUE("STS-B")
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
else:
raise ValueError("%s dataset is not defined" % args.dataset)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
reader = hub.reader.RegressionReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
# Construct transfer learning network
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]
# Setup feed list for data feeder
# Must feed all the tensor of ERNIE's module need
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Select finetune strategy, setup config and finetune
strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay,
learning_rate=args.learning_rate,
lr_scheduler="linear_decay")
# Setup runing config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=args.use_data_parallel,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=strategy)
# Define a regression finetune task by PaddleHub's API
reg_task = hub.RegressionTask(
data_reader=reader,
feature=pooled_output,
feed_list=feed_list,
config=config)
# Finetune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically
reg_task.finetune_and_eval()
export FLAGS_eager_delete_tensor_gb=0.0
# export CUDA_VISIBLE_DEVICES=0
# User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task
DATASET="STS-B"
CKPT_DIR="./ckpt_${DATASET}"
# STS-B: batch_size=32, max_seq_len=128
python -u predict.py --checkpoint_dir $CKPT_DIR \
--max_seq_len 128 \
--use_gpu True \
--dataset=${DATASET} \
--batch_size=32 \
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0
DATASET="STS-B"
CKPT_DIR="./ckpt_${DATASET}"
# Recommending hyper parameters for difference task
# STS-B: batch_size=32, weight_decay=0.1, num_epoch=3, max_seq_len=128, lr=4e-5
python -u regression.py \
--batch_size=32 \
--use_gpu=True \
--dataset=${DATASET} \
--checkpoint_dir=${CKPT_DIR} \
--learning_rate=4e-5 \
--weight_decay=0.1 \
--max_seq_len=128 \
--num_epoch=3 \
--use_pyreader=True \
--use_data_parallel=True \
...@@ -29,7 +29,7 @@ import paddlehub as hub ...@@ -29,7 +29,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default="ckpt_20190802182531", help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
...@@ -42,64 +42,89 @@ args = parser.parse_args() ...@@ -42,64 +42,89 @@ args = parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
dataset = None dataset = None
metrics_choices = []
# Download dataset and use ClassifyReader to read dataset # Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "chnsenticorp": if args.dataset.lower() == "chnsenticorp":
dataset = hub.dataset.ChnSentiCorp() dataset = hub.dataset.ChnSentiCorp()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "nlpcc_dbqa": elif args.dataset.lower() == "nlpcc_dbqa":
dataset = hub.dataset.NLPCC_DBQA() dataset = hub.dataset.NLPCC_DBQA()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "lcqmc": elif args.dataset.lower() == "lcqmc":
dataset = hub.dataset.LCQMC() dataset = hub.dataset.LCQMC()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mrpc": elif args.dataset.lower() == "mrpc":
dataset = hub.dataset.GLUE("MRPC") dataset = hub.dataset.GLUE("MRPC")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["f1", "acc"]
# The first metric will be choose to eval. Ref: task.py:799
elif args.dataset.lower() == "qqp": elif args.dataset.lower() == "qqp":
dataset = hub.dataset.GLUE("QQP") dataset = hub.dataset.GLUE("QQP")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["f1", "acc"]
elif args.dataset.lower() == "sst-2": elif args.dataset.lower() == "sst-2":
dataset = hub.dataset.GLUE("SST-2") dataset = hub.dataset.GLUE("SST-2")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "cola": elif args.dataset.lower() == "cola":
dataset = hub.dataset.GLUE("CoLA") dataset = hub.dataset.GLUE("CoLA")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["matthews", "acc"]
elif args.dataset.lower() == "qnli": elif args.dataset.lower() == "qnli":
dataset = hub.dataset.GLUE("QNLI") dataset = hub.dataset.GLUE("QNLI")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "rte": elif args.dataset.lower() == "rte":
dataset = hub.dataset.GLUE("RTE") dataset = hub.dataset.GLUE("RTE")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
elif args.dataset.lower() == "mnli": metrics_choices = ["acc"]
dataset = hub.dataset.GLUE("MNLI") elif args.dataset.lower() == "mnli" or args.dataset.lower() == "mnli_m":
dataset = hub.dataset.GLUE("MNLI_m")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mnli_mm":
dataset = hub.dataset.GLUE("MNLI_mm")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower().startswith("xnli"): elif args.dataset.lower().startswith("xnli"):
dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:]) dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:])
module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12") module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12")
metrics_choices = ["acc"]
else: else:
raise ValueError("%s dataset is not defined" % args.dataset) raise ValueError("%s dataset is not defined" % args.dataset)
support_metrics = ["acc", "f1", "matthews"]
for metric in metrics_choices:
if metric not in support_metrics:
raise ValueError("\"%s\" metric is not defined" % metric)
inputs, outputs, program = module.context( inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len) trainable=True, max_seq_len=args.max_seq_len)
reader = hub.reader.ClassifyReader( reader = hub.reader.ClassifyReader(
...@@ -147,7 +172,8 @@ if __name__ == '__main__': ...@@ -147,7 +172,8 @@ if __name__ == '__main__':
feature=pooled_output, feature=pooled_output,
feed_list=feed_list, feed_list=feed_list,
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config) config=config,
metrics_choices=metrics_choices)
# Data to be prdicted # Data to be prdicted
data = [[d.text_a, d.text_b] for d in dataset.get_dev_examples()[:3]] data = [[d.text_a, d.text_b] for d in dataset.get_dev_examples()[:3]]
......
...@@ -2,18 +2,32 @@ export FLAGS_eager_delete_tensor_gb=0.0 ...@@ -2,18 +2,32 @@ export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
# User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task # User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task
DATASET="chnsenticorp" DATASET="chnsenticorp"
CKPT_DIR="./ckpt_${DATASET}" CKPT_DIR="./ckpt_${DATASET}"
# Support ChnSentiCorp NLPCC_DBQA LCQMC MRPC QQP SST-2 # Recommending hyper parameters for difference task
# CoLA QNLI RTE MNLI XNLI # ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# for XNLI: Specify the language with an underscore like xnli_zh. # NLPCC_DBQA: batch_size=8, weight_decay=0.01, num_epoch=3, max_seq_len=512, lr=2e-5
# ar: Arabic bg: Bulgarian de: German # LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=2e-5
# el: Greek en: English es: Spanish # QQP: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# fr: French hi: Hindi ru: Russian # QNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# sw: Swahili th: Thai tr: Turkish # SST-2: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# ur: Urdu vi: Vietnamese zh: Chinese (Simplified) # CoLA: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# MRPC: batch_size=32, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
# RTE: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=3e-5
# MNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
# Specify the matched/mismatched dev and test dataset with an underscore.
# mnli_m or mnli: dev and test in matched dataset.
# mnli_mm: dev and test in mismatched dataset.
# The difference can be seen in https://www.nyu.edu/projects/bowman/multinli/paper.pdf.
# If you are not sure which one to pick, just use mnli or mnli_m.
# XNLI: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=128, lr=5e-5
# Specify the language with an underscore like xnli_zh.
# ar- Arabic bg- Bulgarian de- German
# el- Greek en- English es- Spanish
# fr- French hi- Hindi ru- Russian
# sw- Swahili th- Thai tr- Turkish
# ur- Urdu vi- Vietnamese zh- Chinese (Simplified)
python -u text_classifier.py \ python -u text_classifier.py \
--batch_size=24 \ --batch_size=24 \
......
...@@ -2,17 +2,20 @@ export FLAGS_eager_delete_tensor_gb=0.0 ...@@ -2,17 +2,20 @@ export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
# User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task # User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task
DATASET="chnsenticorp"
CKPT_DIR="./ckpt_${DATASET}"
# Support ChnSentiCorp NLPCC_DBQA LCQMC MRPC QQP SST-2 # Support ChnSentiCorp NLPCC_DBQA LCQMC MRPC QQP SST-2
# CoLA QNLI RTE MNLI XNLI # CoLA QNLI RTE MNLI (or MNLI_m) MNLI_mm) XNLI
# for XNLI: Specify the language with an underscore like xnli_zh. # for XNLI: Specify the language with an underscore like xnli_zh.
# ar: Arabic bg: Bulgarian de: German # ar: Arabic bg: Bulgarian de: German
# el: Greek en: English es: Spanish # el: Greek en: English es: Spanish
# fr: French hi: Hindi ru: Russian # fr: French hi: Hindi ru: Russian
# sw: Swahili th: Thai tr: Turkish # sw: Swahili th: Thai tr: Turkish
# ur: Urdu vi: Vietnamese zh: Chinese (Simplified) # ur: Urdu vi: Vietnamese zh: Chinese (Simplified)
DATASET="ChnSentiCorp"
CKPT_DIR="./ckpt_${DATASET}"
python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 --use_gpu False --dataset=${DATASET} --use_taskid False python -u predict.py --checkpoint_dir=$CKPT_DIR \
--max_seq_len=128 \
--use_gpu=True \
--dataset=${DATASET} \
--batch_size=150 \
--use_taskid=False \
...@@ -26,7 +26,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whet ...@@ -26,7 +26,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whet
parser.add_argument("--dataset", type=str, default="chnsenticorp", help="The choice of dataset") parser.add_argument("--dataset", type=str, default="chnsenticorp", help="The choice of dataset")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--data_dir", type=str, default=None, help="Path to training data.") parser.add_argument("--data_dir", type=str, default=None, help="Path to training data.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
...@@ -39,64 +39,89 @@ args = parser.parse_args() ...@@ -39,64 +39,89 @@ args = parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
dataset = None dataset = None
metrics_choices = []
# Download dataset and use ClassifyReader to read dataset # Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "chnsenticorp": if args.dataset.lower() == "chnsenticorp":
dataset = hub.dataset.ChnSentiCorp() dataset = hub.dataset.ChnSentiCorp()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "nlpcc_dbqa": elif args.dataset.lower() == "nlpcc_dbqa":
dataset = hub.dataset.NLPCC_DBQA() dataset = hub.dataset.NLPCC_DBQA()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "lcqmc": elif args.dataset.lower() == "lcqmc":
dataset = hub.dataset.LCQMC() dataset = hub.dataset.LCQMC()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mrpc": elif args.dataset.lower() == "mrpc":
dataset = hub.dataset.GLUE("MRPC") dataset = hub.dataset.GLUE("MRPC")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["f1", "acc"]
# The first metric will be choose to eval. Ref: task.py:799
elif args.dataset.lower() == "qqp": elif args.dataset.lower() == "qqp":
dataset = hub.dataset.GLUE("QQP") dataset = hub.dataset.GLUE("QQP")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["f1", "acc"]
elif args.dataset.lower() == "sst-2": elif args.dataset.lower() == "sst-2":
dataset = hub.dataset.GLUE("SST-2") dataset = hub.dataset.GLUE("SST-2")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "cola": elif args.dataset.lower() == "cola":
dataset = hub.dataset.GLUE("CoLA") dataset = hub.dataset.GLUE("CoLA")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["matthews", "acc"]
elif args.dataset.lower() == "qnli": elif args.dataset.lower() == "qnli":
dataset = hub.dataset.GLUE("QNLI") dataset = hub.dataset.GLUE("QNLI")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "rte": elif args.dataset.lower() == "rte":
dataset = hub.dataset.GLUE("RTE") dataset = hub.dataset.GLUE("RTE")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
elif args.dataset.lower() == "mnli": metrics_choices = ["acc"]
dataset = hub.dataset.GLUE("MNLI") elif args.dataset.lower() == "mnli" or args.dataset.lower() == "mnli":
dataset = hub.dataset.GLUE("MNLI_m")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mnli_mm":
dataset = hub.dataset.GLUE("MNLI_mm")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower().startswith("xnli"): elif args.dataset.lower().startswith("xnli"):
dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:]) dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:])
module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12") module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12")
metrics_choices = ["acc"]
else: else:
raise ValueError("%s dataset is not defined" % args.dataset) raise ValueError("%s dataset is not defined" % args.dataset)
support_metrics = ["acc", "f1", "matthews"]
for metric in metrics_choices:
if metric not in support_metrics:
raise ValueError("\"%s\" metric is not defined" % metric)
inputs, outputs, program = module.context( inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len) trainable=True, max_seq_len=args.max_seq_len)
reader = hub.reader.ClassifyReader( reader = hub.reader.ClassifyReader(
...@@ -144,7 +169,8 @@ if __name__ == '__main__': ...@@ -144,7 +169,8 @@ if __name__ == '__main__':
feature=pooled_output, feature=pooled_output,
feed_list=feed_list, feed_list=feed_list,
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config) config=config,
metrics_choices=metrics_choices)
# Finetune and evaluate by PaddleHub's API # Finetune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
......
...@@ -50,7 +50,12 @@ from .finetune.task import TextClassifierTask ...@@ -50,7 +50,12 @@ from .finetune.task import TextClassifierTask
from .finetune.task import ImageClassifierTask from .finetune.task import ImageClassifierTask
from .finetune.task import SequenceLabelTask from .finetune.task import SequenceLabelTask
from .finetune.task import MultiLabelClassifierTask from .finetune.task import MultiLabelClassifierTask
from .finetune.task import RegressionTask
from .finetune.task import ReadingComprehensionTask
from .finetune.config import RunConfig from .finetune.config import RunConfig
from .finetune.strategy import AdamWeightDecayStrategy from .finetune.strategy import AdamWeightDecayStrategy
from .finetune.strategy import DefaultStrategy from .finetune.strategy import DefaultStrategy
from .finetune.strategy import DefaultFinetuneStrategy from .finetune.strategy import DefaultFinetuneStrategy
from .finetune.strategy import L2SPFinetuneStrategy
from .finetune.strategy import ULMFiTStrategy
from .finetune.strategy import CombinedStrategy
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from multiprocessing.pool import ThreadPool
import copy
import json
import math
import numpy as np
import six
import time
from paddlehub.common.logger import logger
from paddlehub.common.utils import mkdir
if six.PY3:
INF = math.inf
else:
INF = float("inf")
class PSHE2(object):
def __init__(
self,
evaluator,
cudas=["0"],
popsize=5,
output_dir=None,
alpha=0.5,
epsilon=0.2,
):
self._num_thread = len(cudas)
self._popsize = popsize
self._alpha = alpha
self._epsilon = epsilon
self._iteration = 0
self.cudas = cudas
self.is_cuda_free = {"free": [], "busy": []}
self.is_cuda_free["free"] = cudas
self.evaluator = evaluator
self.init_input = evaluator.get_init_params()
self.num_hparm = len(self.init_input)
self.best_hparams_per_pop = [[0] * self.num_hparm] * self._popsize
self.best_reward_per_pop = [INF] * self._popsize
self.momentums = [[0] * self.num_hparm] * self._popsize
self.best_hparms_all_pop = []
self.best_reward_all_pop = INF
self.current_hparams = [[0] * self.num_hparm] * self._popsize
for i in range(self.popsize):
self.current_hparams[i] = self.randomSolution()
if output_dir is None:
now = int(time.time())
time_str = time.strftime("%Y%m%d%H%M%S", time.localtime(now))
self._output_dir = "output_" + time_str
else:
self._output_dir = output_dir
@property
def thread(self):
return self._num_thread
@property
def popsize(self):
return self._popsize
@property
def alpha(self):
return self._alpha
@property
def epsilon(self):
return self._epsilon
@property
def output_dir(self):
return self._output_dir
@property
def iteration(self):
return self._iteration
def set_output_dir(self, output_dir=None):
if output_dir is not None:
output_dir = output_dir
else:
output_dir = self._output_dir
return output_dir
def randomSolution(self):
solut = [0] * self.num_hparm
for i in range(self.num_hparm):
ratio = (np.random.random_sample() - 0.5) * 2.0
if ratio >= 0:
solut[i] = (
1.0 - self.init_input[i]) * ratio + self.init_input[i]
else:
solut[i] = (
self.init_input[i] + 1.0) * ratio + self.init_input[i]
return solut
def smallPeturb(self):
for i in range(self.popsize):
for j in range(self.num_hparm):
ratio = (np.random.random_sample() - 0.5) * 2.0
if ratio >= 0:
self.current_hparams[i][j] = (
1.0 - self.current_hparams[i][j]
) * ratio * self.epsilon + self.current_hparams[i][j]
else:
self.current_hparams[i][j] = (
self.current_hparams[i][j] +
1.0) * ratio * self.epsilon + self.current_hparams[i][j]
def estimatePopGradients(self):
gradients = [[0] * self.num_hparm] * self.popsize
for i in range(self.popsize):
for j in range(self.num_hparm):
gradients[i][j] = self.current_hparams[i][
j] - self.best_hparms_all_pop[j]
return gradients
def estimateLocalGradients(self):
gradients = [[0] * self.num_hparm] * self.popsize
for i in range(self.popsize):
for j in range(self.num_hparm):
gradients[i][j] = self.current_hparams[i][
j] - self.best_hparams_per_pop[i][j]
return gradients
def estimateMomemtum(self):
popGrads = self.estimatePopGradients()
localGrads = self.estimateLocalGradients()
for i in range(self.popsize):
for j in range(self.num_hparm):
self.momentums[i][j] = (
1 - 3.0 * self.alpha / self.iteration
) * self.momentums[i][j] - self.alpha * localGrads[i][
j] - self.alpha * popGrads[i][j]
def is_stop(self):
return False
def solutions(self):
return self.current_hparams
def feedback(self, params_list, reward_list):
self._iteration = self._iteration + 1
for i in range(self.popsize):
if reward_list[i] < self.best_reward_per_pop[i]:
self.best_hparams_per_pop[i] = copy.deepcopy(
self.current_hparams[i])
self.best_reward_per_pop[i] = reward_list[i]
if reward_list[i] < self.best_reward_all_pop:
self.best_hparms_all_pop = self.current_hparams[i]
self.best_reward_all_pop = reward_list[i]
self.estimateMomemtum()
for i in range(self.popsize):
for j in range(len(self.init_input)):
self.current_hparams[i][j] = self.current_hparams[i][
j] + self.alpha * self.momentums[i][j]
self.smallPeturb()
def optimal_solution(self):
return self.best_hparms_all_pop
def step(self, output_dir):
solutions = self.solutions()
params_cudas_dirs = []
solution_results = []
cnt = 0
solutions_ckptdirs = {}
mkdir(output_dir)
for idx, solution in enumerate(solutions):
cuda = self.is_cuda_free["free"][0]
ckptdir = output_dir + "/ckpt-" + str(idx)
log_file = output_dir + "/log-" + str(idx) + ".info"
params_cudas_dirs.append([solution, cuda, ckptdir, log_file])
solutions_ckptdirs[tuple(solution)] = ckptdir
self.is_cuda_free["free"].remove(cuda)
self.is_cuda_free["busy"].append(cuda)
if len(params_cudas_dirs) == self.thread or cnt == int(
self.popsize / self.thread):
tp = ThreadPool(len(params_cudas_dirs))
solution_results += tp.map(self.evaluator.run,
params_cudas_dirs)
cnt += 1
tp.close()
tp.join()
for param_cuda in params_cudas_dirs:
self.is_cuda_free["free"].append(param_cuda[1])
self.is_cuda_free["busy"].remove(param_cuda[1])
params_cudas_dirs = []
self.feedback(solutions, solution_results)
return solutions_ckptdirs
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import io
import hashlib
import math
import os
import random
import six
import yaml
from paddlehub.common.logger import logger
from paddlehub.common.utils import is_windows
REWARD_SUM = 10000
if six.PY3:
INF = math.inf
else:
INF = float("inf")
class BaseEvaluator(object):
def __init__(self, params_file, finetunee_script):
with io.open(params_file, 'r', encoding='utf8') as f:
self.params = yaml.safe_load(f)
self.finetunee_script = finetunee_script
def get_init_params(self):
init_params = []
for param in self.params["param_list"]:
init_params.append(param['init_value'])
init_params = self.inverse_convert_params(init_params)
return init_params
def get_reward(self, result_output):
return REWARD_SUM - float(result_output)
def is_valid_params(self, params):
for i in range(0, len(self.params["param_list"])):
if params[i] < float(self.params["param_list"][i]["greater_than"]):
return False
if params[i] > float(self.params["param_list"][i]["lower_than"]):
return False
return True
def convert_params(self, params):
cparams = []
for i in range(0, len(self.params["param_list"])):
cparams.append(
float(self.params["param_list"][i]["greater_than"] +
(params[i] + 1.0) / 2.0 *
(self.params["param_list"][i]["lower_than"] -
self.params["param_list"][i]["greater_than"])))
if cparams[i] <= float(
self.params["param_list"][i]["greater_than"]):
cparams[i] = float(self.params["param_list"][i]["greater_than"])
if cparams[i] >= float(self.params["param_list"][i]["lower_than"]):
cparams[i] = float(self.params["param_list"][i]["lower_than"])
if self.params["param_list"][i]["type"] == "int":
cparams[i] = int(cparams[i])
return cparams
def inverse_convert_params(self, params):
cparams = []
for i in range(0, len(self.params["param_list"])):
cparams.append(
float(
-1.0 + 2.0 *
(params[i] - self.params["param_list"][i]["greater_than"]) /
(self.params["param_list"][i]["lower_than"] -
self.params["param_list"][i]["greater_than"])))
if cparams[i] <= -1.0:
cparams[i] = -1.0
if cparams[i] >= 1.0:
cparams[i] = 1.0
return cparams
def format_params_str(self, params):
param_str = "--%s=%s" % (self.params["param_list"][0]["name"],
params[0])
for i in range(1, len(self.params["param_list"])):
param_str = "%s --%s=%s" % (
param_str, self.params["param_list"][i]["name"], str(params[i]))
return param_str
def run(self, *args):
raise NotImplementedError
def new_round(self):
pass
class FullTrailEvaluator(BaseEvaluator):
def __init__(self, params_file, finetunee_script):
super(FullTrailEvaluator, self).__init__(params_file, finetunee_script)
def run(self, *args):
params = args[0][0]
num_cuda = args[0][1]
ckpt_dir = args[0][2]
log_file = args[0][3]
params = self.convert_params(params)
if not self.is_valid_params(params):
return REWARD_SUM
param_str = self.format_params_str(params)
f = open(log_file, "w")
f.close()
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
try:
os.system(run_cmd)
with open(log_file, "r") as f:
lines = f.readlines()
eval_result = lines[-1]
except:
print(
"WARNING: Program which was ran with hyperparameters as %s was crashed!"
% param_str.replace("--", ""))
eval_result = 0.0
reward = self.get_reward(eval_result)
self.model_rewards[ckpt_dir] = reward
return reward
class ModelBasedEvaluator(BaseEvaluator):
def __init__(self, params_file, finetunee_script):
super(ModelBasedEvaluator, self).__init__(params_file, finetunee_script)
self.model_rewards = {}
self.half_best_model_ckpt = []
self.run_count = 0
def run(self, *args):
params = args[0][0]
num_cuda = args[0][1]
ckpt_dir = args[0][2]
log_file = args[0][3]
params = self.convert_params(params)
if not self.is_valid_params(params):
return REWARD_SUM
param_str = self.format_params_str(params)
f = open(log_file, "w")
f.close()
if len(self.half_best_model_ckpt) > 0:
model_path = self.half_best_model_ckpt[self.run_count % len(
self.half_best_model_ckpt)] + "/best_model"
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
else:
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
self.run_count += 1
try:
os.system(run_cmd)
with open(log_file, "r") as f:
lines = f.readlines()
eval_result = lines[-1]
except:
print(
"WARNING: Program which was ran with hyperparameters as %s was crashed!"
% param_str.replace("--", ""))
eval_result = 0.0
reward = self.get_reward(eval_result)
self.model_rewards[ckpt_dir] = reward
return reward
def new_round(self):
"""update self.half_best_model"""
half_size = int(len(self.model_rewards) / 2)
if half_size < 1:
half_size = 1
self.half_best_model_ckpt = list({
key
for key in sorted(
self.model_rewards, key=self.model_rewards.get, reverse=False)
[:half_size]
})
self.model_rewards = {}
...@@ -25,3 +25,4 @@ from . import help ...@@ -25,3 +25,4 @@ from . import help
from . import clear from . import clear
from . import config from . import config
from . import hub from . import hub
from . import autofinetune
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import io
import json
import os
import sys
import ast
import six
import pandas
import numpy as np
from paddlehub.commands.base_command import BaseCommand, ENTRY
from paddlehub.common.arg_helper import add_argument, print_arguments
from paddlehub.autofinetune.autoft import PSHE2
from paddlehub.autofinetune.evaluator import FullTrailEvaluator
from paddlehub.autofinetune.evaluator import ModelBasedEvaluator
from paddlehub.common.logger import logger
import paddlehub as hub
class AutoFineTuneCommand(BaseCommand):
name = "autofinetune"
def __init__(self, name):
super(AutoFineTuneCommand, self).__init__(name)
self.show_in_help = True
self.name = name
self.description = "Paddlehub helps to finetune a task by searching hyperparameters automatically."
self.parser = argparse.ArgumentParser(
description=self.__class__.__doc__,
prog='%s %s <task to be fintuned in python script>' % (ENTRY,
self.name),
usage='%(prog)s',
add_help=False)
self.module = None
def add_params_file_arg(self):
self.arg_params_to_be_searched_group.add_argument(
"--param_file",
type=str,
default=None,
required=True,
help=
"Hyperparameters to be searched in the yaml format. The number of hyperparameters searched must be greater than 1."
)
def add_autoft_config_arg(self):
self.arg_config_group.add_argument(
"--popsize", type=int, default=5, help="Population size")
self.arg_config_group.add_argument(
"--cuda",
type=ast.literal_eval,
default=['0'],
help="The list of gpu devices to be used")
self.arg_config_group.add_argument(
"--round", type=int, default=10, help="Number of searches")
self.arg_config_group.add_argument(
"--output_dir",
type=str,
default=None,
help="Directory to model checkpoint")
self.arg_config_group.add_argument(
"--evaluate_choice",
type=str,
default="fulltrail",
help="Choices: fulltrail or modelbased.")
def execute(self, argv):
if not argv:
print("ERROR: Please specify a script to be finetuned in python.\n")
self.help()
return False
self.fintunee_script = argv[0]
self.parser.prog = '%s %s %s' % (ENTRY, self.name, self.fintunee_script)
self.arg_params_to_be_searched_group = self.parser.add_argument_group(
title="Input options",
description="Hyperparameters to be searched.")
self.arg_config_group = self.parser.add_argument_group(
title="Autofinetune config options",
description=
"Autofintune configuration for controlling autofinetune behavior, not required"
)
self.add_params_file_arg()
self.add_autoft_config_arg()
if not argv[1:]:
self.help()
return False
self.args = self.parser.parse_args(argv[1:])
if self.args.evaluate_choice.lower() == "fulltrail":
evaluator = FullTrailEvaluator(self.args.param_file,
self.fintunee_script)
elif self.args.evaluate_choice.lower() == "modelbased":
evaluator = ModelBasedEvaluator(self.args.param_file,
self.fintunee_script)
else:
raise ValueError(
"The evaluate %s is not defined!" % self.args.evaluate_choice)
autoft = PSHE2(
evaluator,
cudas=self.args.cuda,
popsize=self.args.popsize,
output_dir=self.args.output_dir)
run_round_cnt = 0
solutions_ckptdirs = {}
print("PaddleHub Autofinetune starts.")
while (not autoft.is_stop()) and run_round_cnt < self.args.round:
print("PaddleHub Autofinetune starts round at %s." % run_round_cnt)
output_dir = autoft._output_dir + "/round" + str(run_round_cnt)
res = autoft.step(output_dir)
solutions_ckptdirs.update(res)
evaluator.new_round()
run_round_cnt = run_round_cnt + 1
print("PaddleHub Autofinetune ends.")
with open("./log_file.txt", "w") as f:
best_choice = evaluator.convert_params(autoft.optimal_solution())
print("The best hyperparameters:")
f.write("The best hyperparameters:\n")
param_name = []
for idx, param in enumerate(evaluator.params["param_list"]):
param_name.append(param["name"])
f.write(param["name"] + "\t:\t" + str(best_choice[idx]) + "\n")
print("%s : %s" % (param["name"], best_choice[idx]))
f.write("\n\n\n")
f.write("\t".join(param_name) + "\toutput_dir\n\n")
logger.info(
"The checkpont directory of programs ran with paramemters searched are saved as log_file.txt ."
)
print(
"The checkpont directory of programs ran with paramemters searched are saved as log_file.txt ."
)
for solution, ckptdir in solutions_ckptdirs.items():
param = evaluator.convert_params(solution)
param = [str(p) for p in param]
f.write("\t".join(param) + "\t" + ckptdir + "\n\n")
return True
command = AutoFineTuneCommand.instance()
...@@ -36,7 +36,6 @@ class InstallCommand(BaseCommand): ...@@ -36,7 +36,6 @@ class InstallCommand(BaseCommand):
prog='%s %s <module_name>' % (ENTRY, name), prog='%s %s <module_name>' % (ENTRY, name),
usage='%(prog)s', usage='%(prog)s',
add_help=False) add_help=False)
#TODO(wuzewu): add --upgrade option
def execute(self, argv): def execute(self, argv):
if not argv: if not argv:
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
import os import os
# TODO: Change dir.py's filename, this naming rule is not qualified
USER_HOME = os.path.expanduser('~') USER_HOME = os.path.expanduser('~')
HUB_HOME = os.path.join(USER_HOME, ".paddlehub") HUB_HOME = os.path.join(USER_HOME, ".paddlehub")
MODULE_HOME = os.path.join(HUB_HOME, "modules") MODULE_HOME = os.path.join(HUB_HOME, "modules")
......
...@@ -77,7 +77,6 @@ class Downloader(object): ...@@ -77,7 +77,6 @@ class Downloader(object):
with open(file_name, 'wb') as f: with open(file_name, 'wb') as f:
shutil.copyfileobj(r.raw, f) shutil.copyfileobj(r.raw, f)
else: else:
#TODO(ZeyuChen) upgrade to tqdm process
with open(file_name, 'wb') as f: with open(file_name, 'wb') as f:
dl = 0 dl = 0
total_length = int(total_length) total_length = int(total_length)
......
...@@ -24,6 +24,7 @@ import requests ...@@ -24,6 +24,7 @@ import requests
import json import json
import yaml import yaml
import random import random
import fcntl
from random import randint from random import randint
from paddlehub.common import utils, srv_utils from paddlehub.common import utils, srv_utils
...@@ -38,6 +39,9 @@ CACHE_TIME = 60 * 10 ...@@ -38,6 +39,9 @@ CACHE_TIME = 60 * 10
class HubServer(object): class HubServer(object):
def __init__(self, config_file_path=None): def __init__(self, config_file_path=None):
LOCK_FILE = os.path.join(hub.HUB_HOME, '__LOCK__')
LOCK_FP = open(LOCK_FILE, 'a+')
fcntl.flock(LOCK_FP.fileno(), fcntl.LOCK_EX)
if not config_file_path: if not config_file_path:
config_file_path = os.path.join(hub.CONF_HOME, 'config.json') config_file_path = os.path.join(hub.CONF_HOME, 'config.json')
if not os.path.exists(hub.CONF_HOME): if not os.path.exists(hub.CONF_HOME):
...@@ -53,6 +57,7 @@ class HubServer(object): ...@@ -53,6 +57,7 @@ class HubServer(object):
self.server_url = self.config['server_url'] self.server_url = self.config['server_url']
self.request() self.request()
self._load_resource_list_file_if_valid() self._load_resource_list_file_if_valid()
LOCK_FP.close()
def get_server_url(self): def get_server_url(self):
random.seed(int(time.time())) random.seed(int(time.time()))
...@@ -178,7 +183,6 @@ class HubServer(object): ...@@ -178,7 +183,6 @@ class HubServer(object):
self.resource_list_file['version'][index] self.resource_list_file['version'][index]
for index in resource_index_list for index in resource_index_list
] ]
#TODO(wuzewu): version sort method
resource_version_list = sorted(resource_version_list) resource_version_list = sorted(resource_version_list)
if not version: if not version:
if not resource_version_list: if not resource_version_list:
......
...@@ -83,7 +83,6 @@ def from_param_to_module_attr(param, module_attr): ...@@ -83,7 +83,6 @@ def from_param_to_module_attr(param, module_attr):
module_attr.map.data['trainable']) module_attr.map.data['trainable'])
from_pyobj_to_module_attr(param.do_model_average, from_pyobj_to_module_attr(param.do_model_average,
module_attr.map.data['do_model_average']) module_attr.map.data['do_model_average'])
#TODO(wuzewu): don't save learning rate
from_pyobj_to_module_attr(param.optimize_attr, from_pyobj_to_module_attr(param.optimize_attr,
module_attr.map.data['optimize_attr']) module_attr.map.data['optimize_attr'])
from_pyobj_to_module_attr( from_pyobj_to_module_attr(
......
...@@ -117,7 +117,6 @@ def get_pykey(key, keyed_type): ...@@ -117,7 +117,6 @@ def get_pykey(key, keyed_type):
return str(key) return str(key)
#TODO(wuzewu): solving the problem of circular references
def from_pyobj_to_module_attr(pyobj, module_attr, obj_filter=None): def from_pyobj_to_module_attr(pyobj, module_attr, obj_filter=None):
if obj_filter and obj_filter(pyobj): if obj_filter and obj_filter(pyobj):
return return
......
...@@ -20,6 +20,7 @@ from .msra_ner import MSRA_NER ...@@ -20,6 +20,7 @@ from .msra_ner import MSRA_NER
from .nlpcc_dbqa import NLPCC_DBQA from .nlpcc_dbqa import NLPCC_DBQA
from .lcqmc import LCQMC from .lcqmc import LCQMC
from .toxic import Toxic from .toxic import Toxic
from .squad import SQUAD
from .xnli import XNLI from .xnli import XNLI
from .glue import GLUE from .glue import GLUE
......
...@@ -52,7 +52,6 @@ class ImageClassificationDataset(object): ...@@ -52,7 +52,6 @@ class ImageClassificationDataset(object):
return dataset_path return dataset_path
def _parse_data(self, data_path, shuffle=False, phase=None): def _parse_data(self, data_path, shuffle=False, phase=None):
def _base_reader():
data = [] data = []
with open(data_path, "r") as file: with open(data_path, "r") as file:
while True: while True:
...@@ -67,8 +66,7 @@ class ImageClassificationDataset(object): ...@@ -67,8 +66,7 @@ class ImageClassificationDataset(object):
image_path = items[0] image_path = items[0]
if not os.path.isabs(image_path): if not os.path.isabs(image_path):
if self.base_path is not None: if self.base_path is not None:
image_path = os.path.join(self.base_path, image_path = os.path.join(self.base_path, image_path)
image_path)
label = items[-1] label = items[-1]
data.append((image_path, items[-1])) data.append((image_path, items[-1]))
...@@ -82,6 +80,7 @@ class ImageClassificationDataset(object): ...@@ -82,6 +80,7 @@ class ImageClassificationDataset(object):
if shuffle: if shuffle:
np.random.shuffle(data) np.random.shuffle(data)
def _base_reader():
for item in data: for item in data:
yield item yield item
......
...@@ -39,11 +39,18 @@ class GLUE(HubDataset): ...@@ -39,11 +39,18 @@ class GLUE(HubDataset):
def __init__(self, sub_dataset='SST-2'): def __init__(self, sub_dataset='SST-2'):
# sub_dataset : CoLA, MNLI, MRPC, QNLI, QQP, RTE, SST-2, STS-B # sub_dataset : CoLA, MNLI, MRPC, QNLI, QQP, RTE, SST-2, STS-B
if sub_dataset not in [ if sub_dataset not in [
'CoLA', 'MNLI', 'MRPC', 'QNLI', 'QQP', 'RTE', 'SST-2', 'STS-B' 'CoLA', 'MNLI', 'MNLI_m', 'MNLI_mm', 'MRPC', 'QNLI', 'QQP',
'RTE', 'SST-2', 'STS-B'
]: ]:
raise Exception( raise Exception(
sub_dataset + sub_dataset +
" is not in GLUE benchmark. Please confirm the data set") " is not in GLUE benchmark. Please confirm the data set")
self.mismatch = False
if sub_dataset == 'MNLI_mm':
sub_dataset = 'MNLI'
self.mismatch = True
elif sub_dataset == 'MNLI_m':
sub_dataset = 'MNLI'
self.sub_dataset = sub_dataset self.sub_dataset = sub_dataset
self.dataset_dir = os.path.join(DATA_HOME, "glue_data") self.dataset_dir = os.path.join(DATA_HOME, "glue_data")
...@@ -64,9 +71,12 @@ class GLUE(HubDataset): ...@@ -64,9 +71,12 @@ class GLUE(HubDataset):
self.train_examples = self._read_tsv(self.train_file) self.train_examples = self._read_tsv(self.train_file)
def _load_dev_examples(self): def _load_dev_examples(self):
if self.sub_dataset == 'MNLI': if self.sub_dataset == 'MNLI' and not self.mismatch:
self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset, self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset,
"dev_matched.tsv") "dev_matched.tsv")
elif self.sub_dataset == 'MNLI' and self.mismatch:
self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset,
"dev_mismatched.tsv")
else: else:
self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset, self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset,
"dev.tsv") "dev.tsv")
...@@ -76,9 +86,12 @@ class GLUE(HubDataset): ...@@ -76,9 +86,12 @@ class GLUE(HubDataset):
self.test_examples = [] self.test_examples = []
def _load_predict_examples(self): def _load_predict_examples(self):
if self.sub_dataset == 'MNLI': if self.sub_dataset == 'MNLI' and not self.mismatch:
self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset, self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset,
"test_matched.tsv") "test_matched.tsv")
elif self.sub_dataset == 'MNLI' and self.mismatch:
self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset,
"test_mismatched.tsv")
else: else:
self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset, self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset,
"test.tsv") "test.tsv")
...@@ -187,7 +200,7 @@ class GLUE(HubDataset): ...@@ -187,7 +200,7 @@ class GLUE(HubDataset):
seq_id += 1 seq_id += 1
examples.append(example) examples.append(example)
except: except:
print("[Discard Incorrect Data] " + "\t".join(line)) logger.info("[Discard Incorrect Data] " + "\t".join(line))
return examples return examples
......
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run BERT on SQuAD 1.1 and SQuAD 2.0."""
import json
import os
import sys
from paddlehub.reader import tokenization
from paddlehub.common.downloader import default_downloader
from paddlehub.common.dir import DATA_HOME
from paddlehub.common.logger import logger
_DATA_URL = "https://bj.bcebos.com/paddlehub-dataset/squad.tar.gz"
class SquadExample(object):
"""A single training/test example for simple sequence classification.
For examples without an answer, the start and end position are -1.
"""
def __init__(self,
qas_id,
question_text,
doc_tokens,
orig_answer_text=None,
start_position=None,
end_position=None,
is_impossible=False):
self.qas_id = qas_id
self.question_text = question_text
self.doc_tokens = doc_tokens
self.orig_answer_text = orig_answer_text
self.start_position = start_position
self.end_position = end_position
self.is_impossible = is_impossible
def __str__(self):
return self.__repr__()
def __repr__(self):
s = ""
s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
s += ", question_text: %s" % (tokenization.printable_text(
self.question_text))
s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
if self.start_position:
s += ", start_position: %d" % (self.start_position)
if self.start_position:
s += ", end_position: %d" % (self.end_position)
if self.start_position:
s += ", is_impossible: %r" % (self.is_impossible)
return s
class SQUAD(object):
"""A single set of features of data."""
def __init__(self, version_2_with_negative=False):
self.dataset_dir = os.path.join(DATA_HOME, "squad_data")
if not os.path.exists(self.dataset_dir):
ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
else:
logger.info("Dataset {} already cached.".format(self.dataset_dir))
self._load_train_examples(version_2_with_negative, is_training=True)
self._load_predict_examples(version_2_with_negative, is_training=False)
def _load_train_examples(self,
version_2_with_negative=False,
is_training=True):
if not version_2_with_negative:
self.train_file = os.path.join(self.dataset_dir, "train-v1.1.json")
else:
self.train_file = os.path.join(self.dataset_dir, "train-v2.0.json")
self.train_examples = self._read_json(self.train_file, is_training,
version_2_with_negative)
def _load_predict_examples(self,
version_2_with_negative=False,
is_training=False):
if not version_2_with_negative:
self.predict_file = os.path.join(self.dataset_dir, "dev-v1.1.json")
else:
self.predict_file = os.path.join(self.dataset_dir, "dev-v2.0.json")
self.predict_examples = self._read_json(self.predict_file, is_training,
version_2_with_negative)
def get_train_examples(self):
return self.train_examples
def get_dev_examples(self):
return []
def get_test_examples(self):
return []
def _read_json(self, input_file, is_training,
version_2_with_negative=False):
"""Read a SQuAD json file into a list of SquadExample."""
with open(input_file, "r") as reader:
input_data = json.load(reader)["data"]
def is_whitespace(c):
if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(
c) == 0x202F:
return True
return False
examples = []
for entry in input_data:
for paragraph in entry["paragraphs"]:
paragraph_text = paragraph["context"]
doc_tokens = []
char_to_word_offset = []
prev_is_whitespace = True
for c in paragraph_text:
if is_whitespace(c):
prev_is_whitespace = True
else:
if prev_is_whitespace:
doc_tokens.append(c)
else:
doc_tokens[-1] += c
prev_is_whitespace = False
char_to_word_offset.append(len(doc_tokens) - 1)
for qa in paragraph["qas"]:
qas_id = qa["id"]
question_text = qa["question"]
start_position = None
end_position = None
orig_answer_text = None
is_impossible = False
if is_training:
if version_2_with_negative:
is_impossible = qa["is_impossible"]
if (len(qa["answers"]) != 1) and (not is_impossible):
raise ValueError(
"For training, each question should have exactly 1 answer."
)
if not is_impossible:
answer = qa["answers"][0]
orig_answer_text = answer["text"]
answer_offset = answer["answer_start"]
answer_length = len(orig_answer_text)
start_position = char_to_word_offset[answer_offset]
end_position = char_to_word_offset[
answer_offset + answer_length - 1]
# Only add answers where the text can be exactly recovered from the
# document. If this CAN'T happen it's likely due to weird Unicode
# stuff so we will just skip the example.
#
# Note that this means for training mode, every example is NOT
# guaranteed to be preserved.
actual_text = " ".join(
doc_tokens[start_position:(end_position + 1)])
cleaned_answer_text = " ".join(
tokenization.whitespace_tokenize(
orig_answer_text))
if actual_text.find(cleaned_answer_text) == -1:
logger.warning(
"Could not find answer: '%s' vs. '%s'",
actual_text, cleaned_answer_text)
continue
else:
start_position = -1
end_position = -1
orig_answer_text = ""
example = SquadExample(
qas_id=qas_id,
question_text=question_text,
doc_tokens=doc_tokens,
orig_answer_text=orig_answer_text,
start_position=start_position,
end_position=end_position,
is_impossible=is_impossible)
examples.append(example)
return examples
if __name__ == "__main__":
ds = SQUAD(version_2_with_negative=True)
examples = ds.get_dev_examples()
for index, e in enumerate(examples):
if index < 10:
print(e)
...@@ -43,6 +43,7 @@ class XNLI(HubDataset): ...@@ -43,6 +43,7 @@ class XNLI(HubDataset):
"ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw",
"th", "tr", "ur", "vi", "zh" "th", "tr", "ur", "vi", "zh"
]: ]:
raise Exception(language + raise Exception(language +
"is not in XNLI. Please confirm the language") "is not in XNLI. Please confirm the language")
self.language = language self.language = language
......
...@@ -22,4 +22,5 @@ message CheckPoint { ...@@ -22,4 +22,5 @@ message CheckPoint {
int64 current_epoch = 1; int64 current_epoch = 1;
int64 global_step = 2; int64 global_step = 2;
string latest_model_dir = 3; string latest_model_dir = 3;
double best_score = 4;
} }
...@@ -37,6 +37,7 @@ def load_checkpoint(checkpoint_dir, exe, main_program): ...@@ -37,6 +37,7 @@ def load_checkpoint(checkpoint_dir, exe, main_program):
ckpt.ParseFromString(f.read()) ckpt.ParseFromString(f.read())
current_epoch = 1 current_epoch = 1
global_step = 0 global_step = 0
best_score = -999
def if_exist(var): def if_exist(var):
return os.path.exists(os.path.join(ckpt.latest_model_dir, var.name)) return os.path.exists(os.path.join(ckpt.latest_model_dir, var.name))
...@@ -45,20 +46,27 @@ def load_checkpoint(checkpoint_dir, exe, main_program): ...@@ -45,20 +46,27 @@ def load_checkpoint(checkpoint_dir, exe, main_program):
fluid.io.load_vars( fluid.io.load_vars(
exe, ckpt.latest_model_dir, main_program, predicate=if_exist) exe, ckpt.latest_model_dir, main_program, predicate=if_exist)
# Compatible with older versions without best_score in checkpoint_pb2
try:
best_score = ckpt.best_score
except:
best_score = -999
logger.info("PaddleHub model checkpoint loaded. current_epoch={}, " logger.info("PaddleHub model checkpoint loaded. current_epoch={}, "
"global_step={}".format(ckpt.current_epoch, "global_step={}, best_score={:.5f}".format(
ckpt.global_step)) ckpt.current_epoch, ckpt.global_step, best_score))
return True, ckpt.current_epoch, ckpt.global_step
return True, ckpt.current_epoch, ckpt.global_step, best_score
logger.info( logger.info("PaddleHub model checkpoint not found, start from scratch...")
"PaddleHub model checkpoint not found, start training from scratch...")
return False, current_epoch, global_step return False, current_epoch, global_step, best_score
def save_checkpoint(checkpoint_dir, def save_checkpoint(checkpoint_dir,
current_epoch, current_epoch,
global_step, global_step,
best_score,
exe, exe,
main_program=fluid.default_main_program()): main_program=fluid.default_main_program()):
...@@ -73,5 +81,6 @@ def save_checkpoint(checkpoint_dir, ...@@ -73,5 +81,6 @@ def save_checkpoint(checkpoint_dir,
ckpt.current_epoch = current_epoch ckpt.current_epoch = current_epoch
ckpt.global_step = global_step ckpt.global_step = global_step
ckpt.latest_model_dir = model_saved_dir ckpt.latest_model_dir = model_saved_dir
ckpt.best_score = best_score
with open(ckpt_meta_path, "wb") as f: with open(ckpt_meta_path, "wb") as f:
f.write(ckpt.SerializeToString()) f.write(ckpt.SerializeToString())
#coding:utf-8
# Generated by the protocol buffer compiler. DO NOT EDIT! # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: checkpoint.proto # source: checkpoint.proto
...@@ -18,7 +17,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( ...@@ -18,7 +17,7 @@ DESCRIPTOR = _descriptor.FileDescriptor(
package='paddlehub.task.checkpoint', package='paddlehub.task.checkpoint',
syntax='proto3', syntax='proto3',
serialized_pb=_b( serialized_pb=_b(
'\n\x10\x63heckpoint.proto\x12\x19paddlehub.task.checkpoint\"R\n\nCheckPoint\x12\x15\n\rcurrent_epoch\x18\x01 \x01(\x03\x12\x13\n\x0bglobal_step\x18\x02 \x01(\x03\x12\x18\n\x10latest_model_dir\x18\x03 \x01(\tB\x02H\x03\x62\x06proto3' '\n\x10\x63heckpoint.proto\x12\x19paddlehub.task.checkpoint\"f\n\nCheckPoint\x12\x15\n\rcurrent_epoch\x18\x01 \x01(\x03\x12\x13\n\x0bglobal_step\x18\x02 \x01(\x03\x12\x18\n\x10latest_model_dir\x18\x03 \x01(\t\x12\x12\n\nbest_score\x18\x04 \x01(\x01\x42\x02H\x03\x62\x06proto3'
)) ))
_sym_db.RegisterFileDescriptor(DESCRIPTOR) _sym_db.RegisterFileDescriptor(DESCRIPTOR)
...@@ -77,6 +76,22 @@ _CHECKPOINT = _descriptor.Descriptor( ...@@ -77,6 +76,22 @@ _CHECKPOINT = _descriptor.Descriptor(
is_extension=False, is_extension=False,
extension_scope=None, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor(
name='best_score',
full_name='paddlehub.task.checkpoint.CheckPoint.best_score',
index=3,
number=4,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None),
], ],
extensions=[], extensions=[],
nested_types=[], nested_types=[],
...@@ -87,7 +102,7 @@ _CHECKPOINT = _descriptor.Descriptor( ...@@ -87,7 +102,7 @@ _CHECKPOINT = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[], oneofs=[],
serialized_start=47, serialized_start=47,
serialized_end=129, serialized_end=149,
) )
DESCRIPTOR.message_types_by_name['CheckPoint'] = _CHECKPOINT DESCRIPTOR.message_types_by_name['CheckPoint'] = _CHECKPOINT
......
...@@ -128,3 +128,75 @@ def calculate_f1(num_label, num_infer, num_correct): ...@@ -128,3 +128,75 @@ def calculate_f1(num_label, num_infer, num_correct):
else: else:
f1 = 2 * precision * recall / (precision + recall) f1 = 2 * precision * recall / (precision + recall)
return precision, recall, f1 return precision, recall, f1
def calculate_f1_np(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
tp = np.sum((labels == 1) & (preds == 1))
tn = np.sum((labels == 0) & (preds == 0))
fp = np.sum((labels == 0) & (preds == 1))
fn = np.sum((labels == 1) & (preds == 0))
p = tp / (tp + fp) if (tp + fp) else 0
r = tp / (tp + fn) if (tp + fn) else 0
f1 = (2 * p * r) / (p + r) if p + r else 0
return f1
def matthews_corrcoef(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
tp = np.sum((labels == 1) & (preds == 1))
tn = np.sum((labels == 0) & (preds == 0))
fp = np.sum((labels == 0) & (preds == 1))
fn = np.sum((labels == 1) & (preds == 0))
div = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
mcc = ((tp * tn) - (fp * fn)) / np.sqrt(div) if div else 0
return mcc
def recall_nk(data, n, k, m):
'''
This metric can be used to evaluate whether the model can find the correct response B for question A
Note: Only applies to each question A only has one correct response B1.
Parameters
----------
data: List. Each element is a tuple, consist of the positive probability of the sample prediction and its label.
For each example, the only one true positive sample should be the first tuple.
n: int. The number of labels per example.
eg: [A,B1,1], [A,B2,0], [A,B3,0] n=3 as there has 3 labels for example A
k: int. If the top k is right, the example will be considered right.
eg: [A,B1,1]=0.5, [A,B2,0]=0.8, [A,B3,0]=0.3(Probability of 1)
If k=2, the prediction for the example A will be considered correct as 0.5 is the top2 Probability
If k=1, the prediction will be considered wrong as 0.5 is not the biggest probability.
m: int. For every m examples, there's going to be a positive sample.
eg. data= [A1,B1,1], [A1,B2,0], [A1,B3,0], [A2,B1,1], [A2,B2,0], [A2,B3,0]
For every 3 examples, there will be one positive sample. so m=3, and n can be 1,2 or 3.
'''
def get_p_at_n_in_m(data, n, k, ind):
"""
calculate precision in recall n
"""
pos_score = data[ind][0]
curr = data[ind:ind + n]
curr = sorted(curr, key=lambda x: x[0], reverse=True)
if curr[k - 1][0] <= pos_score:
return 1
return 0
correct_num = 0.0
length = len(data) // m
for i in range(0, length):
ind = i * m
assert data[ind][1] == 1
correct_num += get_p_at_n_in_m(data, n, k, ind)
return correct_num / length
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
from paddle.fluid.layers import control_flow
from paddlehub.common.logger import logger
def adam_weight_decay_optimization(loss,
warmup_steps,
num_train_steps,
learning_rate,
main_program,
weight_decay,
scheduler='linear_decay'):
if scheduler == 'noam_decay':
if warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(warmup_steps *(learning_rate ** 2)),
warmup_steps)
else:
logger.warning(
"Noam decay learning rate scheduler should have positive \
warmup steps, using constant learning rate instead!")
scheduled_lr = fluid.layers.create_global_var(
shape=[1],
value=learning_rate,
dtype='float32',
persistable=True,
name="learning_rate")
elif scheduler == 'linear_decay':
scheduled_lr = linear_warmup_decay(learning_rate, num_train_steps,
warmup_steps, main_program)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
clip_norm_thres = 1.0
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
def exclude_from_weight_decay(name):
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
param_list = dict()
for param in main_program.global_block().all_parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
_, param_grads = optimizer.minimize(loss)
if weight_decay > 0:
for param, grad in param_grads:
if exclude_from_weight_decay(param.name):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
return scheduled_lr
def linear_warmup_decay(init_lr, num_train_steps, num_warmup_steps,
main_program):
with main_program._lr_schedule_guard():
global_step = lr_scheduler._decay_step_counter()
lr = fluid.layers.create_global_var(
shape=[1],
value=init_lr,
dtype='float32',
persistable=True,
name="learning_rate")
with control_flow.Switch() as switch:
with switch.case(global_step < num_warmup_steps):
decayed_lr = init_lr * global_step * 1.0 / num_warmup_steps
fluid.layers.assign(decayed_lr, lr)
with switch.default():
decayed_lr = lr_scheduler.polynomial_decay(
learning_rate=init_lr,
decay_steps=num_train_steps,
end_learning_rate=0.0,
power=1.0,
cycle=False)
fluid.layers.assign(decayed_lr, lr)
return lr
此差异已折叠。
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .basic_task import BasicTask, RunEnv, RunState
from .classifier_task import ClassifierTask, ImageClassifierTask, TextClassifierTask, MultiLabelClassifierTask
from .reading_comprehension_task import ReadingComprehensionTask
from .regression_task import RegressionTask
from .sequence_task import SequenceLabelTask
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from paddlehub.finetune.evaluate import calculate_f1_np, matthews_corrcoef
from .basic_task import BasicTask
class ClassifierTask(BasicTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["acc"]
main_program = feature.block.program
super(ClassifierTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
self.num_classes = num_classes
self.hidden_units = hidden_units
def _build_net(self):
cls_feats = self.feature
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=self.num_classes,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act="softmax")
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(logits, axis=1), shape=[-1, 1])
return [logits]
def _add_label(self):
return [fluid.layers.data(name="label", dtype="int64", shape=[1])]
def _add_loss(self):
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=self.labels[0])
return fluid.layers.mean(x=ce_loss)
def _add_metrics(self):
acc = fluid.layers.accuracy(input=self.outputs[0], label=self.labels[0])
return [acc]
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [self.labels[0].name, self.ret_infers.name
] + [metric.name
for metric in self.metrics] + [self.loss.name]
return [output.name for output in self.outputs]
def _calculate_metrics(self, run_states):
loss_sum = acc_sum = run_examples = 0
run_step = run_time_used = 0
all_labels = np.array([])
all_infers = np.array([])
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
acc_sum += np.mean(
run_state.run_results[2]) * run_state.run_examples
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
all_labels = np.hstack((all_labels, np_labels.reshape([-1])))
all_infers = np.hstack((all_infers, np_infers.reshape([-1])))
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / run_examples
run_speed = run_step / run_time_used
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "acc":
avg_acc = acc_sum / run_examples
scores["acc"] = avg_acc
elif metric == "f1":
f1 = calculate_f1_np(all_infers, all_labels)
scores["f1"] = f1
elif metric == "matthews":
matthews = matthews_corrcoef(all_infers, all_labels)
scores["matthews"] = matthews
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
ImageClassifierTask = ClassifierTask
class TextClassifierTask(ClassifierTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["acc"]
super(TextClassifierTask, self).__init__(
data_reader=data_reader,
feature=feature,
num_classes=num_classes,
feed_list=feed_list,
startup_program=startup_program,
config=config,
hidden_units=hidden_units,
metrics_choices=metrics_choices)
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=self.num_classes,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act="softmax")
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(logits, axis=1), shape=[-1, 1])
return [logits]
class MultiLabelClassifierTask(ClassifierTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["auc"]
main_program = feature.block.program
super(MultiLabelClassifierTask, self).__init__(
data_reader=data_reader,
feature=feature,
num_classes=num_classes,
feed_list=feed_list,
startup_program=startup_program,
config=config,
hidden_units=hidden_units,
metrics_choices=metrics_choices)
self.class_name = list(data_reader.label_map.keys())
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
probs = []
for i in range(self.num_classes):
probs.append(
fluid.layers.fc(
input=cls_feats,
size=2,
param_attr=fluid.ParamAttr(
name="cls_out_w_%d" % i,
initializer=fluid.initializer.TruncatedNormal(
scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b_%d" % i,
initializer=fluid.initializer.Constant(0.)),
act="softmax"))
return probs
def _add_label(self):
label = fluid.layers.data(
name="label", shape=[self.num_classes], dtype='int64')
return [label]
def _add_loss(self):
label_split = fluid.layers.split(
self.labels[0], self.num_classes, dim=-1)
total_loss = fluid.layers.fill_constant(
shape=[1], value=0.0, dtype='float64')
for index, probs in enumerate(self.outputs):
ce_loss = fluid.layers.cross_entropy(
input=probs, label=label_split[index])
total_loss += fluid.layers.reduce_sum(ce_loss)
loss = fluid.layers.mean(x=total_loss)
return loss
def _add_metrics(self):
label_split = fluid.layers.split(
self.labels[0], self.num_classes, dim=-1)
# metrics change to auc of every class
eval_list = []
for index, probs in enumerate(self.outputs):
current_auc, _, _ = fluid.layers.auc(
input=probs, label=label_split[index])
eval_list.append(current_auc)
return eval_list
def _calculate_metrics(self, run_states):
loss_sum = acc_sum = run_examples = 0
run_step = run_time_used = 0
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
auc_list = run_states[-1].run_results[:-1]
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / (run_examples * self.num_classes)
run_speed = run_step / run_time_used
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "auc":
scores["auc"] = np.mean(auc_list)
# NOTE: for MultiLabelClassifierTask, the metrics will be used to evaluate all the label
# and their mean value will also be reported.
for index, auc in enumerate(auc_list):
scores["auc_" + self.class_name[index]] = auc_list[index][0]
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [metric.name for metric in self.metrics] + [self.loss.name]
return self.outputs
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from .basic_task import BasicTask
class ReadingComprehensionTask(BasicTask):
def __init__(self,
feature,
feed_list,
data_reader,
startup_program=None,
config=None,
metrics_choices=None):
main_program = feature.block.program
super(ReadingComprehensionTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
def _build_net(self):
if self.is_predict_phase:
self.unique_id = fluid.layers.data(
name="start_positions",
shape=[-1, 1],
lod_level=0,
dtype="int64")
logits = fluid.layers.fc(
input=self.feature,
size=2,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name="cls_seq_label_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)
batch_ones = fluid.layers.fill_constant_batch_size_like(
input=start_logits, dtype='int64', shape=[1], value=1)
num_seqs = fluid.layers.reduce_sum(input=batch_ones)
return [start_logits, end_logits, num_seqs]
def _add_label(self):
start_positions = fluid.layers.data(
name="start_positions", shape=[-1, 1], lod_level=0, dtype="int64")
end_positions = fluid.layers.data(
name="end_positions", shape=[-1, 1], lod_level=0, dtype="int64")
return [start_positions, end_positions]
def _add_loss(self):
start_positions = self.labels[0]
end_positions = self.labels[1]
start_logits = self.outputs[0]
end_logits = self.outputs[1]
start_loss = fluid.layers.softmax_with_cross_entropy(
logits=start_logits, label=start_positions)
start_loss = fluid.layers.mean(x=start_loss)
end_loss = fluid.layers.softmax_with_cross_entropy(
logits=end_logits, label=end_positions)
end_loss = fluid.layers.mean(x=end_loss)
total_loss = (start_loss + end_loss) / 2.0
return total_loss
def _add_metrics(self):
return []
@property
def feed_list(self):
feed_list = [varname for varname in self._base_feed_list]
if self.is_train_phase:
feed_list += [self.labels[0].name, self.labels[1].name]
elif self.is_predict_phase:
feed_list += [self.unique_id.name]
return feed_list
@property
def fetch_list(self):
if self.is_train_phase:
return [metric.name for metric in self.metrics
] + [self.loss.name, self.outputs[-1].name]
elif self.is_predict_phase:
return [self.unique_id.name
] + [output.name for output in self.outputs]
def _calculate_metrics(self, run_states):
total_cost, total_num_seqs = [], []
run_step = run_time_used = run_examples = 0
for run_state in run_states:
np_loss = run_state.run_results[0]
np_num_seqs = run_state.run_results[1]
total_cost.extend(np_loss * np_num_seqs)
total_num_seqs.extend(np_num_seqs)
run_examples += run_state.run_examples
run_step += run_state.run_step
run_time_used = time.time() - run_states[0].run_time_begin
run_speed = run_step / run_time_used
avg_loss = np.sum(total_cost) / np.sum(total_num_seqs)
scores = OrderedDict()
# If none of metrics has been implemented, loss will be used to evaluate.
return scores, avg_loss, run_speed
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from scipy.stats import spearmanr
from .basic_task import BasicTask
class RegressionTask(BasicTask):
def __init__(self,
feature,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["spearman"]
main_program = feature.block.program
super(RegressionTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
self.hidden_units = hidden_units
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=1,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act=None)
return [logits]
def _add_label(self):
return [fluid.layers.data(name="label", dtype="float32", shape=[1])]
def _add_loss(self):
cost = fluid.layers.square_error_cost(
input=self.outputs[0], label=self.labels[0])
return fluid.layers.mean(x=cost)
def _add_metrics(self):
return []
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [self.labels[0].name, self.outputs[0].name
] + [metric.name
for metric in self.metrics] + [self.loss.name]
return [output.name for output in self.outputs]
def _calculate_metrics(self, run_states):
loss_sum = run_examples = 0
run_step = run_time_used = 0
all_labels = np.array([])
all_infers = np.array([])
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
all_labels = np.hstack((all_labels, np_labels.reshape([-1])))
all_infers = np.hstack((all_infers, np_infers.reshape([-1])))
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / run_examples
run_speed = run_step / run_time_used
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "spearman":
spearman_correlations = spearmanr(all_labels, all_infers)[0]
scores["spearman"] = spearman_correlations
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
from .basic_task import BasicTask
class SequenceLabelTask(BasicTask):
def __init__(self,
feature,
max_seq_len,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["f1", "precision", "recall"]
main_program = feature.block.program
super(SequenceLabelTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
self.max_seq_len = max_seq_len
self.num_classes = num_classes
def _build_net(self):
self.logits = fluid.layers.fc(
input=self.feature,
size=self.num_classes,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name="cls_seq_label_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
ret_infers = fluid.layers.assign(self.ret_infers)
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64')
seq_len = fluid.layers.assign(self.seq_len)
logits = self.logits
logits = fluid.layers.flatten(logits, axis=2)
logits = fluid.layers.softmax(logits)
self.num_labels = logits.shape[1]
return [logits]
def _add_label(self):
label = fluid.layers.data(
name="label", shape=[self.max_seq_len, 1], dtype='int64')
return [label]
def _add_loss(self):
labels = fluid.layers.flatten(self.labels[0], axis=2)
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=labels)
loss = fluid.layers.mean(x=ce_loss)
return loss
def _add_metrics(self):
self.ret_labels = fluid.layers.reshape(x=self.labels[0], shape=[-1, 1])
return [self.ret_labels, self.ret_infers, self.seq_len]
def _calculate_metrics(self, run_states):
total_infer = total_label = total_correct = loss_sum = 0
run_step = run_time_used = run_examples = 0
for run_state in run_states:
loss_sum += np.mean(run_state.run_results[-1])
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
np_lens = run_state.run_results[2]
label_num, infer_num, correct_num = chunk_eval(
np_labels, np_infers, np_lens, self.num_labels,
self.device_count)
total_infer += infer_num
total_label += label_num
total_correct += correct_num
run_examples += run_state.run_examples
run_step += run_state.run_step
run_time_used = time.time() - run_states[0].run_time_begin
run_speed = run_step / run_time_used
avg_loss = loss_sum / run_examples
precision, recall, f1 = calculate_f1(total_label, total_infer,
total_correct)
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "precision":
scores["precision"] = precision
elif metric == "recall":
scores["recall"] = recall
elif metric == "f1":
scores["f1"] = f1
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
@property
def feed_list(self):
feed_list = [varname for varname in self._base_feed_list]
if self.is_train_phase or self.is_test_phase:
feed_list += [self.labels[0].name, self.seq_len.name]
else:
feed_list += [self.seq_len.name]
return feed_list
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [metric.name for metric in self.metrics] + [self.loss.name]
elif self.is_predict_phase:
return [self.ret_infers.name] + [self.seq_len.name]
return [output.name for output in self.outputs]
...@@ -26,6 +26,7 @@ from paddlehub.common.downloader import default_downloader ...@@ -26,6 +26,7 @@ from paddlehub.common.downloader import default_downloader
from paddlehub.common.dir import MODULE_HOME from paddlehub.common.dir import MODULE_HOME
from paddlehub.module import module_desc_pb2 from paddlehub.module import module_desc_pb2
import paddlehub as hub import paddlehub as hub
from paddlehub.common.logger import logger
class LocalModuleManager(object): class LocalModuleManager(object):
...@@ -35,23 +36,26 @@ class LocalModuleManager(object): ...@@ -35,23 +36,26 @@ class LocalModuleManager(object):
if not os.path.exists(self.local_modules_dir): if not os.path.exists(self.local_modules_dir):
utils.mkdir(self.local_modules_dir) utils.mkdir(self.local_modules_dir)
elif os.path.isfile(self.local_modules_dir): elif os.path.isfile(self.local_modules_dir):
#TODO(wuzewu): give wanring raise ValueError("Module home should be a folder, not a file")
pass
def check_module_valid(self, module_path): def check_module_valid(self, module_path):
#TODO(wuzewu): code
info = {}
try: try:
desc_pb_path = os.path.join(module_path, 'module_desc.pb') desc_pb_path = os.path.join(module_path, 'module_desc.pb')
if os.path.exists(desc_pb_path) and os.path.isfile(desc_pb_path): if os.path.exists(desc_pb_path) and os.path.isfile(desc_pb_path):
info = {}
desc = module_desc_pb2.ModuleDesc() desc = module_desc_pb2.ModuleDesc()
with open(desc_pb_path, "rb") as fp: with open(desc_pb_path, "rb") as fp:
desc.ParseFromString(fp.read()) desc.ParseFromString(fp.read())
info['version'] = desc.attr.map.data["module_info"].map.data[ info['version'] = desc.attr.map.data["module_info"].map.data[
"version"].s "version"].s
return True, info
else:
logger.warning(
"%s does not exist, the module will be reinstalled" %
desc_pb_path)
except: except:
pass
return False, None return False, None
return True, info
def all_modules(self, update=False): def all_modules(self, update=False):
if not update and self.modules_dict: if not update and self.modules_dict:
...@@ -60,7 +64,6 @@ class LocalModuleManager(object): ...@@ -60,7 +64,6 @@ class LocalModuleManager(object):
for sub_dir_name in os.listdir(self.local_modules_dir): for sub_dir_name in os.listdir(self.local_modules_dir):
sub_dir_path = os.path.join(self.local_modules_dir, sub_dir_name) sub_dir_path = os.path.join(self.local_modules_dir, sub_dir_name)
if os.path.isdir(sub_dir_path): if os.path.isdir(sub_dir_path):
#TODO(wuzewu): get module name
valid, info = self.check_module_valid(sub_dir_path) valid, info = self.check_module_valid(sub_dir_path)
if valid: if valid:
module_name = sub_dir_name module_name = sub_dir_name
...@@ -92,7 +95,6 @@ class LocalModuleManager(object): ...@@ -92,7 +95,6 @@ class LocalModuleManager(object):
url = search_result.get('url', None) url = search_result.get('url', None)
md5_value = search_result.get('md5', None) md5_value = search_result.get('md5', None)
installed_module_version = search_result.get('version', None) installed_module_version = search_result.get('version', None)
#TODO(wuzewu): add compatibility check
if not url or (module_version is not None and installed_module_version if not url or (module_version is not None and installed_module_version
!= module_version) or (name != module_name): != module_version) or (name != module_name):
tips = "Can't find module %s" % module_name tips = "Can't find module %s" % module_name
......
...@@ -117,7 +117,6 @@ class Module(object): ...@@ -117,7 +117,6 @@ class Module(object):
self.cache_fetch_dict = None self.cache_fetch_dict = None
self.cache_program = None self.cache_program = None
# TODO(wuzewu): print more module loading info log
if name: if name:
self._init_with_name(name=name, version=version) self._init_with_name(name=name, version=version)
elif module_dir: elif module_dir:
...@@ -458,7 +457,6 @@ class Module(object): ...@@ -458,7 +457,6 @@ class Module(object):
fetch_dict = self.cache_fetch_dict fetch_dict = self.cache_fetch_dict
program = self.cache_program program = self.cache_program
#TODO(wuzewu): more option
fetch_list = list(set([value for key, value in fetch_dict.items()])) fetch_list = list(set([value for key, value in fetch_dict.items()]))
with fluid.program_guard(program): with fluid.program_guard(program):
result = [] result = []
...@@ -554,7 +552,6 @@ class Module(object): ...@@ -554,7 +552,6 @@ class Module(object):
self._recover_variable_info(program) self._recover_variable_info(program)
paddle_helper.set_op_attr(program, is_test=for_test) paddle_helper.set_op_attr(program, is_test=for_test)
#TODO(wuzewu): return feed_list and fetch_list directly
feed_dict = {} feed_dict = {}
fetch_dict = {} fetch_dict = {}
for index, var in enumerate(signature.inputs): for index, var in enumerate(signature.inputs):
...@@ -569,7 +566,6 @@ class Module(object): ...@@ -569,7 +566,6 @@ class Module(object):
if key: if key:
fetch_dict[key] = program.global_block().var(var.name) fetch_dict[key] = program.global_block().var(var.name)
# TODO(ZeyuChen) encapsulate into a funtion
# update BERT/ERNIE's input tensor's sequence length to max_seq_len # update BERT/ERNIE's input tensor's sequence length to max_seq_len
if self.name.startswith("bert") or self.name.startswith("ernie"): if self.name.startswith("bert") or self.name.startswith("ernie"):
MAX_SEQ_LENGTH = 512 MAX_SEQ_LENGTH = 512
......
...@@ -17,4 +17,6 @@ from .nlp_reader import ClassifyReader ...@@ -17,4 +17,6 @@ from .nlp_reader import ClassifyReader
from .nlp_reader import SequenceLabelReader from .nlp_reader import SequenceLabelReader
from .nlp_reader import LACClassifyReader from .nlp_reader import LACClassifyReader
from .nlp_reader import MultiLabelClassifyReader from .nlp_reader import MultiLabelClassifyReader
from .nlp_reader import ReadingComprehensionReader
from .nlp_reader import RegressionReader
from .cv_reader import ImageClassificationReader from .cv_reader import ImageClassificationReader
此差异已折叠。
...@@ -21,7 +21,6 @@ from __future__ import print_function ...@@ -21,7 +21,6 @@ from __future__ import print_function
import collections import collections
import io import io
import unicodedata import unicodedata
import six import six
......
visualdl >= 1.3.0
pre-commit pre-commit
protobuf >= 3.1.0 protobuf >= 3.1.0
yapf == 0.26.0 yapf == 0.26.0
...@@ -12,3 +11,5 @@ requests ...@@ -12,3 +11,5 @@ requests
pandas pandas
#[py2]pandas == 0.24.0 #[py2]pandas == 0.24.0
flake8 flake8
tb-paddle
cma == 2.7.0
...@@ -32,7 +32,7 @@ max_version, mid_version, min_version = python_version() ...@@ -32,7 +32,7 @@ max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0', 'pyyaml', 'Pillow', 'requests', 'six >= 1.10.0', 'protobuf >= 3.1.0', 'pyyaml', 'Pillow', 'requests',
"visualdl >= 1.3.0" 'visualdl >= 1.3.0', 'cma == 2.7.0'
] ]
if max_version < 3: if max_version < 3:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册