提交 c2c1427c 编写于 作者: K kinghuin 提交者: wuzewu

discard use_taskid

上级 1430c9e6
......@@ -30,36 +30,21 @@ parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--use_taskid", type=ast.literal_eval, default=False, help="Whether to user ernie v2 , if not to use bert.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# Load the PaddleHub pretrained model
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
module = hub.Module(name="ernie_v2_eng_base")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name, inputs["position_ids"].name,
inputs["segment_ids"].name, inputs["input_mask"].name
]
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name, inputs["position_ids"].name,
inputs["segment_ids"].name, inputs["input_mask"].name
]
# Download dataset and use MultiLabelReader to read dataset
dataset = hub.dataset.Toxic()
......
......@@ -36,38 +36,23 @@ parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_taskid", type=ast.literal_eval, default=False, help="Whether to user ernie v2 , if not to use bert.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# Load the PaddleHub pretrained model
if args.use_taskid:
module = hub.Module(name="ernie_eng_base.hub_module")
module = hub.Module(name="ernie_eng_base.hub_module")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Download dataset and use MultiLabelReader to read dataset
dataset = hub.dataset.Toxic()
......
......@@ -16,5 +16,4 @@ python -u multi_label_classifier.py \
--learning_rate=5e-5 \
--weight_decay=0.01 \
--max_seq_len=128 \
--num_epoch=3 \
--use_taskid=False
--num_epoch=3
......@@ -2,4 +2,4 @@ export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0
CKPT_DIR="./ckpt_toxic"
python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 --use_gpu True --use_taskid False
python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 --use_gpu True
......@@ -34,7 +34,6 @@ parser.add_argument("--max_seq_len", type=int, default=512, help="Number of word
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
parser.add_argument("--use_taskid", type=ast.literal_eval, default=False, help="Whether to use taskid ,if yes to use ernie v2.")
args = parser.parse_args()
# yapf: enable.
......@@ -43,10 +42,7 @@ if __name__ == '__main__':
# Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "sts-b":
dataset = hub.dataset.GLUE("STS-B")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
else:
raise ValueError("%s dataset is not defined" % args.dataset)
......
......@@ -16,5 +16,4 @@ python -u regression.py \
--max_seq_len=128 \
--num_epoch=3 \
--use_pyreader=True \
--use_data_parallel=True \
--use_taskid=False \
--use_data_parallel=True
......@@ -40,10 +40,6 @@ if __name__ == '__main__':
module = hub.Module(name="ernie_tiny")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
if module.name.startswith("ernie_v2"):
use_taskid = True
else:
use_taskid = False
# Download dataset and use SequenceLabelReader to read dataset
dataset = hub.dataset.MSRA_NER()
......
......@@ -36,7 +36,6 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whe
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--dataset", type=str, default="chnsenticorp", help="The choice of dataset")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
parser.add_argument("--use_taskid", type=ast.literal_eval, default=False, help="Whether to use taskid ,if yes to use ernie v2.")
args = parser.parse_args()
# yapf: enable.
......@@ -58,60 +57,36 @@ if __name__ == '__main__':
metrics_choices = ["acc"]
elif args.dataset.lower() == "mrpc":
dataset = hub.dataset.GLUE("MRPC")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["f1", "acc"]
# The first metric will be chosen for evaluation. Ref: task.py:799
elif args.dataset.lower() == "qqp":
dataset = hub.dataset.GLUE("QQP")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["f1", "acc"]
elif args.dataset.lower() == "sst-2":
dataset = hub.dataset.GLUE("SST-2")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["acc"]
elif args.dataset.lower() == "cola":
dataset = hub.dataset.GLUE("CoLA")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["matthews", "acc"]
elif args.dataset.lower() == "qnli":
dataset = hub.dataset.GLUE("QNLI")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["acc"]
elif args.dataset.lower() == "rte":
dataset = hub.dataset.GLUE("RTE")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mnli" or args.dataset.lower() == "mnli_m":
dataset = hub.dataset.GLUE("MNLI_m")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mnli_mm":
dataset = hub.dataset.GLUE("MNLI_mm")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
module = hub.Module(name="ernie_v2_eng_base")
metrics_choices = ["acc"]
elif args.dataset.lower().startswith("xnli"):
dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:])
......
......@@ -18,4 +18,3 @@ python -u predict.py --checkpoint_dir=$CKPT_DIR \
--use_gpu=True \
--dataset=${DATASET} \
--batch_size=150 \
--use_taskid=False \
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册