Unverified · Commit 8a0753a5 authored by Yibing Liu, committed by GitHub

Merge pull request #3 from PaddlePaddle/args_fix

Change default value of some args & activate travis-ci
.travis.yml:
language: cpp
cache: ccache
sudo: required
dist: trusty
services:
  - docker
os:
  - linux
env:
  - JOB=PRE_COMMIT
addons:
  apt:
    packages:
      - git
      - python
      - python-pip
      - python2.7-dev
  ssh_known_hosts: 13.229.163.131
before_install:
  - sudo pip install -U virtualenv pre-commit pip
script:
  - exit_code=0
  - .travis/precommit.sh || exit_code=$(( exit_code | $? ))
notifications:
  email:
    on_success: change
    on_failure: always

.travis/precommit.sh:
#!/bin/bash
function abort(){
    echo "Your commit does not fit PaddlePaddle code style" 1>&2
    echo "Please use pre-commit scripts to auto-format your code" 1>&2
    exit 1
}
trap 'abort' 0
set -e
cd `dirname $0`
cd ..
export PATH=/usr/bin:$PATH
pre-commit install
if ! pre-commit run -a ; then
    ls -lh
    git diff --exit-code
    exit 1
fi
trap : 0
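
Note that `pre-commit run -a` in the script above assumes a `.pre-commit-config.yaml` at the repository root. That file is not part of this commit; a minimal hypothetical config (the repo URL, pin, and hook ids here are placeholders, not the project's actual hook list) would look like:

# Hypothetical .pre-commit-config.yaml -- not the repository's actual hooks.
- repo: https://github.com/pre-commit/pre-commit-hooks
  sha: v1.4.0          # old-style pin; newer pre-commit versions use `rev:`
  hooks:
    - id: trailing-whitespace   # strip trailing spaces
    - id: end-of-file-fixer     # ensure files end with a newline

Running `.travis/precommit.sh` locally applies the same checks as CI, so style failures can be caught before pushing.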

@@ -20,7 +20,7 @@ from __future__ import print_function
 import numpy as np
 import argparse
 import collections
-from args import print_arguments
+from utils.args import print_arguments
 import tensorflow as tf
 import paddle.fluid as fluid
 from tensorflow.python import pywrap_tensorflow
...

@@ -41,7 +41,7 @@ model_g.add_arg("use_fp16", bool, False, "Whether to resume
 data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options.")
 data_g.add_arg("data_dir", str, None, "Directory to test data.")
 data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
-data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
+data_g.add_arg("max_seq_len", int, 128, "Number of words of the longest seqence.")
 data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
@@ -51,7 +51,6 @@ data_g.add_arg("do_lower_case", bool, True,
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
-run_type_g.add_arg("use_fast_executor", bool, False, "If set, use fast parallel executor (in experiment).")
 run_type_g.add_arg("task_name", str, None,
                    "The name of task to perform fine-tuning, should be in {'xnli', 'mnli', 'cola', 'mrpc'}.")
 run_type_g.add_arg("do_prediction", bool, True, "Whether to do prediction on test set.")
...
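
Both the hunk above and the ones below call into an `ArgumentGroup` helper imported from `utils/args.py`. That file is collapsed in this view; judging from the call sites it is a thin wrapper around `argparse`, roughly like the sketch below (a reconstruction from usage, not the file's verbatim contents):

import argparse
import distutils.util


def str2bool(v):
    # argparse turns any non-empty string (including "False") into True,
    # so boolean flags are routed through an explicit parser instead.
    return bool(distutils.util.strtobool(v))


class ArgumentGroup(object):
    """Registers related options under one argparse argument group."""

    def __init__(self, parser, title, des):
        self._group = parser.add_argument_group(title=title, description=des)

    def add_arg(self, name, type, default, help, **kwargs):
        type = str2bool if type == bool else type
        self._group.add_argument(
            "--" + name, default=default, type=type,
            help=help + ' Default: %(default)s.', **kwargs)


def print_arguments(args):
    # Dump every parsed option, typically called once at startup.
    print('-----------  Configuration Arguments -----------')
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')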

@@ -44,7 +44,7 @@ model_g.add_arg("init_pretraining_params", str, None,
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 100, "Number of epoches for training.")
+train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
 train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
 train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                 "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
@@ -65,13 +65,13 @@ data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data process
 data_g.add_arg("data_dir", str, None, "Path to training data.")
 data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
 data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
                "Otherwise, it will be the maximum number of examples in one batch.")
 data_g.add_arg("do_lower_case", bool, True,
                "Whether to lower case the input text. Should be True for uncased models and False for cased models.")
 data_g.add_arg("random_seed", int, 0, "Random seed.")
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
...

@@ -43,16 +43,15 @@ model_g.add_arg("init_pretraining_params", str, None,
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 100, "Number of epoches for training.")
+train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
 train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
 train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                 "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
 train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.")
 train_g.add_arg("warmup_proportion", float, 0.1,
                 "Proportion of training steps to perform linear learning rate warmup for.")
-train_g.add_arg("save_steps", int, 10000, "The steps interval to save checkpoints.")
-train_g.add_arg("validation_steps", int, 1000, "The steps interval to evaluate model performance.")
+train_g.add_arg("save_steps", int, 1000, "The steps interval to save checkpoints.")
 train_g.add_arg("use_fp16", bool, False, "Whether to use fp16 mixed precision training.")
 train_g.add_arg("loss_scaling", float, 1.0,
                 "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled.")
@@ -67,9 +66,9 @@ data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
 data_g.add_arg("version_2_with_negative", bool, False,
                "If true, the SQuAD examples contain some that do not have an answer. If using squad v2.0, it should be set true.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
 data_g.add_arg("max_query_length", int, 64, "Max query length.")
-data_g.add_arg("max_answer_length", int, 64, "Max answer length.")
-data_g.add_arg("batch_size", int, 12, "Total samples' number in batch for training. see also --in_tokens.")
+data_g.add_arg("max_answer_length", int, 30, "Max answer length.")
+data_g.add_arg("batch_size", int, 12, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
                "Otherwise, it will be the maximum number of examples in one batch.")
@@ -81,7 +80,7 @@ data_g.add_arg("n_best_size", int, 20,
                "The total number of n-best predictions to generate in the nbest_predictions.json output file.")
 data_g.add_arg("null_score_diff_threshold", float, 0.0,
                "If null_score - best_non_null is greater than the threshold predict null.")
 data_g.add_arg("random_seed", int, 0, "Random seed.")
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
...
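
On `loss_scaling`: with `use_fp16`, gradients of a small loss can underflow to zero in half precision, so the loss is multiplied by a constant factor before backpropagation and the gradients are divided by the same factor before the parameter update. A framework-agnostic sketch of the idea, using a hypothetical `compute_gradients` helper (this is not the repository's actual fp16 code path):

def scaled_backward(loss, parameters, loss_scaling=1.0):
    """Static loss scaling: scale up the loss, then unscale the gradients."""
    scaled_loss = loss * loss_scaling
    # compute_gradients is a stand-in for the framework's backward pass.
    grads = compute_gradients(scaled_loss, parameters)
    return [g / loss_scaling for g in grads]

With `loss_scaling=1.0` (the default here) the transformation is a no-op, which is why the help text says the flag is only meaningful when fp16 is enabled.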

@@ -65,7 +65,7 @@ data_g.add_arg("validation_set_dir", str, "./data/validation/", "Path to trai
 data_g.add_arg("test_set_dir", str, None, "Path to training data.")
 data_g.add_arg("vocab_path", str, "./config/vocab.txt", "Vocabulary path.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
-data_g.add_arg("batch_size", int, 8192, "Total examples' number in batch for training. see also --in_tokens.")
+data_g.add_arg("batch_size", int, 16, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
                "Otherwise, it will be the maximum number of examples in one batch.")
...
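
A recurring pair across these files is `--batch_size` and `--in_tokens`: with `in_tokens` set, `batch_size` counts tokens per batch rather than examples per batch, so the two interpretations differ by orders of magnitude (the drop from 8192 to 16 in the last hunk reads as a switch from a token-scale default to an example-scale one). A minimal illustrative batcher, assuming each example is a list of token ids (not the repository's reader code):

def batch_examples(examples, batch_size, in_tokens=False):
    """Yield batches capped by example count, or by token count if in_tokens."""
    batch, num_tokens = [], 0
    for example in examples:
        batch.append(example)
        num_tokens += len(example)
        full = num_tokens >= batch_size if in_tokens else len(batch) == batch_size
        if full:
            yield batch
            batch, num_tokens = [], 0
    if batch:  # flush the final partial batch
        yield batch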