Commit 5191bf60 authored by Yibing Liu

Change default of some args & activate travis-ci

Parent 880a14e9
language: cpp
cache: ccache
sudo: required
dist: trusty
services:
  - docker
os:
  - linux
env:
  - JOB=PRE_COMMIT
addons:
  apt:
    packages:
      - git
      - python
      - python-pip
      - python2.7-dev
  ssh_known_hosts: 13.229.163.131
before_install:
  - sudo pip install -U virtualenv pre-commit pip
script:
  - exit_code=0
  - .travis/precommit.sh || exit_code=$(( exit_code | $? ))
notifications:
  email:
    on_success: change
    on_failure: always
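This Travis configuration defines a single JOB=PRE_COMMIT job: it installs virtualenv, pre-commit, and an up-to-date pip, then delegates the actual style check to the repository's .travis/precommit.sh, shown next.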
#!/bin/bash
# Fail the build with a style hint whenever the script exits abnormally.
function abort(){
    echo "Your commit does not fit PaddlePaddle code style" 1>&2
    echo "Please use pre-commit scripts to auto-format your code" 1>&2
    exit 1
}

trap 'abort' 0
set -e

# Run from the repository root regardless of where the script is invoked.
cd `dirname $0`
cd ..
export PATH=/usr/bin:$PATH
pre-commit install

if ! pre-commit run -a ; then
    ls -lh
    git diff --exit-code
    exit 1
fi

# All hooks passed: clear the abort trap so the script exits 0.
trap : 0
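The same check can be reproduced locally with `bash .travis/precommit.sh`: the script changes to the repository root itself, installs the pre-commit hooks, and fails (printing the offending diff) when any hook reports or rewrites files.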
@@ -20,7 +20,7 @@ from __future__ import print_function
 import numpy as np
 import argparse
 import collections
-from args import print_arguments
+from utils.args import print_arguments
 import tensorflow as tf
 import paddle.fluid as fluid
 from tensorflow.python import pywrap_tensorflow
......
@@ -41,7 +41,7 @@ model_g.add_arg("use_fp16", bool, False, "Whether to resume
 data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options.")
 data_g.add_arg("data_dir", str, None, "Directory to test data.")
 data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
-data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
+data_g.add_arg("max_seq_len", int, 128, "Number of words of the longest seqence.")
 data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
               "If set, the batch size will be the maximum number of tokens in one batch. "
@@ -51,7 +51,6 @@ data_g.add_arg("do_lower_case", bool, True,
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
-run_type_g.add_arg("use_fast_executor", bool, False, "If set, use fast parallel executor (in experiment).")
 run_type_g.add_arg("task_name", str, None,
                    "The name of task to perform fine-tuning, should be in {'xnli', 'mnli', 'cola', 'mrpc'}.")
 run_type_g.add_arg("do_prediction", bool, True, "Whether to do prediction on test set.")
......
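All of the defaults touched by this commit go through the ArgumentGroup helper imported from utils/args.py, which is not part of this diff. Below is a minimal sketch of such a wrapper, assuming it simply forwards typed defaults to argparse; the class body and str2bool here are illustrative, not the repository's actual implementation.

```python
import argparse


def str2bool(v):
    # argparse treats any non-empty string as truthy, so booleans need explicit parsing
    return v.lower() in ("true", "t", "1", "yes")


class ArgumentGroup(object):
    """Illustrative stand-in for utils.args.ArgumentGroup (assumption, not the real code)."""

    def __init__(self, parser, title, des):
        self._group = parser.add_argument_group(title=title, description=des)

    def add_arg(self, name, dtype, default, help_text, **kwargs):
        dtype = str2bool if dtype == bool else dtype
        self._group.add_argument(
            "--" + name,
            type=dtype,
            default=default,
            help="%s Default: %s." % (help_text, default),
            **kwargs)


parser = argparse.ArgumentParser()
data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options.")
data_g.add_arg("max_seq_len", int, 128, "Number of words of the longest sequence.")

args = parser.parse_args([])                       # no flag given: the new default 128 applies
print(args.max_seq_len)                            # -> 128
args = parser.parse_args(["--max_seq_len", "384"])
print(args.max_seq_len)                            # -> 384, explicit flags still override the default
```

With a wrapper like this, changing the third argument of add_arg (e.g. max_seq_len 512 -> 128) only changes what users get when they omit the flag; values passed on the command line still win.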
@@ -44,7 +44,7 @@ model_g.add_arg("init_pretraining_params", str, None,
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 100, "Number of epoches for training.")
+train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
 train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
 train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                 "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
......
@@ -43,15 +43,14 @@ model_g.add_arg("init_pretraining_params", str, None,
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 100, "Number of epoches for training.")
+train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
 train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
 train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                 "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
 train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.")
 train_g.add_arg("warmup_proportion", float, 0.1,
                 "Proportion of training steps to perform linear learning rate warmup for.")
-train_g.add_arg("save_steps", int, 10000, "The steps interval to save checkpoints.")
-train_g.add_arg("validation_steps", int, 1000, "The steps interval to evaluate model performance.")
+train_g.add_arg("save_steps", int, 1000, "The steps interval to save checkpoints.")
 train_g.add_arg("use_fp16", bool, False, "Whether to use fp16 mixed precision training.")
 train_g.add_arg("loss_scaling", float, 1.0,
                 "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled.")
@@ -68,8 +67,8 @@ data_g.add_arg("version_2_with_negative", bool, False,
                "If true, the SQuAD examples contain some that do not have an answer. If using squad v2.0, it should be set true.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
 data_g.add_arg("max_query_length", int, 64, "Max query length.")
-data_g.add_arg("max_answer_length", int, 64, "Max answer length.")
-data_g.add_arg("batch_size", int, 12, "Total samples' number in batch for training. see also --in_tokens.")
+data_g.add_arg("max_answer_length", int, 30, "Max answer length.")
+data_g.add_arg("batch_size", int, 12, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
               "If set, the batch size will be the maximum number of tokens in one batch. "
               "Otherwise, it will be the maximum number of examples in one batch.")
......
@@ -65,7 +65,7 @@ data_g.add_arg("validation_set_dir", str, "./data/validation/", "Path to trai
 data_g.add_arg("test_set_dir", str, None, "Path to training data.")
 data_g.add_arg("vocab_path", str, "./config/vocab.txt", "Vocabulary path.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
-data_g.add_arg("batch_size", int, 8192, "Total examples' number in batch for training. see also --in_tokens.")
+data_g.add_arg("batch_size", int, 16, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
               "If set, the batch size will be the maximum number of tokens in one batch. "
               "Otherwise, it will be the maximum number of examples in one batch.")
......
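Several of the help strings above refer to --in_tokens, which changes what batch_size counts (tokens vs. examples), and the pretraining batch_size default drops from 8192 to 16 accordingly. The sketch below is a rough, purely illustrative rendering of those two batching modes, not the repository's actual data reader.

```python
def batch_reader(examples, batch_size, in_tokens):
    """Group tokenized examples into batches.

    With in_tokens=True, batch_size caps the number of tokens per batch;
    otherwise it caps the number of examples. Illustrative only.
    """
    batch, cost_in_batch = [], 0
    for ex in examples:  # ex: a list of token ids
        cost = len(ex) if in_tokens else 1
        if batch and cost_in_batch + cost > batch_size:
            yield batch
            batch, cost_in_batch = [], 0
        batch.append(ex)
        cost_in_batch += cost
    if batch:
        yield batch


sentences = [[1] * 60, [2] * 50, [3] * 40, [4] * 30]
print([len(b) for b in batch_reader(sentences, batch_size=2, in_tokens=False)])   # [2, 2]
print([len(b) for b in batch_reader(sentences, batch_size=100, in_tokens=True)])  # [1, 2, 1]
```

Counting tokens keeps memory use roughly constant across variable-length batches, which is why a token budget like 8192 and an example count like 16 differ by orders of magnitude.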