From 5191bf60365d0215c96583b73ced05f6de50514d Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Tue, 5 Mar 2019 13:44:46 +0000
Subject: [PATCH] Change default of some args & activate travis-ci

---
 .travis.yml                | 30 ++++++++++++++++++++++++++++++
 .travis/precommit.sh       | 21 +++++++++++++++++++++
 BERT/convert_params.py     |  2 +-
 BERT/predict_classifier.py |  3 +--
 BERT/run_classifier.py     |  6 +++---
 BERT/run_squad.py          | 19 +++++++++----------
 BERT/train.py              |  2 +-
 7 files changed, 66 insertions(+), 17 deletions(-)
 create mode 100644 .travis.yml
 create mode 100755 .travis/precommit.sh

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..94e00a1
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,30 @@
+language: cpp
+cache: ccache
+sudo: required
+dist: trusty
+services:
+  - docker
+os:
+  - linux
+env:
+  - JOB=PRE_COMMIT
+
+addons:
+  apt:
+    packages:
+      - git
+      - python
+      - python-pip
+      - python2.7-dev
+  ssh_known_hosts: 13.229.163.131
+before_install:
+  - sudo pip install -U virtualenv pre-commit pip
+
+script:
+  - exit_code=0
+  - .travis/precommit.sh || exit_code=$(( exit_code | $? ))
+
+notifications:
+  email:
+    on_success: change
+    on_failure: always
diff --git a/.travis/precommit.sh b/.travis/precommit.sh
new file mode 100755
index 0000000..369fa51
--- /dev/null
+++ b/.travis/precommit.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+function abort(){
+    echo "Your commit does not fit PaddlePaddle code style" 1>&2
+    echo "Please use pre-commit scripts to auto-format your code" 1>&2
+    exit 1
+}
+
+trap 'abort' 0
+set -e
+cd `dirname $0`
+cd ..
+export PATH=/usr/bin:$PATH
+pre-commit install
+
+if ! pre-commit run -a ; then
+    ls -lh
+    git diff --exit-code
+    exit 1
+fi
+
+trap : 0
diff --git a/BERT/convert_params.py b/BERT/convert_params.py
index 760c7ac..17cada9 100644
--- a/BERT/convert_params.py
+++ b/BERT/convert_params.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 import numpy as np
 import argparse
 import collections
-from args import print_arguments
+from utils.args import print_arguments
 import tensorflow as tf
 import paddle.fluid as fluid
 from tensorflow.python import pywrap_tensorflow
diff --git a/BERT/predict_classifier.py b/BERT/predict_classifier.py
index fe813ff..51e840d 100644
--- a/BERT/predict_classifier.py
+++ b/BERT/predict_classifier.py
@@ -41,7 +41,7 @@ model_g.add_arg("use_fp16", bool, False, "Whether to resume
 data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options.")
 data_g.add_arg("data_dir", str, None, "Directory to test data.")
 data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
-data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
+data_g.add_arg("max_seq_len", int, 128, "Number of words of the longest seqence.")
 data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
@@ -51,7 +51,6 @@ data_g.add_arg("do_lower_case", bool, True,
 
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
-run_type_g.add_arg("use_fast_executor", bool, False, "If set, use fast parallel executor (in experiment).")
 run_type_g.add_arg("task_name", str, None,
                    "The name of task to perform fine-tuning, should be in {'xnli', 'mnli', 'cola', 'mrpc'}.")
 run_type_g.add_arg("do_prediction", bool, True, "Whether to do prediction on test set.")
diff --git a/BERT/run_classifier.py b/BERT/run_classifier.py
index 5ba2ca9..1453ef7 100644
--- a/BERT/run_classifier.py
+++ b/BERT/run_classifier.py
@@ -44,7 +44,7 @@ model_g.add_arg("init_pretraining_params", str, None,
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
 
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 100, "Number of epoches for training.")
+train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
 train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
 train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                 "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
@@ -65,13 +65,13 @@ data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data process
 data_g.add_arg("data_dir", str, None, "Path to training data.")
 data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
-data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
+data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
                "Otherwise, it will be the maximum number of examples in one batch.")
 data_g.add_arg("do_lower_case", bool, True,
                "Whether to lower case the input text. Should be True for uncased models and False for cased models.")
-data_g.add_arg("random_seed", int, 0, "Random seed.")
+data_g.add_arg("random_seed", int, 0, "Random seed.")
 
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
diff --git a/BERT/run_squad.py b/BERT/run_squad.py
index 313f06e..07ce1c1 100644
--- a/BERT/run_squad.py
+++ b/BERT/run_squad.py
@@ -43,16 +43,15 @@ model_g.add_arg("init_pretraining_params", str, None,
 model_g.add_arg("checkpoints", str, "checkpoints", "Path to save checkpoints.")
 
 train_g = ArgumentGroup(parser, "training", "training options.")
-train_g.add_arg("epoch", int, 100, "Number of epoches for training.")
-train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
+train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
+train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
 train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                 "scheduler of learning rate.", choices=['linear_warmup_decay', 'noam_decay'])
-train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.")
+train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.")
 train_g.add_arg("warmup_proportion", float, 0.1,
                 "Proportion of training steps to perform linear learning rate warmup for.")
-train_g.add_arg("save_steps", int, 10000, "The steps interval to save checkpoints.")
-train_g.add_arg("validation_steps", int, 1000, "The steps interval to evaluate model performance.")
-train_g.add_arg("use_fp16", bool, False, "Whether to use fp16 mixed precision training.")
+train_g.add_arg("save_steps", int, 1000, "The steps interval to save checkpoints.")
+train_g.add_arg("use_fp16", bool, False, "Whether to use fp16 mixed precision training.")
 train_g.add_arg("loss_scaling", float, 1.0,
                 "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled.")
 
@@ -67,9 +66,9 @@ data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
 data_g.add_arg("version_2_with_negative", bool, False,
                "If true, the SQuAD examples contain some that do not have an answer. If using squad v2.0, it should be set true.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
-data_g.add_arg("max_query_length", int, 64, "Max query length.")
-data_g.add_arg("max_answer_length", int, 64, "Max answer length.")
-data_g.add_arg("batch_size", int, 12, "Total samples' number in batch for training. see also --in_tokens.")
+data_g.add_arg("max_query_length", int, 64, "Max query length.")
+data_g.add_arg("max_answer_length", int, 30, "Max answer length.")
+data_g.add_arg("batch_size", int, 12, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
                "Otherwise, it will be the maximum number of examples in one batch.")
@@ -81,7 +80,7 @@ data_g.add_arg("n_best_size", int, 20,
                "The total number of n-best predictions to generate in the nbest_predictions.json output file.")
 data_g.add_arg("null_score_diff_threshold", float, 0.0,
                "If null_score - best_non_null is greater than the threshold predict null.")
-data_g.add_arg("random_seed", int, 0, "Random seed.")
+data_g.add_arg("random_seed", int, 0, "Random seed.")
 
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
diff --git a/BERT/train.py b/BERT/train.py
index 55d362b..51df870 100644
--- a/BERT/train.py
+++ b/BERT/train.py
@@ -65,7 +65,7 @@ data_g.add_arg("validation_set_dir", str, "./data/validation/", "Path to trai
 data_g.add_arg("test_set_dir", str, None, "Path to training data.")
 data_g.add_arg("vocab_path", str, "./config/vocab.txt", "Vocabulary path.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words of the longest seqence.")
-data_g.add_arg("batch_size", int, 8192, "Total examples' number in batch for training. see also --in_tokens.")
+data_g.add_arg("batch_size", int, 16, "Total examples' number in batch for training. see also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
                "If set, the batch size will be the maximum number of tokens in one batch. "
                "Otherwise, it will be the maximum number of examples in one batch.")
--
GitLab