From f68214c9a86548ced31f92031f1faaa8e993523e Mon Sep 17 00:00:00 2001
From: kinghuin <kinghuin_chull@163.com>
Date: Tue, 26 Nov 2019 19:11:15 +0800
Subject: [PATCH] restore the removed code (#235)

* restore the removed code

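* modify cv reader: ImageClassificationReader now keeps a `num_examples` dict (initialised to -1 for train/dev/test) and fills in the matching entry when that phase's data is loaded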
---
 demo/image-classification/run_classifier.sh   |  2 ++
 demo/image-classification/run_predict.sh      |  2 ++
 demo/qa_classification/run_classifier.sh      |  1 -
 .../reading_comprehension.py                  |  2 --
 demo/reading-comprehension/run_finetune.sh    |  1 +
 demo/reading-comprehension/run_predict.sh     |  1 +
 demo/regression/run_predict.sh                |  2 +-
 demo/sequence-labeling/predict.py             |  6 ++--
 demo/sequence-labeling/run_sequence_label.sh  |  1 +
 demo/sequence-labeling/sequence_label.py      |  3 --
 demo/text-classification/predict.py           | 28 ++++++++++++++++---
 demo/text-classification/run_classifier.sh    |  1 +
 demo/text-classification/run_predict.sh       |  2 +-
 demo/text-classification/text_classifier.py   | 12 ++++----
 paddlehub/reader/cv_reader.py                 |  4 +++
 15 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/demo/image-classification/run_classifier.sh b/demo/image-classification/run_classifier.sh
index d91e67ab..bcb6dc32 100644
--- a/demo/image-classification/run_classifier.sh
+++ b/demo/image-classification/run_classifier.sh
@@ -1,2 +1,4 @@
 export FLAGS_eager_delete_tensor_gb=0.0
+export CUDA_VISIBLE_DEVICES=0
+
 python -u img_classifier.py $@
diff --git a/demo/image-classification/run_predict.sh b/demo/image-classification/run_predict.sh
index 149c89cf..1c6890aa 100644
--- a/demo/image-classification/run_predict.sh
+++ b/demo/image-classification/run_predict.sh
@@ -1,2 +1,4 @@
 export FLAGS_eager_delete_tensor_gb=0.0
+export CUDA_VISIBLE_DEVICES=0
+
 python -u predict.py $@
diff --git a/demo/qa_classification/run_classifier.sh b/demo/qa_classification/run_classifier.sh
index e41b4a50..6f6abc7b 100644
--- a/demo/qa_classification/run_classifier.sh
+++ b/demo/qa_classification/run_classifier.sh
@@ -1,7 +1,6 @@
 export FLAGS_eager_delete_tensor_gb=0.0
 export CUDA_VISIBLE_DEVICES=0
 
-
 CKPT_DIR="./ckpt_qa"
 # Recommending hyper parameters for difference task
 # ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
diff --git a/demo/reading-comprehension/reading_comprehension.py b/demo/reading-comprehension/reading_comprehension.py
index 93298b7f..85bdada1 100644
--- a/demo/reading-comprehension/reading_comprehension.py
+++ b/demo/reading-comprehension/reading_comprehension.py
@@ -89,9 +89,7 @@ if __name__ == '__main__':
 
     # Setup runing config for PaddleHub Finetune API
     config = hub.RunConfig(
-        log_interval=10,
         eval_interval=300,
-        save_ckpt_interval=10000,
         use_pyreader=args.use_pyreader,
         use_data_parallel=args.use_data_parallel,
         use_cuda=args.use_gpu,
diff --git a/demo/reading-comprehension/run_finetune.sh b/demo/reading-comprehension/run_finetune.sh
index e0496f7d..9d92042c 100644
--- a/demo/reading-comprehension/run_finetune.sh
+++ b/demo/reading-comprehension/run_finetune.sh
@@ -1,4 +1,5 @@
 export FLAGS_eager_delete_tensor_gb=0.0
+export CUDA_VISIBLE_DEVICES=0
 
 # Recommending hyper parameters for difference task
 # squad: batch_size=8, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
diff --git a/demo/reading-comprehension/run_predict.sh b/demo/reading-comprehension/run_predict.sh
index 6be03eba..456f7dc2 100644
--- a/demo/reading-comprehension/run_predict.sh
+++ b/demo/reading-comprehension/run_predict.sh
@@ -1,4 +1,5 @@
 export FLAGS_eager_delete_tensor_gb=0.0
+export CUDA_VISIBLE_DEVICES=0
 
 CKPT_DIR="./ckpt_cmrc2018"
 dataset=cmrc2018
diff --git a/demo/regression/run_predict.sh b/demo/regression/run_predict.sh
index 3d0c1ae0..34ce6fe3 100644
--- a/demo/regression/run_predict.sh
+++ b/demo/regression/run_predict.sh
@@ -1,5 +1,5 @@
 export FLAGS_eager_delete_tensor_gb=0.0
-# export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=0
 
 # User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task
 DATASET="STS-B"
diff --git a/demo/sequence-labeling/predict.py b/demo/sequence-labeling/predict.py
index 96fea4fa..81419196 100644
--- a/demo/sequence-labeling/predict.py
+++ b/demo/sequence-labeling/predict.py
@@ -41,7 +41,7 @@ args = parser.parse_args()
 
 if __name__ == '__main__':
     # loading Paddlehub ERNIE pretrained model
-    module = hub.Module(name="ernie")
+    module = hub.Module(name="ernie_tiny")
     inputs, outputs, program = module.context(max_seq_len=args.max_seq_len)
 
     # Sentence labeling dataset reader
@@ -49,7 +49,9 @@ if __name__ == '__main__':
     reader = hub.reader.SequenceLabelReader(
         dataset=dataset,
         vocab_path=module.get_vocab_path(),
-        max_seq_len=args.max_seq_len)
+        max_seq_len=args.max_seq_len,
+        sp_model_path=module.get_spm_path(),
+        word_dict_path=module.get_word_dict_path())
     inv_label_map = {val: key for key, val in reader.label_map.items()}
 
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
diff --git a/demo/sequence-labeling/run_sequence_label.sh b/demo/sequence-labeling/run_sequence_label.sh
index 4b4b5c3a..ab5eef90 100644
--- a/demo/sequence-labeling/run_sequence_label.sh
+++ b/demo/sequence-labeling/run_sequence_label.sh
@@ -1,4 +1,5 @@
 export FLAGS_eager_delete_tensor_gb=0.0
+export CUDA_VISIBLE_DEVICES=0
 
 CKPT_DIR="./ckpt_sequence_label"
 python -u sequence_label.py \
diff --git a/demo/sequence-labeling/sequence_label.py b/demo/sequence-labeling/sequence_label.py
index 00b2fe8a..52cfb665 100644
--- a/demo/sequence-labeling/sequence_label.py
+++ b/demo/sequence-labeling/sequence_label.py
@@ -71,9 +71,6 @@ if __name__ == '__main__':
 
     # Setup runing config for PaddleHub Finetune API
     config = hub.RunConfig(
-        log_interval=10,
-        eval_interval=300,
-        save_ckpt_interval=10000,
         use_data_parallel=args.use_data_parallel,
         use_pyreader=args.use_pyreader,
         use_cuda=args.use_gpu,
diff --git a/demo/text-classification/predict.py b/demo/text-classification/predict.py
index 5829fd64..b044a815 100644
--- a/demo/text-classification/predict.py
+++ b/demo/text-classification/predict.py
@@ -45,15 +45,35 @@ if __name__ == '__main__':
     # Download dataset and use ClassifyReader to read dataset
     if args.dataset.lower() == "chnsenticorp":
         dataset = hub.dataset.ChnSentiCorp()
-        module = hub.Module(name="ernie")
+        module = hub.Module(name="ernie_tiny")
+        metrics_choices = ["acc"]
+    elif args.dataset.lower() == "tnews":
+        dataset = hub.dataset.TNews()
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
         metrics_choices = ["acc"]
     elif args.dataset.lower() == "nlpcc_dbqa":
         dataset = hub.dataset.NLPCC_DBQA()
-        module = hub.Module(name="ernie")
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
         metrics_choices = ["acc"]
     elif args.dataset.lower() == "lcqmc":
         dataset = hub.dataset.LCQMC()
-        module = hub.Module(name="ernie")
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
+        metrics_choices = ["acc"]
+    elif args.dataset.lower() == 'inews':
+        dataset = hub.dataset.INews()
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
+        metrics_choices = ["acc"]
+    elif args.dataset.lower() == 'bq':
+        dataset = hub.dataset.BQ()
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
+        metrics_choices = ["acc"]
+    elif args.dataset.lower() == 'thucnews':
+        dataset = hub.dataset.THUCNEWS()
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
+        metrics_choices = ["acc"]
+    elif args.dataset.lower() == 'iflytek':
+        dataset = hub.dataset.IFLYTEK()
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
         metrics_choices = ["acc"]
     elif args.dataset.lower() == "mrpc":
         dataset = hub.dataset.GLUE("MRPC")
@@ -90,7 +110,7 @@ if __name__ == '__main__':
         metrics_choices = ["acc"]
     elif args.dataset.lower().startswith("xnli"):
         dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:])
-        module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12")
+        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
         metrics_choices = ["acc"]
     else:
         raise ValueError("%s dataset is not defined" % args.dataset)
diff --git a/demo/text-classification/run_classifier.sh b/demo/text-classification/run_classifier.sh
index d297cb74..c7e5d329 100644
--- a/demo/text-classification/run_classifier.sh
+++ b/demo/text-classification/run_classifier.sh
@@ -1,4 +1,5 @@
 export FLAGS_eager_delete_tensor_gb=0.0
+export CUDA_VISIBLE_DEVICES=0
 
 # User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task
 DATASET="chnsenticorp"
diff --git a/demo/text-classification/run_predict.sh b/demo/text-classification/run_predict.sh
index 281b8587..f8badbb0 100644
--- a/demo/text-classification/run_predict.sh
+++ b/demo/text-classification/run_predict.sh
@@ -17,4 +17,4 @@ python -u predict.py --checkpoint_dir=$CKPT_DIR \
                             --max_seq_len=128 \
                             --use_gpu=True \
                             --dataset=${DATASET} \
-                            --batch_size=150 \
+                            --batch_size=32 \
diff --git a/demo/text-classification/text_classifier.py b/demo/text-classification/text_classifier.py
index 178efa9d..155d9e6d 100644
--- a/demo/text-classification/text_classifier.py
+++ b/demo/text-classification/text_classifier.py
@@ -47,7 +47,7 @@ if __name__ == '__main__':
     elif args.dataset.lower() == "tnews":
         dataset = hub.dataset.TNews()
         module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
-        metrics_choices = ["acc", "f1"]
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == "nlpcc_dbqa":
         dataset = hub.dataset.NLPCC_DBQA()
         module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
@@ -59,19 +59,19 @@ if __name__ == '__main__':
     elif args.dataset.lower() == 'inews':
         dataset = hub.dataset.INews()
         module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
-        metrics_choices = ["acc", "f1"]
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == 'bq':
         dataset = hub.dataset.BQ()
         module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
-        metrics_choices = ["acc", "f1"]
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == 'thucnews':
         dataset = hub.dataset.THUCNEWS()
         module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
-        metrics_choices = ["acc", "f1"]
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == 'iflytek':
         dataset = hub.dataset.IFLYTEK()
         module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
-        metrics_choices = ["acc", "f1"]
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == "mrpc":
         dataset = hub.dataset.GLUE("MRPC")
         module = hub.Module(name="ernie_v2_eng_base")
@@ -97,7 +97,7 @@ if __name__ == '__main__':
         dataset = hub.dataset.GLUE("RTE")
         module = hub.Module(name="ernie_v2_eng_base")
         metrics_choices = ["acc"]
-    elif args.dataset.lower() == "mnli" or args.dataset.lower() == "mnli":
+    elif args.dataset.lower() == "mnli" or args.dataset.lower() == "mnli_m":
         dataset = hub.dataset.GLUE("MNLI_m")
         module = hub.Module(name="ernie_v2_eng_base")
         metrics_choices = ["acc"]
diff --git a/paddlehub/reader/cv_reader.py b/paddlehub/reader/cv_reader.py
index 04fe4ead..8196215a 100644
--- a/paddlehub/reader/cv_reader.py
+++ b/paddlehub/reader/cv_reader.py
@@ -49,6 +49,7 @@ class ImageClassificationReader(object):
         self.data_augmentation = data_augmentation
         self.images_std = images_std
         self.images_mean = images_mean
+        self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
 
         if self.images_mean is None:
             try:
@@ -80,12 +81,15 @@ class ImageClassificationReader(object):
             raise ValueError("The dataset is none and it's not allowed!")
         if phase == "train":
             data = self.dataset.train_data(shuffle)
+            self.num_examples['train'] = len(self.get_train_examples())
         elif phase == "test":
             shuffle = False
             data = self.dataset.test_data(shuffle)
+            self.num_examples['test'] = len(self.get_test_examples())
         elif phase == "val" or phase == "dev":
             shuffle = False
             data = self.dataset.validate_data(shuffle)
+            self.num_examples['dev'] = len(self.get_dev_examples())
         elif phase == "predict":
             data = data
 
-- 
GitLab