Unverified  Commit 2c825d54  authored by kinghuin  committed by GitHub

ready 1.8 (#745)

Parent 95395059
#coding:utf-8
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
import numpy as np
from paddlehub.reader.cv_reader import ObjectDetectionReader
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
from paddlehub.contrib.ppdet.utils.coco_eval import bbox2out
from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature
from paddlehub.common import detection_config as dconf
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for predict.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=2, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="ssd", help="Module used as a feature extractor.")
parser.add_argument("--dataset", type=str, default="coco10", help="Dataset to finetune.")
# yapf: enable.
module_map = {
    "yolov3": "yolov3_darknet53_coco2017",
    "ssd": "ssd_vgg16_512_coco2017",
    "faster_rcnn": "faster_rcnn_resnet50_coco2017",
}

def predict(args):
    module_name = args.module  # e.g. 'yolov3_darknet53_coco2017'
    model_type = get_model_type(module_name)  # e.g. 'yolo'

    # define data
    ds = hub.dataset.Coco10(model_type)
    print("ds.num_labels", ds.num_labels)
    data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type)

    # define model (program)
    module = hub.Module(name=module_name)
    if model_type == 'rcnn':
        input_dict, output_dict, program = module.context(
            trainable=True, phase='train')
        input_dict_pred, output_dict_pred, program_pred = module.context(
            trainable=False)
    else:
        input_dict, output_dict, program = module.context(trainable=True)
        input_dict_pred = output_dict_pred = None

    feed_list, pred_feed_list = get_feed_list(module_name, input_dict,
                                              input_dict_pred)
    feature, pred_feature = get_mid_feature(module_name, output_dict,
                                            output_dict_pred)

    config = hub.RunConfig(
        use_data_parallel=False,
        use_pyreader=True,
        use_cuda=args.use_gpu,
        batch_size=args.batch_size,
        enable_memory_optim=False,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy())

    task = hub.DetectionTask(
        data_reader=data_reader,
        num_classes=ds.num_labels,
        feed_list=feed_list,
        feature=feature,
        predict_feed_list=pred_feed_list,
        predict_feature=pred_feature,
        model_type=model_type,
        config=config)

    data = [
        "./test/test_img_bird.jpg",
        "./test/test_img_cat.jpg",
    ]
    label_map = ds.label_dict()
    run_states = task.predict(data=data, accelerate_mode=False)
    results = [run_state.run_results for run_state in run_states]
    for outs in results:
        keys = ['im_shape', 'im_id', 'bbox']
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        print("im_id", res['im_id'])
        is_bbox_normalized = dconf.conf[model_type]['is_bbox_normalized']
        clsid2catid = {k: k for k in label_map}  # identity mapping here
        bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        print(bbox_results)

if __name__ == "__main__":
    args = parser.parse_args()
    if args.module not in module_map:
        hub.logger.error("module should be in %s" % module_map.keys())
        exit(1)
    args.module = module_map[args.module]
    predict(args)
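The raw `bbox_results` printed above are COCO-style detection records. As a hedged illustration (assuming, as in PaddleDetection, that `bbox2out` yields dicts with 'image_id', 'category_id', 'bbox' and 'score' keys), a hypothetical helper like the one below maps category ids back to label names; it is not part of the demo:

# Hypothetical post-processing sketch, not from the commit. Assumes each
# det is a dict with keys 'image_id', 'category_id', 'bbox' ([x, y, w, h])
# and 'score', as produced by PaddleDetection's bbox2out.
def readable_results(bbox_results, label_map, score_thresh=0.5):
    readable = []
    for det in bbox_results:
        if det['score'] < score_thresh:
            continue  # drop low-confidence detections
        readable.append({
            'image_id': det['image_id'],
            'label': label_map[det['category_id']],  # id -> class name
            'bbox': det['bbox'],
            'score': det['score'],
        })
    return readable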
# -*- coding:utf8 -*-
import argparse
import os
import ast
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.reader.cv_reader import ObjectDetectionReader
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
import numpy as np
from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.")
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="ssd", help="Module used as feature extractor.")
parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
# yapf: enable.
module_map = {
    "yolov3": "yolov3_darknet53_coco2017",
    "ssd": "ssd_vgg16_512_coco2017",
    "faster_rcnn": "faster_rcnn_resnet50_coco2017",
}

def finetune(args):
    module_name = args.module  # e.g. 'yolov3_darknet53_coco2017'
    model_type = get_model_type(module_name)  # e.g. 'yolo'

    # define dataset
    ds = hub.dataset.Coco10(model_type)
    # base_path = '/home/local3/zhaopenghao/data/detect/paddle-job-84942-0'
    # train_dir = 'train_data/images'
    # train_list = 'train_data/coco/instances_coco.json'
    # val_dir = 'eval_data/images'
    # val_list = 'eval_data/coco/instances_coco.json'
    # ds = ObjectDetectionDataset(base_path, train_dir, train_list, val_dir, val_list, val_dir, val_list, model_type=model_type)
    # print(ds.label_dict())
    print("ds.num_labels", ds.num_labels)

    # define batch reader
    data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type)

    # define model (program)
    module = hub.Module(name=module_name)
    if model_type == 'rcnn':
        input_dict, output_dict, program = module.context(
            trainable=True, phase='train')
        input_dict_pred, output_dict_pred, program_pred = module.context(
            trainable=False)
    else:
        input_dict, output_dict, program = module.context(trainable=True)
        input_dict_pred = output_dict_pred = None

    print("input_dict keys", input_dict.keys())
    print("output_dict keys", output_dict.keys())
    feed_list, pred_feed_list = get_feed_list(module_name, input_dict,
                                              input_dict_pred)
    print("output_dict length:", len(output_dict))
    print(output_dict.keys())
    if output_dict_pred is not None:
        print(output_dict_pred.keys())
    feature, pred_feature = get_mid_feature(module_name, output_dict,
                                            output_dict_pred)

    config = hub.RunConfig(
        log_interval=10,
        eval_interval=100,
        use_data_parallel=args.use_data_parallel,
        use_pyreader=True,
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        enable_memory_optim=False,
        checkpoint_dir=args.checkpoint_dir,
        strategy=hub.finetune.strategy.DefaultFinetuneStrategy(
            learning_rate=0.00025, optimizer_name="adam"))

    task = hub.DetectionTask(
        data_reader=data_reader,
        num_classes=ds.num_labels,
        feed_list=feed_list,
        feature=feature,
        predict_feed_list=pred_feed_list,
        predict_feature=pred_feature,
        model_type=model_type,
        config=config)
    task.finetune_and_eval()

if __name__ == "__main__":
    args = parser.parse_args()
    if args.module not in module_map:
        hub.logger.error("module should be in %s" % module_map.keys())
        exit(1)
    args.module = module_map[args.module]
    finetune(args)
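The commented-out block inside `finetune` hints at swapping in a custom COCO-format dataset instead of Coco10. A minimal sketch of that path, with hypothetical directories (the positional arguments mirror the commented-out `ObjectDetectionDataset` call: base path, train images/annotations, validation images/annotations, test images/annotations, then model type):

# Sketch only: fine-tune on a custom COCO-format dataset. All paths here
# are hypothetical placeholders, not from the commit.
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset

ds = ObjectDetectionDataset(
    '/path/to/my_dataset',                    # base_path (hypothetical)
    'train/images', 'annotations/train.json', # training split
    'val/images', 'annotations/val.json',     # validation split
    'val/images', 'annotations/val.json',     # test split (reuses val here)
    model_type='yolo')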
......
@@ -38,4 +38,3 @@ from .flowers import FlowersDataset as Flowers
 from .stanford_dogs import StanfordDogsDataset as StanfordDogs
 from .food101 import Food101Dataset as Food101
 from .indoor67 import Indoor67Dataset as Indoor67
-from .coco10 import Coco10
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import paddlehub as hub
from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset
class Coco10(ObjectDetectionDataset):
    def __init__(self, model_type='ssd'):
        dataset_path = os.path.join(hub.common.dir.DATA_HOME, "coco_10")
        # self.base_path = self._download_dataset(
        #     dataset_path=dataset_path,
        #     url="https://bj.bcebos.com/paddlehub-dataset/dog-cat.tar.gz")
        base_path = dataset_path
        train_image_dir = 'val'
        train_list_file = 'annotations/val.json'
        validate_image_dir = 'val'
        validate_list_file = 'annotations/val.json'
        test_image_dir = 'val'
        test_list_file = 'annotations/val.json'
        super(Coco10, self).__init__(
            base_path, train_image_dir, train_list_file, validate_image_dir,
            validate_list_file, test_image_dir, test_list_file, model_type)
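Read off the constructor, one can sketch the on-disk layout this dataset expects; note that all three splits reuse the same val data, and the download call is commented out, so the files must already exist locally:

# Expected layout under <DATA_HOME>/coco_10 (inferred from the constructor;
# a sketch, not from the commit):
#
#   coco_10/
#       val/                  # images, reused for train/val/test splits
#       annotations/
#           val.json          # COCO-format annotations, also reused
#
# Illustrative usage:
import paddlehub as hub

ds = hub.dataset.Coco10(model_type='yolo')
print(ds.num_labels)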
......
@@ -106,6 +106,7 @@ class BaseDataset(object):
                     "As label_list has been assigned, label_file is noneffective"
                 )
         if self.label_list:
+            self.label_index = dict(
+                zip(self.label_list, range(len(self.label_list))))
......
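For clarity, the added `label_index` is just a label-to-position lookup table; with illustrative values (not from the diff):

# With label_list = ['bird', 'cat', 'dog'], the added code computes:
label_index = dict(zip(['bird', 'cat', 'dog'], range(3)))
assert label_index == {'bird': 0, 'cat': 1, 'dog': 2}  # label -> index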
......
@@ -35,7 +35,7 @@ import paddle.fluid as fluid
 from visualdl import LogWriter
 import paddlehub as hub
-from paddlehub.reader.nlp_reader import BaseNLPReader
+from paddlehub.reader.nlp_reader import BaseReader, BaseNLPReader
 from paddlehub.common.paddle_helper import dtype_map, clone_program
 from paddlehub.common.utils import mkdir
 from paddlehub.common.dir import tmp_dir
......
@@ -350,8 +350,9 @@ class BaseTask(object):
         self._base_data_reader = data_reader
         self._base_feed_list = feed_list
-        if isinstance(data_reader, BaseNLPReader):
+        if isinstance(data_reader, BaseReader):
             self._compatible_mode = True
+            if isinstance(data_reader, BaseNLPReader):
                 logger.warning(
                     "PaddleHub v1.8 has deprecated the reader and feed_list parameters in the nlp Task. We provided an easier usage, "
                     "in which you can use your tokenizer to preprocess dataset and run task in a clear flow. "
......
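For context, the deprecation warning refers to the new v1.8 flow in which a tokenizer plus dataset replace `data_reader` and `feed_list`. A hedged sketch based on the v1.8 text-classification demo (the module name, `hub.BertTokenizer`, and the `tokenizer`/`dataset` keywords are assumptions drawn from that demo, not from this diff):

# Hedged sketch of the v1.8 flow the warning points to; names assumed.
import paddlehub as hub

module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())  # assumed API
dataset = hub.dataset.ChnSentiCorp(tokenizer=tokenizer, max_seq_len=128)
cls_task = hub.TextClassifierTask(
    dataset=dataset,                   # replaces data_reader + feed_list
    feature=outputs["pooled_output"],
    num_classes=dataset.num_labels,
    config=hub.RunConfig(num_epoch=1, batch_size=32))
cls_task.finetune_and_eval()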
......
@@ -196,7 +196,6 @@ class TextClassifierTask(ClassifierTask):
         feature(Variable): the `feature` will be used to classify texts. It must be the sentence-level feature, shape as [-1, emb_size]. `token_feature` and `feature` can't both be set; exactly one of them must be not None. Default None.
         token_feature(Variable): the `token_feature` will be used to connect the pre-defined network. It must be the token-level feature, shape as [-1, seq_len, emb_size]. Default None.
         network(str): the pre-defined network. Choices: 'bilstm', 'bow', 'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. If network is set, then `token_feature` must be set and `feature` must be None.
-        main_program (object): the customized main program, default None.
         startup_program (object): the customized startup program, default None.
         config (RunConfig): run config for the task, such as batch_size, epoch, learning rate settings and so on. Default None.
         hidden_units(list): each element of `hidden_units` is the size of a fully-connected layer that will be appended to the program. Default None.
......
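To make the `token_feature`/`network` combination from the docstring concrete, a hedged sketch (the `reader`, `feed_list`, `outputs`, `dataset` and `config` variables are assumed to be set up as in the standard PaddleHub text-classification demos; this is not code from the diff):

# Hedged sketch: a predefined 'bilstm' network on top of token-level features.
cls_task = hub.TextClassifierTask(
    data_reader=reader,                        # assumed NLP reader instance
    feed_list=feed_list,                       # assumed feed list
    token_feature=outputs["sequence_output"],  # token-level: [-1, seq_len, emb_size]
    network='bilstm',                          # or 'bow', 'cnn', 'dpcnn', 'gru', 'lstm'
    num_classes=dataset.num_labels,
    config=config)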
......
@@ -164,21 +164,6 @@ class TextGenerationTask(BaseTask):
         # Define decoder and initialize it.
         dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                         self.dropout)
-        enc_last_step = fluid.layers.slice(
-            self.token_feature,
-            axes=[1],
-            starts=[-1],
-            ends=[self.token_feature.shape[1] + 1])
-        dec_init_cell = fluid.layers.fc(
-            input=enc_last_step,
-            size=self.hidden_size,
-            num_flatten_dims=1,
-            param_attr=fluid.ParamAttr(
-                name="dec_init_cell_w",
-                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
-            bias_attr=fluid.ParamAttr(
-                name="dec_init_cell_b",
-                initializer=fluid.initializer.Constant(0.)))
         dec_init_hidden = fluid.layers.fc(
             input=self.feature,
             size=self.hidden_size,
......
@@ -189,12 +174,14 @@ class TextGenerationTask(BaseTask):
             bias_attr=fluid.ParamAttr(
                 name="dec_init_hidden_b",
                 initializer=fluid.initializer.Constant(0.)))
-        dec_initial_states = [
-            [[dec_init_hidden, dec_init_cell]] * self.num_layers,
-            dec_cell.get_initial_states(
-                batch_ref=self.token_feature, shape=[self.hidden_size])
-        ]
+        # TODO: maybe dec_init_hidden can use self.feature, and dec_init_cell can be get_initial_states
+        dec_initial_states = [[[
+            dec_init_hidden,
+            dec_cell.get_initial_states(
+                batch_ref=self.feature, shape=[self.hidden_size])
+        ]] * self.num_layers,
+                              dec_cell.get_initial_states(
+                                  batch_ref=self.feature,
+                                  shape=[self.hidden_size])]
         tar_vocab_size = len(self._label_list)
         tar_embeder = lambda x: fluid.embedding(
             input=x,
......
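A shape sketch of the new decoder initializer may help (illustrative only; the role of the trailing zero state is an assumption about AttentionDecoderCell, which this diff does not show):

# With num_layers = L and hidden_size = H, the new dec_initial_states is
#
#   [ [[h0, z0]] * L,   # per-layer (hidden, cell) pairs: h0 from the fc layer,
#                       # z0 a zero state from dec_cell.get_initial_states
#     z0 ]              # one extra zero state (assumed attention/input-feed state)
#
# where h0 = dec_init_hidden has shape [-1, H] and each z0 is a zero tensor
# batch-shaped like self.feature with trailing dimension H.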