From 2c825d54fa5eafe6b873f3e466313102965614e2 Mon Sep 17 00:00:00 2001 From: kinghuin Date: Fri, 10 Jul 2020 17:24:45 +0800 Subject: [PATCH] ready 1.8 (#745) --- demo/object_detection/predict.py | 103 --------------------- demo/object_detection/train.py | 102 -------------------- paddlehub/dataset/__init__.py | 1 - paddlehub/dataset/coco10.py | 41 -------- paddlehub/dataset/dataset.py | 5 +- paddlehub/finetune/task/base_task.py | 15 +-- paddlehub/finetune/task/classifier_task.py | 1 - paddlehub/finetune/task/generation_task.py | 27 ++---- 8 files changed, 18 insertions(+), 277 deletions(-) delete mode 100644 demo/object_detection/predict.py delete mode 100644 demo/object_detection/train.py delete mode 100644 paddlehub/dataset/coco10.py diff --git a/demo/object_detection/predict.py b/demo/object_detection/predict.py deleted file mode 100644 index a68656ce..00000000 --- a/demo/object_detection/predict.py +++ /dev/null @@ -1,103 +0,0 @@ -#coding:utf-8 -import argparse -import os -import ast - -import paddle.fluid as fluid -import paddlehub as hub -import numpy as np -from paddlehub.reader.cv_reader import ObjectDetectionReader -from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset -from paddlehub.contrib.ppdet.utils.coco_eval import bbox2out -from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature -from paddlehub.common import detection_config as dconf - -# yapf: disable -parser = argparse.ArgumentParser(__doc__) -parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for predict.") -parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.") -parser.add_argument("--batch_size", type=int, default=2, help="Total examples' number in batch for training.") -parser.add_argument("--module", type=str, default="ssd", help="Module used as a feature extractor.") -parser.add_argument("--dataset", type=str, default="coco10", help="Dataset to finetune.") -# yapf: enable. 
- -module_map = { - "yolov3": "yolov3_darknet53_coco2017", - "ssd": "ssd_vgg16_512_coco2017", - "faster_rcnn": "faster_rcnn_resnet50_coco2017", -} - - -def predict(args): - module_name = args.module # 'yolov3_darknet53_coco2017' - model_type = get_model_type(module_name) # 'yolo' - # define data - ds = hub.dataset.Coco10(model_type) - print("ds.num_labels", ds.num_labels) - - data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type) - - # define model(program) - module = hub.Module(name=module_name) - if model_type == 'rcnn': - input_dict, output_dict, program = module.context( - trainable=True, phase='train') - input_dict_pred, output_dict_pred, program_pred = module.context( - trainable=False) - else: - input_dict, output_dict, program = module.context(trainable=True) - input_dict_pred = output_dict_pred = None - feed_list, pred_feed_list = get_feed_list(module_name, input_dict, - input_dict_pred) - feature, pred_feature = get_mid_feature(module_name, output_dict, - output_dict_pred) - - config = hub.RunConfig( - use_data_parallel=False, - use_pyreader=True, - use_cuda=args.use_gpu, - batch_size=args.batch_size, - enable_memory_optim=False, - checkpoint_dir=args.checkpoint_dir, - strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) - - task = hub.DetectionTask( - data_reader=data_reader, - num_classes=ds.num_labels, - feed_list=feed_list, - feature=feature, - predict_feed_list=pred_feed_list, - predict_feature=pred_feature, - model_type=model_type, - config=config) - - data = [ - "./test/test_img_bird.jpg", - "./test/test_img_cat.jpg", - ] - label_map = ds.label_dict() - run_states = task.predict(data=data, accelerate_mode=False) - results = [run_state.run_results for run_state in run_states] - for outs in results: - keys = ['im_shape', 'im_id', 'bbox'] - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - print("im_id", res['im_id']) - is_bbox_normalized = dconf.conf[model_type]['is_bbox_normalized'] - clsid2catid = {} - for k in label_map: - clsid2catid[k] = k - bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) - print(bbox_results) - - -if __name__ == "__main__": - args = parser.parse_args() - if not args.module in module_map: - hub.logger.error("module should in %s" % module_map.keys()) - exit(1) - args.module = module_map[args.module] - - predict(args) diff --git a/demo/object_detection/train.py b/demo/object_detection/train.py deleted file mode 100644 index a0f3833d..00000000 --- a/demo/object_detection/train.py +++ /dev/null @@ -1,102 +0,0 @@ -# -*- coding:utf8 -*- -import argparse -import os -import ast - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.reader.cv_reader import ObjectDetectionReader -from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset -import numpy as np -from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature - -# yapf: disable -parser = argparse.ArgumentParser(__doc__) -parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.") -parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.") -parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.") -parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") -parser.add_argument("--module", type=str, default="ssd", help="Module used as feature extractor.") 
-parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") -parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") -# yapf: enable. - -module_map = { - "yolov3": "yolov3_darknet53_coco2017", - "ssd": "ssd_vgg16_512_coco2017", - "faster_rcnn": "faster_rcnn_resnet50_coco2017", -} - - -def finetune(args): - module_name = args.module # 'yolov3_darknet53_coco2017' - model_type = get_model_type(module_name) # 'yolo' - # define dataset - ds = hub.dataset.Coco10(model_type) - # base_path = '/home/local3/zhaopenghao/data/detect/paddle-job-84942-0' - # train_dir = 'train_data/images' - # train_list = 'train_data/coco/instances_coco.json' - # val_dir = 'eval_data/images' - # val_list = 'eval_data/coco/instances_coco.json' - # ds = ObjectDetectionDataset(base_path, train_dir, train_list, val_dir, val_list, val_dir, val_list, model_type=model_type) - # print(ds.label_dict()) - print("ds.num_labels", ds.num_labels) - - # define batch reader - data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type) - - # define model(program) - module = hub.Module(name=module_name) - if model_type == 'rcnn': - input_dict, output_dict, program = module.context( - trainable=True, phase='train') - input_dict_pred, output_dict_pred, program_pred = module.context( - trainable=False) - else: - input_dict, output_dict, program = module.context(trainable=True) - input_dict_pred = output_dict_pred = None - - print("input_dict keys", input_dict.keys()) - print("output_dict keys", output_dict.keys()) - feed_list, pred_feed_list = get_feed_list(module_name, input_dict, - input_dict_pred) - print("output_dict length:", len(output_dict)) - print(output_dict.keys()) - if output_dict_pred is not None: - print(output_dict_pred.keys()) - feature, pred_feature = get_mid_feature(module_name, output_dict, - output_dict_pred) - - config = hub.RunConfig( - log_interval=10, - eval_interval=100, - use_data_parallel=args.use_data_parallel, - use_pyreader=True, - use_cuda=args.use_gpu, - num_epoch=args.num_epoch, - batch_size=args.batch_size, - enable_memory_optim=False, - checkpoint_dir=args.checkpoint_dir, - strategy=hub.finetune.strategy.DefaultFinetuneStrategy( - learning_rate=0.00025, optimizer_name="adam")) - - task = hub.DetectionTask( - data_reader=data_reader, - num_classes=ds.num_labels, - feed_list=feed_list, - feature=feature, - predict_feed_list=pred_feed_list, - predict_feature=pred_feature, - model_type=model_type, - config=config) - task.finetune_and_eval() - - -if __name__ == "__main__": - args = parser.parse_args() - if not args.module in module_map: - hub.logger.error("module should in %s" % module_map.keys()) - exit(1) - args.module = module_map[args.module] - - finetune(args) diff --git a/paddlehub/dataset/__init__.py b/paddlehub/dataset/__init__.py index e1b1c2a2..49610e47 100644 --- a/paddlehub/dataset/__init__.py +++ b/paddlehub/dataset/__init__.py @@ -38,4 +38,3 @@ from .flowers import FlowersDataset as Flowers from .stanford_dogs import StanfordDogsDataset as StanfordDogs from .food101 import Food101Dataset as Food101 from .indoor67 import Indoor67Dataset as Indoor67 -from .coco10 import Coco10 diff --git a/paddlehub/dataset/coco10.py b/paddlehub/dataset/coco10.py deleted file mode 100644 index 1760dde9..00000000 --- a/paddlehub/dataset/coco10.py +++ /dev/null @@ -1,41 +0,0 @@ -#coding:utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import paddlehub as hub -from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset - - -class Coco10(ObjectDetectionDataset): - def __init__(self, model_type='ssd'): - dataset_path = os.path.join(hub.common.dir.DATA_HOME, "coco_10") - # self.base_path = self._download_dataset( - # dataset_path=dataset_path, - # url="https://bj.bcebos.com/paddlehub-dataset/dog-cat.tar.gz") - base_path = dataset_path - train_image_dir = 'val' - train_list_file = 'annotations/val.json' - validate_image_dir = 'val' - validate_list_file = 'annotations/val.json' - test_image_dir = 'val' - test_list_file = 'annotations/val.json' - super(Coco10, self).__init__( - base_path, train_image_dir, train_list_file, validate_image_dir, - validate_list_file, test_image_dir, test_list_file, model_type) diff --git a/paddlehub/dataset/dataset.py b/paddlehub/dataset/dataset.py index 9b202bd2..18616737 100644 --- a/paddlehub/dataset/dataset.py +++ b/paddlehub/dataset/dataset.py @@ -106,8 +106,9 @@ class BaseDataset(object): "As label_list has been assigned, label_file is noneffective" ) - self.label_index = dict( - zip(self.label_list, range(len(self.label_list)))) + if self.label_list: + self.label_index = dict( + zip(self.label_list, range(len(self.label_list)))) def get_train_examples(self): return self.train_examples diff --git a/paddlehub/finetune/task/base_task.py b/paddlehub/finetune/task/base_task.py index a6fa1781..1ae7773a 100644 --- a/paddlehub/finetune/task/base_task.py +++ b/paddlehub/finetune/task/base_task.py @@ -35,7 +35,7 @@ import paddle.fluid as fluid from visualdl import LogWriter import paddlehub as hub -from paddlehub.reader.nlp_reader import BaseNLPReader +from paddlehub.reader.nlp_reader import BaseReader, BaseNLPReader from paddlehub.common.paddle_helper import dtype_map, clone_program from paddlehub.common.utils import mkdir from paddlehub.common.dir import tmp_dir @@ -350,13 +350,14 @@ class BaseTask(object): self._base_data_reader = data_reader self._base_feed_list = feed_list - if isinstance(data_reader, BaseNLPReader): + if isinstance(data_reader, BaseReader): self._compatible_mode = True - logger.warning( - "PaddleHub v1.8 has deprecated the reader and feed_list parameters in the nlp Task. We provided an easier usage, " - "in which you can use your tokenizer to preprocess dataset and run task in a clear flow. " - "New demo see https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.8/demo/text_classification/text_cls.py" - ) + if isinstance(data_reader, BaseNLPReader): + logger.warning( + "PaddleHub v1.8 has deprecated the reader and feed_list parameters in the nlp Task. We provided an easier usage, " + "in which you can use your tokenizer to preprocess dataset and run task in a clear flow. 
" + "New demo see https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.8/demo/text_classification/text_cls.py" + ) else: self._compatible_mode = False diff --git a/paddlehub/finetune/task/classifier_task.py b/paddlehub/finetune/task/classifier_task.py index 5bc2c409..e3b06a2a 100644 --- a/paddlehub/finetune/task/classifier_task.py +++ b/paddlehub/finetune/task/classifier_task.py @@ -196,7 +196,6 @@ class TextClassifierTask(ClassifierTask): feature(Variable): the `feature` will be used to classify texts. It must be the sentence-level feature, shape as [-1, emb_size]. `Token_feature` and `feature` couldn't be setted at the same time. One of them must be setted as not None. Default None. token_feature(Variable): the `feature` will be used to connect the pre-defined network. It must be the token-level feature, shape as [-1, seq_len, emb_size]. Default None. network(str): the pre-defined network. Choices: 'bilstm', 'bow', 'cnn', 'dpcnn', 'gru' and 'lstm'. Default None. If network is setted, then `token_feature` must be setted and `feature` must be None. - main_program (object): the customized main program, default None. startup_program (object): the customized startup program, default None. config (RunConfig): run config for the task, such as batch_size, epoch, learning_rate setting and so on. Default None. hidden_units(list): the element of `hidden_units` list is the full-connect layer size. It will add the full-connect layers to the program. Default None. diff --git a/paddlehub/finetune/task/generation_task.py b/paddlehub/finetune/task/generation_task.py index efed8386..88061876 100644 --- a/paddlehub/finetune/task/generation_task.py +++ b/paddlehub/finetune/task/generation_task.py @@ -164,21 +164,6 @@ class TextGenerationTask(BaseTask): # Define decoder and initialize it. dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size, self.dropout) - enc_last_step = fluid.layers.slice( - self.token_feature, - axes=[1], - starts=[-1], - ends=[self.token_feature.shape[1] + 1]) - dec_init_cell = fluid.layers.fc( - input=enc_last_step, - size=self.hidden_size, - num_flatten_dims=1, - param_attr=fluid.ParamAttr( - name="dec_init_cell_w", - initializer=fluid.initializer.TruncatedNormal(scale=0.02)), - bias_attr=fluid.ParamAttr( - name="dec_init_cell_b", - initializer=fluid.initializer.Constant(0.))) dec_init_hidden = fluid.layers.fc( input=self.feature, size=self.hidden_size, @@ -189,12 +174,14 @@ class TextGenerationTask(BaseTask): bias_attr=fluid.ParamAttr( name="dec_init_hidden_b", initializer=fluid.initializer.Constant(0.))) - # TODO: maybe dec_init_hidden can use self.feature, and dec_init_cell can be get_initial_states - dec_initial_states = [ - [[dec_init_hidden, dec_init_cell]] * self.num_layers, + dec_initial_states = [[[ + dec_init_hidden, dec_cell.get_initial_states( - batch_ref=self.token_feature, shape=[self.hidden_size]) - ] + batch_ref=self.feature, shape=[self.hidden_size]) + ]] * self.num_layers, + dec_cell.get_initial_states( + batch_ref=self.feature, + shape=[self.hidden_size])] tar_vocab_size = len(self._label_list) tar_embeder = lambda x: fluid.embedding( input=x, -- GitLab