diff --git a/demo/object_detection/predict.py b/demo/object_detection/predict.py deleted file mode 100644 index bb4994deb2556f3cc743ebf9dba4f6d3a7485b36..0000000000000000000000000000000000000000 --- a/demo/object_detection/predict.py +++ /dev/null @@ -1,96 +0,0 @@ -#coding:utf-8 -import argparse -import os -import ast - -import paddle.fluid as fluid -import paddlehub as hub -import numpy as np -from paddlehub.reader.cv_reader import ObjectDetectionReader -from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset -from paddlehub.contrib.ppdet.utils.coco_eval import bbox2out -from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature -from paddlehub.common import detection_config as dconf - -# yapf: disable -parser = argparse.ArgumentParser(__doc__) -parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for predict.") -parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.") -parser.add_argument("--batch_size", type=int, default=2, help="Total examples' number in batch for training.") -parser.add_argument("--module", type=str, default="ssd", help="Module used as a feature extractor.") -parser.add_argument("--dataset", type=str, default="coco10", help="Dataset to finetune.") -# yapf: enable. 
- -module_map = { - "yolov3": "yolov3_darknet53_coco2017", - "ssd": "ssd_vgg16_512_coco2017", - "faster_rcnn": "faster_rcnn_resnet50_coco2017", -} - - -def predict(args): - module_name = args.module # 'yolov3_darknet53_coco2017' - model_type = get_model_type(module_name) # 'yolo' - # define data - ds = hub.dataset.Coco10(model_type) - print("ds.num_labels", ds.num_labels) - - data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type) - - # define model(program) - module = hub.Module(name=module_name) - if model_type == 'rcnn': - input_dict, output_dict, program = module.context(trainable=True, phase='train') - input_dict_pred, output_dict_pred, program_pred = module.context(trainable=False) - else: - input_dict, output_dict, program = module.context(trainable=True) - input_dict_pred = output_dict_pred = None - feed_list, pred_feed_list = get_feed_list(module_name, input_dict, input_dict_pred) - feature, pred_feature = get_mid_feature(module_name, output_dict, output_dict_pred) - - config = hub.RunConfig( - use_data_parallel=False, - use_pyreader=True, - use_cuda=args.use_gpu, - batch_size=args.batch_size, - enable_memory_optim=False, - checkpoint_dir=args.checkpoint_dir, - strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) - - task = hub.DetectionTask( - data_reader=data_reader, - num_classes=ds.num_labels, - feed_list=feed_list, - feature=feature, - predict_feed_list=pred_feed_list, - predict_feature=pred_feature, - model_type=model_type, - config=config) - - data = ["./test/test_img_bird.jpg", "./test/test_img_cat.jpg",] - label_map = ds.label_dict() - run_states = task.predict(data=data, accelerate_mode=False) - results = [run_state.run_results for run_state in run_states] - for outs in results: - keys = ['im_shape', 'im_id', 'bbox'] - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - print("im_id", res['im_id']) - is_bbox_normalized = dconf.conf[model_type]['is_bbox_normalized'] - clsid2catid = {} 
- for k in label_map: - clsid2catid[k] = k - bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) - print(bbox_results) - - -if __name__ == "__main__": - args = parser.parse_args() - if not args.module in module_map: - hub.logger.error("module should in %s" % module_map.keys()) - exit(1) - args.module = module_map[args.module] - - predict(args) diff --git a/demo/object_detection/predict_faster_rcnn.py b/demo/object_detection/predict_faster_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..0758f0caa1b5632ab2510d8cea0b9ff1a6173fd6 --- /dev/null +++ b/demo/object_detection/predict_faster_rcnn.py @@ -0,0 +1,82 @@ +# -*- coding:utf8 -*- +import argparse +import os +import ast + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.reader.cv_reader import ObjectDetectionReader +from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset + +# yapf: disable +parser = argparse.ArgumentParser(__doc__) +parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.") +parser.add_argument("--checkpoint_dir", type=str, default="faster_rcnn_finetune_ckpt", help="Path to save log data.") +parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") +parser.add_argument("--module", type=str, default="faster_rcnn_resnet50_coco2017", help="Module used as feature extractor.") +parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") +# yapf: enable. 
+ + +def predict(args): + module = hub.Module(name=args.module) + dataset = hub.dataset.Coco10('rcnn') + + print("dataset.num_labels:", dataset.num_labels) + + # define batch reader + data_reader = ObjectDetectionReader(dataset=dataset, model_type='rcnn') + + input_dict, output_dict, program = module.context(trainable=True) + pred_input_dict, pred_output_dict, pred_program = module.context( + trainable=False, phase='predict') + + feed_list = [ + input_dict["image"].name, input_dict["im_info"].name, + input_dict['gt_bbox'].name, input_dict['gt_class'].name, + input_dict['is_crowd'].name + ] + + pred_feed_list = [ + pred_input_dict['image'].name, pred_input_dict['im_info'].name, + pred_input_dict['im_shape'].name + ] + + feature = [ + output_dict['head_feat'], output_dict['rpn_cls_loss'], + output_dict['rpn_reg_loss'], output_dict['generate_proposal_labels'] + ] + + pred_feature = [pred_output_dict['head_feat'], pred_output_dict['rois']] + + config = hub.RunConfig( + use_data_parallel=False, + use_pyreader=True, + use_cuda=args.use_gpu, + batch_size=args.batch_size, + enable_memory_optim=False, + checkpoint_dir=args.checkpoint_dir, + strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) + + task = hub.FasterRCNNTask( + data_reader=data_reader, + num_classes=dataset.num_labels, + feed_list=feed_list, + feature=feature, + predict_feed_list=pred_feed_list, + predict_feature=pred_feature, + config=config) + + data = [ + "./test/test_img_bird.jpg", + "./test/test_img_cat.jpg", + ] + label_map = dataset.label_dict() + results = task.predict(data=data, return_result=True, accelerate_mode=False) + print(results) + + +if __name__ == "__main__": + args = parser.parse_args() + predict(args) diff --git a/demo/object_detection/predict_ssd.py b/demo/object_detection/predict_ssd.py new file mode 100644 index 0000000000000000000000000000000000000000..4aa8732cde40e26dc52ec35134706f12f3a1165d --- /dev/null +++ b/demo/object_detection/predict_ssd.py @@ -0,0 +1,63 @@ +# -*- 
coding:utf8 -*- +import argparse +import os +import ast + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.reader.cv_reader import ObjectDetectionReader +from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset + +# yapf: disable +parser = argparse.ArgumentParser(__doc__) +parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.") +parser.add_argument("--checkpoint_dir", type=str, default="ssd_finetune_ckpt", help="Path to save log data.") +parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") +parser.add_argument("--module", type=str, default="ssd_vgg16_512_coco2017", help="Module used as feature extractor.") +parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") +# yapf: enable. + + +def predict(args): + module = hub.Module(name=args.module) + dataset = hub.dataset.Coco10('ssd') + + print("dataset.num_labels:", dataset.num_labels) + + # define batch reader + data_reader = ObjectDetectionReader(dataset=dataset, model_type='ssd') + + input_dict, output_dict, program = module.context(trainable=True) + feed_list = [input_dict["image"].name, input_dict["im_size"].name] + feature = output_dict['body_features'] + + config = hub.RunConfig( + use_data_parallel=False, + use_pyreader=True, + use_cuda=args.use_gpu, + batch_size=args.batch_size, + enable_memory_optim=False, + checkpoint_dir=args.checkpoint_dir, + strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) + + task = hub.SSDTask( + data_reader=data_reader, + num_classes=dataset.num_labels, + feed_list=feed_list, + feature=feature, + multi_box_head_config=module.multi_box_head_config, + config=config) + + data = [ + "./test/test_img_bird.jpg", + "./test/test_img_cat.jpg", + ] + label_map = dataset.label_dict() + results = task.predict(data=data, return_result=True, accelerate_mode=False) + print(results) + + +if 
__name__ == "__main__": + args = parser.parse_args() + predict(args) diff --git a/demo/object_detection/predict_yolo.py b/demo/object_detection/predict_yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..4aa8732cde40e26dc52ec35134706f12f3a1165d --- /dev/null +++ b/demo/object_detection/predict_yolo.py @@ -0,0 +1,62 @@ +# -*- coding:utf8 -*- +import argparse +import os +import ast + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.reader.cv_reader import ObjectDetectionReader +from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset + +# yapf: disable +parser = argparse.ArgumentParser(__doc__) +parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.") +parser.add_argument("--checkpoint_dir", type=str, default="yolo_finetune_ckpt", help="Path to save log data.") +parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") +parser.add_argument("--module", type=str, default="yolov3_darknet53_coco2017", help="Module used as feature extractor.") +parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") +# yapf: enable. 
+ + +def predict(args): + module = hub.Module(name=args.module) + dataset = hub.dataset.Coco10('yolo') + + print("dataset.num_labels:", dataset.num_labels) + + # define batch reader + data_reader = ObjectDetectionReader(dataset=dataset, model_type='yolo') + + input_dict, output_dict, program = module.context(trainable=True) + feed_list = [input_dict["image"].name, input_dict["im_size"].name] + feature = output_dict['head_features'] + + config = hub.RunConfig( + use_data_parallel=False, + use_pyreader=True, + use_cuda=args.use_gpu, + batch_size=args.batch_size, + enable_memory_optim=False, + checkpoint_dir=args.checkpoint_dir, + strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) + + task = hub.YOLOTask( + data_reader=data_reader, + num_classes=dataset.num_labels, + feed_list=feed_list, + feature=feature, + config=config) + + data = [ + "./test/test_img_bird.jpg", + "./test/test_img_cat.jpg", + ] + label_map = dataset.label_dict() + results = task.predict(data=data, return_result=True, accelerate_mode=False) + print(results) + + +if __name__ == "__main__": + args = parser.parse_args() + predict(args) diff --git a/demo/object_detection/test/test.txt b/demo/object_detection/test/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..e88dd74263037b483d9cf4f3210f852d319303d4 --- /dev/null +++ b/demo/object_detection/test/test.txt @@ -0,0 +1,2 @@ +IMAGE_PATH +./resources/test/test_img_bird.jpg diff --git a/demo/object_detection/test/test.yml b/demo/object_detection/test/test.yml new file mode 100644 index 0000000000000000000000000000000000000000..a37449b83b4732769ae9f12170330dc8bda32c83 --- /dev/null +++ b/demo/object_detection/test/test.yml @@ -0,0 +1,6 @@ +input_data: + image: + type : IMAGE + key : IMAGE_PATH +config: + top_only : True diff --git a/demo/object_detection/test/test_img_bird.jpg b/demo/object_detection/test/test_img_bird.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..2f4690c6e6f1cacca38b9dc80059383bc4ba93b7 Binary files /dev/null and b/demo/object_detection/test/test_img_bird.jpg differ diff --git a/demo/object_detection/test/test_img_cat.jpg b/demo/object_detection/test/test_img_cat.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c1055eca9e249e6780078638dfd90ea3200a4716 Binary files /dev/null and b/demo/object_detection/test/test_img_cat.jpg differ diff --git a/demo/object_detection/test/test_img_daisy.jpg b/demo/object_detection/test/test_img_daisy.jpg new file mode 100644 index 0000000000000000000000000000000000000000..688cc3e6bda17c384116bc3cec0eba6474984a1d Binary files /dev/null and b/demo/object_detection/test/test_img_daisy.jpg differ diff --git a/demo/object_detection/test/test_img_roses.jpg b/demo/object_detection/test/test_img_roses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..711c883e09aa528bafd26021798aa11506e2113d Binary files /dev/null and b/demo/object_detection/test/test_img_roses.jpg differ diff --git a/demo/object_detection/test/test_img_sheep.jpg b/demo/object_detection/test/test_img_sheep.jpg new file mode 100644 index 0000000000000000000000000000000000000000..94f1e4b5e61f873003da0a0175764892e0047239 Binary files /dev/null and b/demo/object_detection/test/test_img_sheep.jpg differ diff --git a/demo/object_detection/train.py b/demo/object_detection/train.py deleted file mode 100644 index 00590dc15fdc2cb704e287a646db9d9ba0515064..0000000000000000000000000000000000000000 --- a/demo/object_detection/train.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding:utf8 -*- -import argparse -import os -import ast - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.reader.cv_reader import ObjectDetectionReader -from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset -import numpy as np -from paddlehub.common.detection_config import get_model_type, get_feed_list, get_mid_feature - -# yapf: disable 
-parser = argparse.ArgumentParser(__doc__) -parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.") -parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.") -parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.") -parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") -parser.add_argument("--module", type=str, default="ssd", help="Module used as feature extractor.") -parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") -parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") -# yapf: enable. - -module_map = { - "yolov3": "yolov3_darknet53_coco2017", - "ssd": "ssd_vgg16_512_coco2017", - "faster_rcnn": "faster_rcnn_resnet50_coco2017", -} - - -def finetune(args): - module_name = args.module # 'yolov3_darknet53_coco2017' - model_type = get_model_type(module_name) # 'yolo' - # define dataset - ds = hub.dataset.Coco10(model_type) - # base_path = '/home/local3/zhaopenghao/data/detect/paddle-job-84942-0' - # train_dir = 'train_data/images' - # train_list = 'train_data/coco/instances_coco.json' - # val_dir = 'eval_data/images' - # val_list = 'eval_data/coco/instances_coco.json' - # ds = ObjectDetectionDataset(base_path, train_dir, train_list, val_dir, val_list, val_dir, val_list, model_type=model_type) - # print(ds.label_dict()) - print("ds.num_labels", ds.num_labels) - - # define batch reader - data_reader = ObjectDetectionReader(dataset=ds, model_type=model_type) - - # define model(program) - module = hub.Module(name=module_name) - if model_type == 'rcnn': - input_dict, output_dict, program = module.context(trainable=True, phase='train') - input_dict_pred, output_dict_pred, program_pred = module.context(trainable=False) - else: - input_dict, output_dict, program 
= module.context(trainable=True) - input_dict_pred = output_dict_pred = None - - print("input_dict keys", input_dict.keys()) - print("output_dict keys", output_dict.keys()) - feed_list, pred_feed_list = get_feed_list(module_name, input_dict, input_dict_pred) - print("output_dict length:", len(output_dict)) - print(output_dict.keys()) - if output_dict_pred is not None: - print(output_dict_pred.keys()) - feature, pred_feature = get_mid_feature(module_name, output_dict, output_dict_pred) - - config = hub.RunConfig( - log_interval=10, - eval_interval=100, - use_data_parallel=args.use_data_parallel, - use_pyreader=True, - use_cuda=args.use_gpu, - num_epoch=args.num_epoch, - batch_size=args.batch_size, - enable_memory_optim=False, - checkpoint_dir=args.checkpoint_dir, - strategy=hub.finetune.strategy.DefaultFinetuneStrategy(learning_rate=0.00025, optimizer_name="adam")) - - task = hub.DetectionTask( - data_reader=data_reader, - num_classes=ds.num_labels, - feed_list=feed_list, - feature=feature, - predict_feed_list=pred_feed_list, - predict_feature=pred_feature, - model_type=model_type, - config=config) - task.finetune_and_eval() - - -if __name__ == "__main__": - args = parser.parse_args() - if not args.module in module_map: - hub.logger.error("module should in %s" % module_map.keys()) - exit(1) - args.module = module_map[args.module] - - finetune(args) diff --git a/demo/object_detection/train_faster_rcnn.py b/demo/object_detection/train_faster_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..883182a8195f3ead6a5be93bc30262f90a793c93 --- /dev/null +++ b/demo/object_detection/train_faster_rcnn.py @@ -0,0 +1,81 @@ +# -*- coding:utf8 -*- +import argparse +import os +import ast + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.reader.cv_reader import ObjectDetectionReader +from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset + +# yapf: disable +parser = argparse.ArgumentParser(__doc__) 
+parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.") +parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.") +parser.add_argument("--checkpoint_dir", type=str, default="faster_rcnn_finetune_ckpt", help="Path to save log data.") +parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") +parser.add_argument("--module", type=str, default="faster_rcnn_resnet50_coco2017", help="Module used as feature extractor.") +parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") +parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") +# yapf: enable. + + +def finetune(args): + module = hub.Module(name=args.module) + dataset = hub.dataset.Coco10('rcnn') + + print("dataset.num_labels:", dataset.num_labels) + + # define batch reader + data_reader = ObjectDetectionReader(dataset=dataset, model_type='rcnn') + + input_dict, output_dict, program = module.context(trainable=True) + pred_input_dict, pred_output_dict, pred_program = module.context( + trainable=False, phase='predict') + + feed_list = [ + input_dict["image"].name, input_dict["im_info"].name, + input_dict['gt_bbox'].name, input_dict['gt_class'].name, + input_dict['is_crowd'].name + ] + + pred_feed_list = [ + pred_input_dict['image'].name, pred_input_dict['im_info'].name, + pred_input_dict['im_shape'].name + ] + + feature = [ + output_dict['head_feat'], output_dict['rpn_cls_loss'], + output_dict['rpn_reg_loss'], output_dict['generate_proposal_labels'] + ] + + pred_feature = [pred_output_dict['head_feat'], pred_output_dict['rois']] + + config = hub.RunConfig( + log_interval=10, + eval_interval=100, + use_data_parallel=args.use_data_parallel, + use_pyreader=True, + use_cuda=args.use_gpu, + num_epoch=args.num_epoch, + batch_size=args.batch_size, + enable_memory_optim=False, + 
checkpoint_dir=args.checkpoint_dir, + strategy=hub.finetune.strategy.DefaultFinetuneStrategy( + learning_rate=0.00025, optimizer_name="adam")) + + task = hub.FasterRCNNTask( + data_reader=data_reader, + num_classes=dataset.num_labels, + feed_list=feed_list, + feature=feature, + predict_feed_list=pred_feed_list, + predict_feature=pred_feature, + config=config) + task.finetune_and_eval() + + +if __name__ == "__main__": + args = parser.parse_args() + finetune(args) diff --git a/demo/object_detection/train_ssd.py b/demo/object_detection/train_ssd.py new file mode 100644 index 0000000000000000000000000000000000000000..0bdec2937be9552f79266c43eebb74bc29d66ef5 --- /dev/null +++ b/demo/object_detection/train_ssd.py @@ -0,0 +1,62 @@ +# -*- coding:utf8 -*- +import argparse +import os +import ast + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.reader.cv_reader import ObjectDetectionReader +from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset + +# yapf: disable +parser = argparse.ArgumentParser(__doc__) +parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.") +parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.") +parser.add_argument("--checkpoint_dir", type=str, default="ssd_finetune_ckpt", help="Path to save log data.") +parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") +parser.add_argument("--module", type=str, default="ssd_vgg16_512_coco2017", help="Module used as feature extractor.") +parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") +parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") +# yapf: enable. 
+ + +def finetune(args): + module = hub.Module(name=args.module) + dataset = hub.dataset.Coco10('ssd') + + print("dataset.num_labels:", dataset.num_labels) + + # define batch reader + data_reader = ObjectDetectionReader(dataset=dataset, model_type='ssd') + + input_dict, output_dict, program = module.context(trainable=True) + feed_list = [input_dict["image"].name, input_dict["im_size"].name] + feature = output_dict['body_features'] + + config = hub.RunConfig( + log_interval=10, + eval_interval=100, + use_data_parallel=args.use_data_parallel, + use_pyreader=True, + use_cuda=args.use_gpu, + num_epoch=args.num_epoch, + batch_size=args.batch_size, + enable_memory_optim=False, + checkpoint_dir=args.checkpoint_dir, + strategy=hub.finetune.strategy.DefaultFinetuneStrategy( + learning_rate=0.00025, optimizer_name="adam")) + + task = hub.SSDTask( + data_reader=data_reader, + num_classes=dataset.num_labels, + feed_list=feed_list, + feature=feature, + multi_box_head_config=module.multi_box_head_config, + config=config) + task.finetune_and_eval() + + +if __name__ == "__main__": + args = parser.parse_args() + finetune(args) diff --git a/demo/object_detection/train_yolo.py b/demo/object_detection/train_yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..16d7dcc75ddcf23a66310843eca2441ff71ea647 --- /dev/null +++ b/demo/object_detection/train_yolo.py @@ -0,0 +1,61 @@ +# -*- coding:utf8 -*- +import argparse +import os +import ast + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddlehub.reader.cv_reader import ObjectDetectionReader +from paddlehub.dataset.base_cv_dataset import ObjectDetectionDataset + +# yapf: disable +parser = argparse.ArgumentParser(__doc__) +parser.add_argument("--num_epoch", type=int, default=50, help="Number of epoches for fine-tuning.") +parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.") +parser.add_argument("--checkpoint_dir", type=str, 
default="yolo_finetune_ckpt", help="Path to save log data.") +parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") +parser.add_argument("--module", type=str, default="yolov3_darknet53_coco2017", help="Module used as feature extractor.") +parser.add_argument("--dataset", type=str, default="coco_10", help="Dataset to finetune.") +parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") +# yapf: enable. + + +def finetune(args): + module = hub.Module(name=args.module) + dataset = hub.dataset.Coco10('yolo') + + print("dataset.num_labels:", dataset.num_labels) + + # define batch reader + data_reader = ObjectDetectionReader(dataset=dataset, model_type='yolo') + + input_dict, output_dict, program = module.context(trainable=True) + feed_list = [input_dict["image"].name, input_dict["im_size"].name] + feature = output_dict['head_features'] + + config = hub.RunConfig( + log_interval=10, + eval_interval=100, + use_data_parallel=args.use_data_parallel, + use_pyreader=True, + use_cuda=args.use_gpu, + num_epoch=args.num_epoch, + batch_size=args.batch_size, + enable_memory_optim=False, + checkpoint_dir=args.checkpoint_dir, + strategy=hub.finetune.strategy.DefaultFinetuneStrategy( + learning_rate=0.00025, optimizer_name="adam")) + + task = hub.YOLOTask( + data_reader=data_reader, + num_classes=dataset.num_labels, + feed_list=feed_list, + feature=feature, + config=config) + task.finetune_and_eval() + + +if __name__ == "__main__": + args = parser.parse_args() + finetune(args) diff --git a/paddlehub/__init__.py b/paddlehub/__init__.py index 9a1559d702218a6afef2a2c58683046e28e67a6b..c4dfb9fe555133a93ee75749538b1229b66a619f 100644 --- a/paddlehub/__init__.py +++ b/paddlehub/__init__.py @@ -48,7 +48,9 @@ from .io.type import DataType from .finetune.task import BaseTask from .finetune.task import ClassifierTask -from .finetune.task import DetectionTask +from .finetune.task 
import SSDTask +from .finetune.task import YOLOTask +from .finetune.task import FasterRCNNTask from .finetune.task import TextClassifierTask from .finetune.task import ImageClassifierTask from .finetune.task import SequenceLabelTask diff --git a/paddlehub/finetune/task/__init__.py b/paddlehub/finetune/task/__init__.py index e4457699bfaa5fe0156d64a22304952821c0a088..457cbc65d17d3757a27272dccc99ed571880876f 100644 --- a/paddlehub/finetune/task/__init__.py +++ b/paddlehub/finetune/task/__init__.py @@ -15,7 +15,9 @@ from .base_task import BaseTask, RunEnv, RunState from .classifier_task import ClassifierTask, ImageClassifierTask, TextClassifierTask, MultiLabelClassifierTask -from .detection_task import DetectionTask +from .ssd_task import SSDTask +from .yolo_task import YOLOTask +from .faster_rcnn_task import FasterRCNNTask from .reading_comprehension_task import ReadingComprehensionTask from .regression_task import RegressionTask from .sequence_task import SequenceLabelTask diff --git a/paddlehub/finetune/task/base_task.py b/paddlehub/finetune/task/base_task.py index ff589c034202183fc406b04502946f615cffc71b..7e1ebf4d7013402c8a32e56da04bd83fd9da73fc 100644 --- a/paddlehub/finetune/task/base_task.py +++ b/paddlehub/finetune/task/base_task.py @@ -403,6 +403,8 @@ class BaseTask(object): with fluid.program_guard(self.env.main_program, self._base_startup_program): with fluid.unique_name.guard(self.env.UNG): + if self.is_train_phase or self.is_test_phase: + self.env.labels = self._add_label() self.env.outputs = self._build_net() if self.is_train_phase or self.is_test_phase: self.env.labels = self._add_label() @@ -557,9 +559,6 @@ class BaseTask(object): @property def labels(self): - if self.is_predict_phase: - raise RuntimeError() - if not self.env.is_inititalized: self._build_env() return self.env.labels diff --git a/paddlehub/finetune/task/detection_task.py b/paddlehub/finetune/task/detection_task.py index 
1684105530cdd211c3a7e6372a9cb17a8e0ddece..11c2afe81ef4a2eedadd2c7abca36aa4680d2a91 100644 --- a/paddlehub/finetune/task/detection_task.py +++ b/paddlehub/finetune/task/detection_task.py @@ -27,9 +27,10 @@ from paddle.fluid.initializer import Normal, Xavier from paddle.fluid.regularizer import L2Decay from paddle.fluid.initializer import MSRA -from .base_task import BaseTask -from ...contrib.ppdet.utils.eval_utils import eval_results -from ...common import detection_config as dconf +from paddlehub.finetune.task.base_task import BaseTask +from paddlehub.contrib.ppdet.utils.eval_utils import eval_results +from paddlehub.contrib.ppdet.utils.coco_eval import bbox2out +from paddlehub.common import detection_config as dconf from paddlehub.common.paddle_helper import clone_program feed_var_def = [ @@ -108,12 +109,9 @@ class DetectionTask(BaseTask): num_classes, feed_list, feature, - model_type='ssd', - predict_feature=None, - predict_feed_list=None, + model_type, startup_program=None, config=None, - hidden_units=None, metrics_choices="default"): if metrics_choices == "default": metrics_choices = ["ap"] @@ -127,35 +125,21 @@ class DetectionTask(BaseTask): config=config, metrics_choices=metrics_choices) - if predict_feature is not None: - main_program = predict_feature[0].block.program - self._predict_base_main_program = clone_program( - main_program, for_test=False) - else: - self._predict_base_main_program = None - self._predict_base_feed_list = predict_feed_list - self.feature = feature - self.predict_feature = predict_feature - self.num_classes = num_classes - self.hidden_units = hidden_units self.model_type = model_type - @property - def base_main_program(self): - if not self.is_train_phase and self._predict_base_main_program is not None: - return self._predict_base_main_program - return self._base_main_program - - @property - def base_feed_list(self): - if not self.is_train_phase and self._predict_base_feed_list is not None: - return self._predict_base_feed_list - 
return self._base_feed_list - - @property - def base_feed_var_list(self): - vars = self.main_program.global_block().vars - return [vars[varname] for varname in self.base_feed_list] + def save_inference_model(self, + dirname, + model_filename=None, + params_filename=None): + with self.phase_guard("predict"): + fluid.io.save_inference_model( + dirname=dirname, + executor=self.exe, + feeded_var_names=self._feed_list(for_export=True), + target_vars=self.fetch_var_list, + main_program=self.main_program, + model_filename=model_filename, + params_filename=params_filename) @property def return_numpy(self): @@ -195,503 +179,6 @@ class DetectionTask(BaseTask): labels.append(l) return labels - def _ssd_build_net(self): - feature_list = self.feature - image = self.base_feed_var_list[0] - - # fix input size according to its module - mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head( - inputs=feature_list, - image=image, - num_classes=self.num_classes, - aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2.], - [2.]], - base_size=512, # 300, - min_sizes=[20.0, 51.0, 133.0, 215.0, 296.0, 378.0, - 460.0], # [60.0, 105.0, 150.0, 195.0, 240.0, 285.0], - max_sizes=[51.0, 133.0, 215.0, 296.0, 378.0, 460.0, - 542.0], # [[], 150.0, 195.0, 240.0, 285.0, 300.0], - steps=[8, 16, 32, 64, 128, 256, 512], - min_ratio=15, - max_ratio=90, - kernel_size=3, - offset=0.5, - flip=True, - pad=1, - ) - - self.env.mid_vars = [mbox_locs, mbox_confs, box, box_var] - - nmsed_out = fluid.layers.detection_output( - mbox_locs, - mbox_confs, - box, - box_var, - background_label=0, - nms_threshold=0.45, - nms_top_k=400, - keep_top_k=200, - score_threshold=0.01, - nms_eta=1.0) - - if self.is_predict_phase: # add im_id - self.env.labels = self._ssd_add_label() - return [nmsed_out] - - def _ssd_add_label(self): - # train: 'gt_box', 'gt_label' - # dev: 'im_shape', 'im_id', 'gt_box', 'gt_label', 'is_difficult' - if self.is_train_phase: - idx_list = [1, 2] # 'gt_box', 'gt_label' - elif 
self.is_test_phase: - # xTodo: remove 'im_shape' when using new module - idx_list = [2, 3, 4, - 5] # 'im_id', 'gt_box', 'gt_label', 'is_difficult' - else: - idx_list = [1] # im_id - return self._add_label_by_fields(idx_list) - - def _ssd_add_loss(self): - if self.is_train_phase: - gt_box = self.labels[0] - gt_label = self.labels[1] - else: # xTodo: update here when using new module - gt_box = self.labels[1] - gt_label = self.labels[2] - mbox_locs, mbox_confs, box, box_var = self.env.mid_vars - loss = fluid.layers.ssd_loss( - location=mbox_locs, - confidence=mbox_confs, - gt_box=gt_box, - gt_label=gt_label, - prior_box=box, - prior_box_var=box_var) - loss = fluid.layers.reduce_sum(loss) - loss.persistable = True - return loss - - def _ssd_feed_list(self, for_export=False): - # xTodo: update when using new module - feed_list = [varname for varname in self.base_feed_list] - if self.is_train_phase: - feed_list = feed_list[:1] + [label.name for label in self.labels] - elif self.is_test_phase: - feed_list = feed_list + [label.name for label in self.labels] - else: # self.is_predict_phase: - if for_export: - feed_list = [feed_list[0]] - else: - # 'image', 'im_id', 'im_shape' - feed_list = [feed_list[0], self.labels[0].name, feed_list[1]] - return feed_list - - def _ssd_fetch_list(self, for_export=False): - # ensure fetch 'im_shape', 'im_id', 'bbox' at first three elements in test phase - if self.is_train_phase: - return [self.loss.name] - elif self.is_test_phase: - # xTodo: update when using new module - # im_id, bbox, dets, loss - return [ - self.base_feed_list[1], self.labels[0].name, - self.outputs[0].name, self.loss.name - ] - # im_shape, im_id, bbox - if for_export: - return [self.outputs[0].name] - else: - return [ - self.base_feed_list[1], self.labels[0].name, - self.outputs[0].name - ] - - def _rcnn_build_net(self): - if self.is_train_phase: - head_feat = self.feature[0] - else: - head_feat = self.predict_feature[0] - - # Rename following layers for: ValueError: 
Variable cls_score_w has been created before. - # the previous shape is (2048, 81); the new shape is (100352, 81). - # They are not matched. - cls_score = fluid.layers.fc( - input=head_feat, - size=self.num_classes, - act=None, - name='my_cls_score', - param_attr=ParamAttr( - name='my_cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='my_cls_score_b', - learning_rate=2., - regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc( - input=head_feat, - size=4 * self.num_classes, - act=None, - name='my_bbox_pred', - param_attr=ParamAttr( - name='my_bbox_pred_w', initializer=Normal(loc=0.0, - scale=0.001)), - bias_attr=ParamAttr( - name='my_bbox_pred_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - if self.is_train_phase: - rpn_cls_loss, rpn_reg_loss, outs = self.feature[1:] - labels_int32 = outs[1] - bbox_targets = outs[2] - bbox_inside_weights = outs[3] - bbox_outside_weights = outs[4] - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = fluid.layers.smooth_l1( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights, - sigma=1.0) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - total_loss = fluid.layers.sum( - [loss_bbox, loss_cls, rpn_cls_loss, rpn_reg_loss]) - return [total_loss] - else: - rois = self.predict_feature[1] - im_info = self.base_feed_var_list[1] - im_shape = self.base_feed_var_list[2] - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, - (-1, self.num_classes, 4)) - # decoded_box = self.box_coder(prior_box=boxes, 
target_box=bbox_pred) - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - target_box=bbox_pred, - code_type='decode_center_size', - box_normalized=False, - axis=1) - cliped_box = fluid.layers.box_clip( - input=decoded_box, im_info=im_shape) - # pred_result = self.nms(bboxes=cliped_box, scores=cls_prob) - pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0) - if self.is_predict_phase: - self.env.labels = self._rcnn_add_label() - return [pred_result] - - def _rcnn_add_label(self): - if self.is_train_phase: - idx_list = [ - 2, - ] # 'im_id' - elif self.is_test_phase: - idx_list = [2, 4, 5, - 6] # 'im_id', 'gt_box', 'gt_label', 'is_difficult' - else: # predict - idx_list = [2] - return self._add_label_by_fields(idx_list) - - def _rcnn_add_loss(self): - if self.is_train_phase: - loss = self.env.outputs[-1] - else: - loss = fluid.layers.fill_constant( - shape=[1], value=-1, dtype='float32') - return loss - - def _rcnn_feed_list(self, for_export=False): - feed_list = [varname for varname in self.base_feed_list] - if self.is_train_phase: - # feed_list is ['image', 'im_info', 'gt_box', 'gt_label', 'is_crowd'] - return feed_list[:2] + [self.labels[0].name] + feed_list[2:] - elif self.is_test_phase: - # feed list is ['image', 'im_info', 'im_shape'] - return feed_list[:2] + [self.labels[0].name] + feed_list[2:] + \ - [label.name for label in self.labels[1:]] - if for_export: - # skip im_id - return feed_list[:2] + feed_list[3:] - else: - return feed_list[:2] + [self.labels[0].name] + feed_list[2:] - - def _rcnn_fetch_list(self, for_export=False): - # ensure fetch 'im_shape', 'im_id', 'bbox' at first three elements in test phase - if self.is_train_phase: - return [self.loss.name] - elif self.is_test_phase: - # im_shape, im_id, bbox - return [ - self.feed_list[2], 
self.labels[0].name, self.outputs[0].name, - self.loss.name - ] - - # im_shape, im_id, bbox - if for_export: - return [self.outputs[0].name] - else: - return [ - self.feed_list[2], self.labels[0].name, self.outputs[0].name - ] - - def _yolo_parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." - - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _yolo_build_net(self): - self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - self._yolo_parse_anchors(anchors) - - tip_list = self.feature - outputs = [] - for i, tip in enumerate(tip_list): - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - # Rename for: conflict with module pretrain weights - param_attr=ParamAttr( - name="ft_yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name="ft_yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if self.is_train_phase: - return outputs - - im_size = self.base_feed_var_list[1] - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=0.01, - 
downsample_ratio=downsample, - name="yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - downsample //= 2 - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - # pred = self.nms(bboxes=yolo_boxes, scores=yolo_scores) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=.01, - nms_top_k=1000, - keep_top_k=100, - nms_threshold=0.45, - normalized=False, - nms_eta=1.0, - background_label=-1) - if self.is_predict_phase: - self.env.labels = self._yolo_add_label() - return [pred] - - def _yolo_add_label(self): - if self.is_train_phase: - idx_list = [1, 2, 3] # 'gt_box', 'gt_label', 'gt_score' - elif self.is_test_phase: - idx_list = [2, 3, 4, - 5] # 'im_id', 'gt_box', 'gt_label', 'is_difficult' - else: # predict - idx_list = [2] - return self._add_label_by_fields(idx_list) - - def _yolo_add_loss(self): - if self.is_train_phase: - gt_box, gt_label, gt_score = self.labels - outputs = self.outputs - losses = [] - downsample = 32 - for i, output in enumerate(outputs): - anchor_mask = self.anchor_masks[i] - loss = fluid.layers.yolov3_loss( - x=output, - gt_box=gt_box, - gt_label=gt_label, - gt_score=gt_score, - anchors=self.anchors, - anchor_mask=anchor_mask, - class_num=self.num_classes, - ignore_thresh=0.7, - downsample_ratio=downsample, - use_label_smooth=True, - name="yolo_loss" + str(i)) - losses.append(fluid.layers.reduce_mean(loss)) - downsample //= 2 - - loss = sum(losses) - else: - loss = fluid.layers.fill_constant( - shape=[1], value=-1, dtype='float32') - return loss - - def _yolo_feed_list(self, for_export=False): - feed_list = [varname for varname in self.base_feed_list] - if self.is_train_phase: - return [feed_list[0]] + [label.name for label in self.labels] - elif self.is_test_phase: - return feed_list + [label.name for label in self.labels] - if for_export: - return feed_list[:2] - else: - return feed_list + 
[self.labels[0].name] - - def _yolo_fetch_list(self, for_export=False): - # ensure fetch 'im_shape', 'im_id', 'bbox' at first three elements in test phase - if self.is_train_phase: - return [self.loss.name] - elif self.is_test_phase: - # im_shape, im_id, bbox - return [ - self.feed_list[1], self.labels[0].name, self.outputs[0].name, - self.loss.name - ] - - # im_shape, im_id, bbox - if for_export: - return [self.outputs[0].name] - else: - return [ - self.feed_list[1], self.labels[0].name, self.outputs[0].name - ] - - def _build_net(self): - if self.model_type == 'ssd': - outputs = self._ssd_build_net() - elif self.model_type == 'rcnn': - outputs = self._rcnn_build_net() - elif self.model_type == 'yolo': - outputs = self._yolo_build_net() - else: - raise NotImplementedError - return outputs - - def _add_label(self): - if self.model_type == 'ssd': - labels = self._ssd_add_label() - elif self.model_type == 'rcnn': - labels = self._rcnn_add_label() - elif self.model_type == 'yolo': - labels = self._yolo_add_label() - else: - raise NotImplementedError - return labels - - def _add_loss(self): - if self.model_type == 'ssd': - loss = self._ssd_add_loss() - elif self.model_type == 'rcnn': - loss = self._rcnn_add_loss() - elif self.model_type == 'yolo': - loss = self._yolo_add_loss() - else: - raise NotImplementedError - return loss - - def _add_metrics(self): - return [] - - @property - def feed_list(self): - return self._feed_list(False) - - def _feed_list(self, for_export=False): - if self.model_type == 'ssd': - return self._ssd_feed_list(for_export) - elif self.model_type == 'rcnn': - return self._rcnn_feed_list(for_export) - elif self.model_type == 'yolo': - return self._yolo_feed_list(for_export) - else: - raise NotImplementedError - - @property - def fetch_list(self): - # ensure fetch loss at last element in train/test phase - # ensure fetch 'im_shape', 'im_id', 'bbox' at first three elements in test phase - return self._fetch_list(False) - - def _fetch_list(self, 
for_export=False): - if self.model_type == 'ssd': - return self._ssd_fetch_list(for_export) - elif self.model_type == 'rcnn': - return self._rcnn_fetch_list(for_export) - elif self.model_type == 'yolo': - return self._yolo_fetch_list(for_export) - else: - raise NotImplementedError - - @property - def fetch_var_list(self): - fetch_list = self._fetch_list(True) - vars = self.main_program.global_block().vars - return [vars[varname] for varname in fetch_list] - - @property - def labels(self): - if not self.env.is_inititalized: - self._build_env() - return self.env.labels - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None): - with self.phase_guard("predict"): - fluid.io.save_inference_model( - dirname=dirname, - executor=self.exe, - feeded_var_names=self._feed_list(for_export=True), - target_vars=self.fetch_var_list, - main_program=self.main_program, - model_filename=model_filename, - params_filename=params_filename) - def _calculate_metrics(self, run_states): loss_sum = run_examples = 0 run_step = run_time_used = 0 @@ -740,3 +227,51 @@ class DetectionTask(BaseTask): raise ValueError("Not Support Metric: \"%s\"" % metric) return scores, avg_loss, run_speed + + def _postprocessing(self, run_states): + """ + postprocessing the run result, get readable result. 
+ + Args: + run_states (RunState): the raw run result to be processed + + Returns: + list: readable result + """ + results = [run_state.run_results for run_state in run_states] + for outs in results: + keys = ['im_shape', 'im_id', 'bbox'] + res = { + k: (np.array(v), v.recursive_sequence_lengths()) + for k, v in zip(keys, outs) + } + is_bbox_normalized = dconf.conf[ + self.model_type]['is_bbox_normalized'] + clsid2catid = {} + try: + items = self._base_data_reader.label_map.items() + except: + items = {idx: idx for idx in range(self.num_classes)}.items() + for k, v in items: + clsid2catid[v] = k + bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) + return bbox_results + + def _add_metrics(self): + return [] + + @property + def feed_list(self): + return self._feed_list(False) + + @property + def fetch_list(self): + # ensure fetch loss at last element in train/test phase + # ensure fetch 'im_shape', 'im_id', 'bbox' at first three elements in test phase + return self._fetch_list(False) + + @property + def fetch_var_list(self): + fetch_list = self._fetch_list(True) + vars = self.main_program.global_block().vars + return [vars[varname] for varname in fetch_list] diff --git a/paddlehub/finetune/task/faster_rcnn_task.py b/paddlehub/finetune/task/faster_rcnn_task.py new file mode 100644 index 0000000000000000000000000000000000000000..ea4eca503645a1ca3a2e84f86798ef156cf6bbe1 --- /dev/null +++ b/paddlehub/finetune/task/faster_rcnn_task.py @@ -0,0 +1,213 @@ +#coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import Normal + +from paddlehub.common.paddle_helper import clone_program +from paddlehub.finetune.task.detection_task import DetectionTask + + +class FasterRCNNTask(DetectionTask): + def __init__(self, + data_reader, + num_classes, + feed_list, + feature, + predict_feed_list=None, + predict_feature=None, + startup_program=None, + config=None, + metrics_choices="default"): + super(FasterRCNNTask, self).__init__( + data_reader=data_reader, + num_classes=num_classes, + feed_list=feed_list, + feature=feature, + model_type='rcnn', + startup_program=startup_program, + config=config, + metrics_choices=metrics_choices) + + self._base_feed_list = feed_list + self._base_predict_feed_list = predict_feed_list + self.feature = feature + self.predict_feature = predict_feature + self.num_classes = num_classes + if predict_feature: + self._base_predict_main_program = clone_program( + predict_feature[0].block.program, for_test=False) + else: + self._base_predict_main_program = None + + def _build_net(self): + if self.is_train_phase: + head_feat = self.feature[0] + else: + if self.is_predict_phase: + self.env.labels = self._add_label() + head_feat = self.main_program.global_block().vars[ + self.predict_feature[0].name] + + # Rename following layers for: ValueError: Variable cls_score_w has been created 
before. + # the previous shape is (2048, 81); the new shape is (100352, 81). + # They are not matched. + cls_score = fluid.layers.fc( + input=head_feat, + size=self.num_classes, + act=None, + name='my_cls_score', + param_attr=ParamAttr( + name='my_cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), + bias_attr=ParamAttr( + name='my_cls_score_b', + learning_rate=2., + regularizer=L2Decay(0.))) + bbox_pred = fluid.layers.fc( + input=head_feat, + size=4 * self.num_classes, + act=None, + name='my_bbox_pred', + param_attr=ParamAttr( + name='my_bbox_pred_w', initializer=Normal(loc=0.0, + scale=0.001)), + bias_attr=ParamAttr( + name='my_bbox_pred_b', + learning_rate=2., + regularizer=L2Decay(0.))) + + if self.is_train_phase: + rpn_cls_loss, rpn_reg_loss, outs = self.feature[1:] + labels_int32 = outs[1] + bbox_targets = outs[2] + bbox_inside_weights = outs[3] + bbox_outside_weights = outs[4] + labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') + labels_int64.stop_gradient = True + loss_cls = fluid.layers.softmax_with_cross_entropy( + logits=cls_score, label=labels_int64, numeric_stable_mode=True) + loss_cls = fluid.layers.reduce_mean(loss_cls) + loss_bbox = fluid.layers.smooth_l1( + x=bbox_pred, + y=bbox_targets, + inside_weight=bbox_inside_weights, + outside_weight=bbox_outside_weights, + sigma=1.0) + loss_bbox = fluid.layers.reduce_mean(loss_bbox) + total_loss = fluid.layers.sum( + [loss_bbox, loss_cls, rpn_cls_loss, rpn_reg_loss]) + return [total_loss] + else: + rois = self.main_program.global_block().vars[ + self.predict_feature[1].name] + im_info = self.feed_var_list[1] + im_shape = self.feed_var_list[3] + im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, rois) + boxes = rois / im_scale + cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) + bbox_pred = fluid.layers.reshape(bbox_pred, + (-1, self.num_classes, 4)) + # decoded_box = self.box_coder(prior_box=boxes, 
+            bboxes=cliped_box,
self.feed_list[2], self.labels[0].name, self.outputs[0].name, + self.loss.name + ] + + # im_shape, im_id, bbox + if for_export: + return [self.outputs[0].name] + else: + return [ + self.feed_list[2], self.labels[0].name, self.outputs[0].name + ] + + @property + def base_main_program(self): + if self.is_train_phase: + return self._base_main_program + return self._base_predict_main_program diff --git a/paddlehub/finetune/task/ssd_task.py b/paddlehub/finetune/task/ssd_task.py new file mode 100644 index 0000000000000000000000000000000000000000..4b99e7c14a590321e1fccc4c4a2531a298605f81 --- /dev/null +++ b/paddlehub/finetune/task/ssd_task.py @@ -0,0 +1,144 @@ +#coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid as fluid + +from paddlehub.finetune.task.detection_task import DetectionTask + + +class SSDTask(DetectionTask): + def __init__(self, + data_reader, + num_classes, + feed_list, + feature, + multi_box_head_config, + startup_program=None, + config=None, + metrics_choices="default"): + super(SSDTask, self).__init__( + data_reader=data_reader, + num_classes=num_classes, + feed_list=feed_list, + feature=feature, + model_type='ssd', + startup_program=startup_program, + config=config, + metrics_choices=metrics_choices) + + self._base_feed_list = feed_list + self.feature = feature + self.num_classes = num_classes + self.multi_box_head_config = multi_box_head_config + + def _build_net(self): + if self.is_predict_phase: # add im_id + self.env.labels = self._add_label() + + feature_list = self.feature + image = self.feed_var_list[0] + + # fix input size according to its module + mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head( + inputs=feature_list, + image=image, + num_classes=self.num_classes, + **self.multi_box_head_config) + + self.env.mid_vars = [mbox_locs, mbox_confs, box, box_var] + + nmsed_out = fluid.layers.detection_output( + mbox_locs, + mbox_confs, + box, + box_var, + background_label=0, + nms_threshold=0.45, + nms_top_k=400, + keep_top_k=200, + score_threshold=0.01, + nms_eta=1.0) + + return [nmsed_out] + + def _add_label(self): + if self.is_train_phase: + # 'gt_box', 'gt_label' + idx_list = [1, 2] + elif self.is_test_phase: + # 'im_id', 'gt_box', 'gt_label', 'is_difficult' + idx_list = [2, 3, 4, 5] + else: + # im_id + idx_list = [1] + return self._add_label_by_fields(idx_list) + + def _add_loss(self): + if self.is_train_phase: + gt_box = self.labels[0] + gt_label = self.labels[1] + else: # xTodo: update here when using new module + gt_box = self.labels[1] + gt_label = self.labels[2] + mbox_locs, mbox_confs, 
box, box_var = self.env.mid_vars + loss = fluid.layers.ssd_loss( + location=mbox_locs, + confidence=mbox_confs, + gt_box=gt_box, + gt_label=gt_label, + prior_box=box, + prior_box_var=box_var) + loss = fluid.layers.reduce_sum(loss) + loss.persistable = True + return loss + + def _feed_list(self, for_export=False): + # todo: update when using new module + feed_list = [varname for varname in self._base_feed_list] + if self.is_train_phase: + feed_list = feed_list[:1] + [label.name for label in self.labels] + elif self.is_test_phase: + feed_list = feed_list + [label.name for label in self.labels] + else: # self.is_predict_phase: + if for_export: + feed_list = [feed_list[0]] + else: + # 'image', 'im_id', 'im_shape' + feed_list = [feed_list[0], self.labels[0].name, feed_list[1]] + return feed_list + + def _fetch_list(self, for_export=False): + # ensure fetch 'im_shape', 'im_id', 'bbox' at first three elements in test phase + if self.is_train_phase: + return [self.loss.name] + elif self.is_test_phase: + # xTodo: update when using new module + # im_id, bbox, dets, loss + return [ + self._base_feed_list[1], self.labels[0].name, + self.outputs[0].name, self.loss.name + ] + # im_shape, im_id, bbox + if for_export: + return [self.outputs[0].name] + else: + return [ + self._base_feed_list[1], self.labels[0].name, + self.outputs[0].name + ] diff --git a/paddlehub/finetune/task/yolo_task.py b/paddlehub/finetune/task/yolo_task.py new file mode 100644 index 0000000000000000000000000000000000000000..fd852422038e8924327bb82be791728e679a175d --- /dev/null +++ b/paddlehub/finetune/task/yolo_task.py @@ -0,0 +1,200 @@ +#coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.regularizer import L2Decay + +from paddlehub.finetune.task.detection_task import DetectionTask + + +class YOLOTask(DetectionTask): + def __init__(self, + data_reader, + num_classes, + feed_list, + feature, + startup_program=None, + config=None, + metrics_choices="default"): + super(YOLOTask, self).__init__( + data_reader=data_reader, + num_classes=num_classes, + feed_list=feed_list, + feature=feature, + model_type='yolo', + startup_program=startup_program, + config=config, + metrics_choices=metrics_choices) + + self._base_feed_list = feed_list + self.feature = feature + self.num_classes = num_classes + + def _parse_anchors(self, anchors): + """ + Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors + """ + self.anchors = [] + self.mask_anchors = [] + + assert len(anchors) > 0, "ANCHORS not set." + assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
+ + for anchor in anchors: + assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) + self.anchors.extend(anchor) + + anchor_num = len(anchors) + for masks in self.anchor_masks: + self.mask_anchors.append([]) + for mask in masks: + assert mask < anchor_num, "anchor mask index overflow" + self.mask_anchors[-1].extend(anchors[mask]) + + def _build_net(self): + if self.is_predict_phase: + self.env.labels = self._add_label() + self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + self._parse_anchors(anchors) + + tip_list = self.feature + outputs = [] + for i, tip in enumerate(tip_list): + # out channel number = mask_num * (5 + class_num) + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + block_out = fluid.layers.conv2d( + input=tip, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + # Rename for: conflict with module pretrain weights + param_attr=ParamAttr( + name="ft_yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), + name="ft_yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + if self.is_train_phase: + return outputs + + im_size = self.feed_var_list[1] + boxes = [] + scores = [] + downsample = 32 + for i, output in enumerate(outputs): + box, score = fluid.layers.yolo_box( + x=output, + img_size=im_size, + anchors=self.mask_anchors[i], + class_num=self.num_classes, + conf_thresh=0.01, + downsample_ratio=downsample, + name="yolo_box" + str(i)) + boxes.append(box) + scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) + downsample //= 2 + yolo_boxes = fluid.layers.concat(boxes, axis=1) + yolo_scores = fluid.layers.concat(scores, axis=2) + # pred = self.nms(bboxes=yolo_boxes, scores=yolo_scores) + pred = fluid.layers.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=.01, + nms_top_k=1000, + keep_top_k=100, 
+ nms_threshold=0.45, + normalized=False, + nms_eta=1.0, + background_label=-1) + return [pred] + + def _add_label(self): + if self.is_train_phase: + idx_list = [1, 2, 3] # 'gt_box', 'gt_label', 'gt_score' + elif self.is_test_phase: + idx_list = [2, 3, 4, + 5] # 'im_id', 'gt_box', 'gt_label', 'is_difficult' + else: # predict + idx_list = [2] + return self._add_label_by_fields(idx_list) + + def _add_loss(self): + if self.is_train_phase: + gt_box, gt_label, gt_score = self.labels + outputs = self.outputs + losses = [] + downsample = 32 + for i, output in enumerate(outputs): + anchor_mask = self.anchor_masks[i] + loss = fluid.layers.yolov3_loss( + x=output, + gt_box=gt_box, + gt_label=gt_label, + gt_score=gt_score, + anchors=self.anchors, + anchor_mask=anchor_mask, + class_num=self.num_classes, + ignore_thresh=0.7, + downsample_ratio=downsample, + use_label_smooth=True, + name="yolo_loss" + str(i)) + losses.append(fluid.layers.reduce_mean(loss)) + downsample //= 2 + + loss = sum(losses) + else: + loss = fluid.layers.fill_constant( + shape=[1], value=-1, dtype='float32') + return loss + + def _feed_list(self, for_export=False): + feed_list = [varname for varname in self._base_feed_list] + if self.is_train_phase: + return [feed_list[0]] + [label.name for label in self.labels] + elif self.is_test_phase: + return feed_list + [label.name for label in self.labels] + if for_export: + return feed_list[:2] + else: + return feed_list + [self.labels[0].name] + + def _fetch_list(self, for_export=False): + # ensure fetch 'im_shape', 'im_id', 'bbox' at first three elements in test phase + if self.is_train_phase: + return [self.loss.name] + elif self.is_test_phase: + # im_shape, im_id, bbox + return [ + self.feed_list[1], self.labels[0].name, self.outputs[0].name, + self.loss.name + ] + + # im_shape, im_id, bbox + if for_export: + return [self.outputs[0].name] + else: + return [ + self.feed_list[1], self.labels[0].name, self.outputs[0].name + ]