diff --git a/docs/EXPORT_MODEL.md b/docs/EXPORT_MODEL.md
index 614d87e29c5e5d5685f64c9ee78cf7d1d5192d09..2d06c7604e4a97e8b74a99d07571d2d2878f4ca0 100644
--- a/docs/EXPORT_MODEL.md
+++ b/docs/EXPORT_MODEL.md
@@ -14,14 +14,14 @@ Use a model trained as described in [Training/Evaluation/Inference](GETTING_STARTED_cn.md) to try this out; the script is as follows:

 ```bash
-# Export the FasterRCNN model; the default shape of the model's data layer is 3x800x1333
+# Export the FasterRCNN model
 python tools/export_model.py -c configs/faster_rcnn_r50_1x.yml \
         --output_dir=./inference_model \
         -o weights=output/faster_rcnn_r50_1x/model_final \

 ```

-The inference model is exported to the `inference_model/faster_rcnn_r50_1x` directory; the model and parameter files are named `__model__` and `__params__` respectively.
+- The inference model is exported to the `inference_model/faster_rcnn_r50_1x` directory; the model and parameter files are named `__model__` and `__params__` respectively.

 ## Setting the input size of the exported model

@@ -46,3 +46,5 @@ python tools/export_model.py -c configs/ssd/ssd_mobilenet_v1_voc.yml \
         -o weights= https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar \
            SSDTestFeed.image_shape=[3,300,300]
 ```
+
+- When saving FPN-series models, the downsampled and upsampled feature maps must have matching dimensions, so image_shape must be set to a multiple of 32.
diff --git a/ppdet/data/data_feed.py b/ppdet/data/data_feed.py
index 0de5ad1ab97a5a42805af3f4664c60d3ac143558..110fd68524263a2bd80f0d02b47d1f9ea5e1903c 100644
--- a/ppdet/data/data_feed.py
+++ b/ppdet/data/data_feed.py
@@ -453,7 +453,7 @@ class FasterRCNNTrainFeed(DataFeed):
                      'image', 'im_info', 'im_id', 'gt_box', 'gt_label',
                      'is_crowd'
                  ],
-                 image_shape=[3, 800, 1333],
+                 image_shape=[None, 3, None, None],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      RandomFlipImage(prob=0.5),
@@ -505,7 +505,7 @@ class FasterRCNNEvalFeed(DataFeed):
                                      COCO_VAL_IMAGE_DIR).__dict__,
                  fields=['image', 'im_info', 'im_id', 'im_shape', 'gt_box',
                          'gt_label', 'is_difficult'],
-                 image_shape=[3, 800, 1333],
+                 image_shape=[None, 3, None, None],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      NormalizeImage(mean=[0.485, 0.456, 0.406],
@@ -552,7 +552,7 @@ class FasterRCNNTestFeed(DataFeed):
                  dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
                                        COCO_VAL_IMAGE_DIR).__dict__,
                  fields=['image', 'im_info', 'im_id', 'im_shape'],
-                 image_shape=[3, 800, 1333],
+                 image_shape=[None, 3, None, None],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      NormalizeImage(mean=[0.485, 0.456, 0.406],
@@ -600,7 +600,7 @@ class MaskRCNNTrainFeed(DataFeed):
                      'image', 'im_info', 'im_id', 'gt_box', 'gt_label',
                      'is_crowd', 'gt_mask'
                  ],
-                 image_shape=[3, 800, 1333],
+                 image_shape=[None, 3, None, None],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      RandomFlipImage(prob=0.5, is_mask_flip=True),
@@ -646,7 +646,7 @@ class MaskRCNNEvalFeed(DataFeed):
                  dataset=CocoDataSet(COCO_VAL_ANNOTATION,
                                      COCO_VAL_IMAGE_DIR).__dict__,
                  fields=['image', 'im_info', 'im_id', 'im_shape'],
-                 image_shape=[3, 800, 1333],
+                 image_shape=[None, 3, None, None],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      NormalizeImage(mean=[0.485, 0.456, 0.406],
@@ -698,7 +698,7 @@ class MaskRCNNTestFeed(DataFeed):
                  dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
                                        COCO_VAL_IMAGE_DIR).__dict__,
                  fields=['image', 'im_info', 'im_id', 'im_shape'],
-                 image_shape=[3, 800, 1333],
+                 image_shape=[None, 3, None, None],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      NormalizeImage(
@@ -743,7 +743,7 @@ class SSDTrainFeed(DataFeed):
     def __init__(self,
                  dataset=VocDataSet().__dict__,
                  fields=['image', 'gt_box', 'gt_label'],
-                 image_shape=[3, 300, 300],
+                 image_shape=[None, 3, 300, 300],
                  sample_transforms=[
                      DecodeImage(to_rgb=True, with_mixup=False),
                      NormalizeBox(),
@@ -802,7 +802,7 @@ class SSDEvalFeed(DataFeed):
                  dataset=VocDataSet(VOC_VAL_ANNOTATION).__dict__,
                  fields=['image', 'im_shape', 'im_id', 'gt_box',
                          'gt_label', 'is_difficult'],
-                 image_shape=[3, 300, 300],
+                 image_shape=[None, 3, 300, 300],
                  sample_transforms=[
                      DecodeImage(to_rgb=True, with_mixup=False),
                      NormalizeBox(),
@@ -847,7 +847,7 @@ class SSDTestFeed(DataFeed):
     def __init__(self,
                  dataset=SimpleDataSet(VOC_VAL_ANNOTATION).__dict__,
                  fields=['image', 'im_id', 'im_shape'],
-                 image_shape=[3, 300, 300],
+                 image_shape=[None, 3, 300, 300],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      ResizeImage(target_size=300, use_cv2=False, interp=1),
@@ -893,7 +893,7 @@ class YoloTrainFeed(DataFeed):
     def __init__(self,
                  dataset=CocoDataSet().__dict__,
                  fields=['image', 'gt_box', 'gt_label', 'gt_score'],
-                 image_shape=[3, 608, 608],
+                 image_shape=[None, 3, 608, 608],
                  sample_transforms=[
                      DecodeImage(to_rgb=True, with_mixup=True),
                      MixupImage(alpha=1.5, beta=1.5),
@@ -955,7 +955,7 @@ class YoloEvalFeed(DataFeed):
                                      COCO_VAL_IMAGE_DIR).__dict__,
                  fields=['image', 'im_size', 'im_id', 'gt_box',
                          'gt_label', 'is_difficult'],
-                 image_shape=[3, 608, 608],
+                 image_shape=[None, 3, 608, 608],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      ResizeImage(target_size=608, interp=2),
@@ -1013,7 +1013,7 @@ class YoloTestFeed(DataFeed):
                  dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
                                        COCO_VAL_IMAGE_DIR).__dict__,
                  fields=['image', 'im_size', 'im_id'],
-                 image_shape=[3, 608, 608],
+                 image_shape=[None, 3, 608, 608],
                  sample_transforms=[
                      DecodeImage(to_rgb=True),
                      ResizeImage(target_size=608, interp=2),
diff --git a/ppdet/modeling/model_input.py b/ppdet/modeling/model_input.py
index 0e7e3bf05756ff2fc271336acb7a5be98db2bd88..376438963a1d53cf0fe2126592af0421cd82a508 100644
--- a/ppdet/modeling/model_input.py
+++ b/ppdet/modeling/model_input.py
@@ -25,16 +25,16 @@ __all__ = ['create_feed']

 # yapf: disable
 feed_var_def = [
-    {'name': 'im_info', 'shape': [3], 'dtype': 'float32', 'lod_level': 0},
-    {'name': 'im_id', 'shape': [1], 'dtype': 'int32', 'lod_level': 0},
-    {'name': 'gt_box', 'shape': [4], 'dtype': 'float32', 'lod_level': 1},
-    {'name': 'gt_label', 'shape': [1], 'dtype': 'int32', 'lod_level': 1},
-    {'name': 'is_crowd', 'shape': [1], 'dtype': 'int32', 'lod_level': 1},
-    {'name': 'gt_mask', 'shape': [2], 'dtype': 'float32', 'lod_level': 3},
-    {'name': 'is_difficult', 'shape': [1], 'dtype': 'int32', 'lod_level': 1},
-    {'name': 'gt_score', 'shape': [1], 'dtype': 'float32', 'lod_level': 0},
-    {'name': 'im_shape', 'shape': [3], 'dtype': 'float32', 'lod_level': 0},
-    {'name': 'im_size', 'shape': [2], 'dtype': 'int32', 'lod_level': 0},
+    {'name': 'im_info', 'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
+    {'name': 'im_id', 'shape': [None, 1], 'dtype': 'int32', 'lod_level': 0},
+    {'name': 'gt_box', 'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
+    {'name': 'gt_label', 'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
+    {'name': 'is_crowd', 'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
+    {'name': 'gt_mask', 'shape': [None, 2], 'dtype': 'float32', 'lod_level': 3},
+    {'name': 'is_difficult', 'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
+    {'name': 'gt_score', 'shape': [None, 1], 'dtype': 'float32', 'lod_level': 0},
+    {'name': 'im_shape', 'shape': [None, 3], 'dtype': 'float32', 'lod_level': 0},
+    {'name': 'im_size', 'shape': [None, 2], 'dtype': 'int32', 'lod_level': 0},
 ]
 # yapf: enable

@@ -52,10 +52,10 @@ def create_feed(feed, iterable=False, sub_prog_feed=False):
     # tensor padding with 0 is used instead of LoD tensor when
     # num_max_boxes is set
     if getattr(feed, 'num_max_boxes', None) is not None:
-        feed_var_map['gt_label']['shape'] = [feed.num_max_boxes]
-        feed_var_map['gt_score']['shape'] = [feed.num_max_boxes]
-        feed_var_map['gt_box']['shape'] = [feed.num_max_boxes, 4]
-        feed_var_map['is_difficult']['shape'] = [feed.num_max_boxes]
+        feed_var_map['gt_label']['shape'] = [None, feed.num_max_boxes]
+        feed_var_map['gt_score']['shape'] = [None, feed.num_max_boxes]
+        feed_var_map['gt_box']['shape'] = [None, feed.num_max_boxes, 4]
+        feed_var_map['is_difficult']['shape'] = [None, feed.num_max_boxes]
         feed_var_map['gt_label']['lod_level'] = 0
         feed_var_map['gt_score']['lod_level'] = 0
         feed_var_map['gt_box']['lod_level'] = 0
@@ -113,7 +113,7 @@ def create_feed(feed, iterable=False, sub_prog_feed=False):
             feed.fields = feed.fields + [box_name]
             feed_var_map[box_name] = sub_prog_feed

-    feed_vars = OrderedDict([(key, fluid.layers.data(
+    feed_vars = OrderedDict([(key, fluid.data(
         name=feed_var_map[key]['name'],
         shape=feed_var_map[key]['shape'],
         dtype=feed_var_map[key]['dtype'],
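
Note for reviewers: the patch replaces `fluid.layers.data` with fixed per-sample shapes (e.g. `[3, 800, 1333]`) by `fluid.data` with `[None, 3, None, None]`, so batch size, height, and width stay symbolic in the exported program. The snippet below is a minimal sketch, not part of the patch; it assumes PaddlePaddle 1.6+ (where `fluid.data` accepts `None` in `shape`), and the variable names are illustrative only.

```python
# Illustrative sketch -- not part of this patch. Assumes PaddlePaddle 1.6+,
# where fluid.data() accepts None for dimensions unknown until runtime.
import paddle.fluid as fluid

# Old convention: fluid.layers.data with a fixed per-sample shape; the batch
# dimension is implicit and H/W are pinned (e.g. 3x800x1333 for Faster R-CNN).
# image = fluid.layers.data(name='image', shape=[3, 800, 1333], dtype='float32')

# New convention: the batch dimension is explicit, and batch size / height /
# width are declared as None, mirroring image_shape=[None, 3, None, None] and
# the feed_var_def entries above, so the exported inference model accepts
# inputs of any size (FPN models still need H and W to be multiples of 32,
# per the note added to EXPORT_MODEL.md).
image = fluid.data(name='image', shape=[None, 3, None, None], dtype='float32')
im_info = fluid.data(name='im_info', shape=[None, 3], dtype='float32')

print(image.shape)  # unknown (None) dimensions are recorded as -1 in the program
```

With the old declaration the exported model's data layer defaulted to 3x800x1333 (as the removed doc comment stated); with `None` dimensions the same exported model can be fed any resolution at inference time, which is why the doc comment was dropped and the multiple-of-32 caveat for FPN models was added instead.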