diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp index e5fc2800e2678aa26a15c9fa78d2de9b2e6e58ea..e42288fbccd434ef5953c606696af623323aa80d 100644 --- a/deploy/cpp/demo/detector.cpp +++ b/deploy/cpp/demo/detector.cpp @@ -66,7 +66,7 @@ int main(int argc, char** argv) { std::cout << "image file: " << image_path << ", predict label: " << result.boxes[i].category << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box:(" + << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " @@ -89,7 +89,7 @@ int main(int argc, char** argv) { for (int i = 0; i < result.boxes.size(); ++i) { std::cout << ", predict label: " << result.boxes[i].category << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box:(" + << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " diff --git a/docs/apis/models/classification.md b/docs/apis/models/classification.md index 4fa083df17d5d87c1b9755e9c224bfd236ebc6b6..14bbcb1efd05b8afcc05027d5046942fa161e406 100755 --- a/docs/apis/models/classification.md +++ b/docs/apis/models/classification.md @@ -35,7 +35,7 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 -> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 @@ -186,3 +186,7 @@ paddlex.cls.DenseNet161(num_classes=1000) paddlex.cls.DenseNet201(num_classes=1000) ``` +### HRNet_W18 +```python +paddlex.cls.HRNet_W18(num_classes=1000) +```
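+
+### 使用示例
+
+下面是一段最小的HRNet_W18训练示意(代码仅供参考,其中`vegetables_cls`数据目录与各列表文件路径均为示意值,transforms组合可按实际任务调整):
+
+```python
+import paddlex as pdx
+from paddlex.cls import transforms
+
+# 训练与评估时的图像预处理流程
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224),
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.ResizeByShort(short_size=256),
+    transforms.CenterCrop(crop_size=224),
+    transforms.Normalize()
+])
+
+# 数据集需已按PaddleX的ImageNet格式组织,路径为示意值
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/train_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=train_transforms)
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=eval_transforms)
+
+model = pdx.cls.HRNet_W18(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    save_dir='output/hrnet_w18')
+```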
diff --git a/docs/apis/models/detection.md b/docs/apis/models/detection.md index 2039be6db803d0f0c4335346e7b6127565c684af..dbd3130b115abc0d81a53cbc4aad5d0d08d73734 100755 --- a/docs/apis/models/detection.md +++ b/docs/apis/models/detection.md @@ -9,7 +9,7 @@ paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_ > 构建YOLOv3检测器。**注意在YOLOv3,num_classes不需要包含背景类,如目标包括human、dog两种,则num_classes设为2即可,这里与FasterRCNN/MaskRCNN有差别** > **参数** -> +> > > - **num_classes** (int): 类别数。默认为80。 > > - **backbone** (str): YOLOv3的backbone网络,取值范围为['DarkNet53', 'ResNet34', 'MobileNetV1', 'MobileNetV3_large']。默认为'MobileNetV1'。 > > - **anchors** (list|tuple): anchor框的宽度和高度,为None时表示使用默认值 @@ -53,7 +53,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在PascalVOC数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 -> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 @@ -107,7 +107,7 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec > **参数** > > - **num_classes** (int): 包含了背景类的类别数。默认为81。 -> > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 +> > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 > > - **with_fpn** (bool): 是否使用FPN结构。默认为True。 > > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 > > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 diff --git a/docs/apis/models/instance_segmentation.md b/docs/apis/models/instance_segmentation.md index f08645478cdafa845431ed52b68387f672886d17..e3f3f720adda70d7649234a96dca28dc7133bc4b 100755 --- a/docs/apis/models/instance_segmentation.md +++ b/docs/apis/models/instance_segmentation.md @@ -12,7 +12,7 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_ > **参数** > > - **num_classes** (int): 包含了背景类的类别数。默认为81。 -> > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 +> > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 > > - **with_fpn** (bool): 是否使用FPN结构。默认为True。 > > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 > > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 @@ -82,4 +82,4 @@ predict(self, img_file, transforms=None) > > **返回值** > -> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度,其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。 +> > - **list**: 预测结果列表,列表中每个元素均为一个dict,包含关键字'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息、类别、类别id、置信度。其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。Mask信息为原图大小的二值图,1表示像素点属于预测类别,0表示像素点是背景。 diff --git a/docs/apis/models/semantic_segmentation.md b/docs/apis/models/semantic_segmentation.md index 12b54d8c6114466771e25bdd88d21466018ed5aa..2321b45c61e4b44e8620543cab1711671929c5f6 100755 --- a/docs/apis/models/semantic_segmentation.md +++ b/docs/apis/models/semantic_segmentation.md @@ -47,7 +47,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 -> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 @@ -124,7 +124,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认'output' -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'COCO',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 >
> - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 @@ -173,3 +173,88 @@ predict(self, im_file, transforms=None): > **返回值** > > > > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。 + + +## HRNet类 + +```python +paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255) +``` + +> 构建HRNet分割器。 + +> **参数** + +> > - **num_classes** (int): 类别数。 +> > - **width** (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。 +> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 +> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 +> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, class_weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。 +> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。 + +### train 训练接口 + +```python +train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None): +``` + +> HRNet模型训练接口。 + +> **参数** +> > +> > - **num_epochs** (int): 训练迭代轮数。 +> > - **train_dataset** (paddlex.datasets): 训练数据读取器。 +> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。 +> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。 +> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 +> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 +> > - **save_dir** (str): 模型保存路径。默认'output' +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。 +> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 +> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 +> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 +> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 +> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 +> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 + +### evaluate 评估接口 + +```python +evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False): +``` + +> HRNet模型评估接口。 + +> **参数** +> > +> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。 +> > - **batch_size** (int): 评估时的batch大小。默认1。 +> > - **epoch_id** (int): 当前评估模型所在的训练轮数。 +> > - **return_details** (bool): 是否返回详细信息。默认False。 + +> **返回值** +> > +> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、 +> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。 +> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details), +> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。 + +### predict 预测接口 + +```python +predict(self, im_file, transforms=None): +``` + +>
HRNet模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`HRNet.test_transforms`和`HRNet.eval_transforms`中。如未在训练时定义eval_dataset,那么在调用`predict`接口时,用户需要重新定义test_transforms并传入给`predict`接口。 + +> **参数** +> > +> > - **im_file** (str): 预测图像路径。 +> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。 + +> **返回值** +> > +> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
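+
+### 使用示例
+
+下面是一段最小的HRNet训练与预测示意(代码仅供参考,`optic_disc_seg`数据目录及各文件路径均为示意值,需按实际数据集替换):
+
+```python
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# 训练与评估时的图像处理流程
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(),
+    transforms.Resize(target_size=512),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.Resize(target_size=512),
+    transforms.Normalize()
+])
+
+# 数据集需已按PaddleX的SegDataset格式组织
+train_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/train_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=train_transforms)
+eval_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/val_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=eval_transforms)
+
+model = pdx.seg.HRNet(num_classes=len(train_dataset.labels), width=18)
+model.train(
+    num_epochs=20,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    save_dir='output/hrnet')
+
+# 训练完成后可直接预测,返回包含'label_map'与'score_map'的dict
+result = model.predict('optic_disc_seg/JPEGImages/xxx.jpg')
+```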
diff --git a/docs/apis/transforms/augment.md b/docs/apis/transforms/augment.md index 4ed04ca064cad113a2375dc3375d651572b374c1..f8c66b45d2d853fde57c520b079f9974e5fa4d76 100644 --- a/docs/apis/transforms/augment.md +++ b/docs/apis/transforms/augment.md @@ -10,7 +10,7 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了 | :------- | :------------| | 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)、[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](cls_transforms.html#randomverticalflip)、[RandomRotate](cls_transforms.html#randomratate)、 [RandomDistort](cls_transforms.html#randomdistort) | |目标检测
实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、
[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、[RandomExpand](det_transforms.html#randomexpand) | -|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotation](seg_transforms.html#randomrotation)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) | +|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotate](seg_transforms.html#randomrotate)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) | ## imgaug增强库的支持 diff --git a/docs/apis/transforms/seg_transforms.md b/docs/apis/transforms/seg_transforms.md index 1708290680e767b3a06615f0b789279b716433ea..d95d8a4d9a35723b0f489fa972ed28fcadd1d211 100755 --- a/docs/apis/transforms/seg_transforms.md +++ b/docs/apis/transforms/seg_transforms.md @@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1) * **prob** (float): 图像模糊概率。默认为0.1。 -## RandomRotation类 +## RandomRotate类 ```python paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255) ``` diff --git a/docs/appendix/model_zoo.md b/docs/appendix/model_zoo.md index f48ea51361f85c6fdeb22c6c03e8d6f982fbd439..c75f41729d60761f326284839f3a9dd2addeaddc 100644 --- a/docs/appendix/model_zoo.md +++ b/docs/appendix/model_zoo.md @@ -27,6 +27,7 @@ | DenseNet161|116.3MB | 8.863 | 78.6 | 94.1 | | DenseNet201| 84.6MB | 8.173 | 77.6 | 93.7 | | ShuffleNetV2 | 9.0MB | 10.941 | 68.8 | 88.5 | +| HRNet_W18 | 21.29MB | 7.368 (V100 GPU) | 76.9 | 93.4 | ## 目标检测模型 @@ -41,6 +42,7 @@ |FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 | |FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 | |FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 | +|FasterRCNN-HRNet_W18-FPN |115.5MB | 57.11 | 36 | |YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 | |YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 | |YOLOv3-MobileNetv3|94.6MB | - | 31.6 | @@ -49,4 +51,3 @@ ## 实例分割模型 > 表中模型相关指标均为在MSCOCO数据集上测试得到。 - diff --git a/docs/slim/index.rst b/docs/slim/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..48a16f6e08f3f80a7048d1666719b9b08e150362 --- /dev/null +++ b/docs/slim/index.rst @@ -0,0 +1,8 @@ +模型压缩 +============================ + +.. 
toctree:: + :maxdepth: 2 + + prune.md + quant.md diff --git a/docs/slim/prune.md b/docs/slim/prune.md new file mode 100644 index 0000000000000000000000000000000000000000..c1ff51e5e08c2ce8da5e2042d0a1c359a9e64dff --- /dev/null +++ b/docs/slim/prune.md @@ -0,0 +1,54 @@ +# 模型裁剪 + +## 原理介绍 + +模型裁剪用于减小模型的计算量和体积,可以加快模型部署后的预测速度,是一种减小模型大小和降低模型计算复杂度的常用方式。其通过裁剪卷积层中Kernel输出通道的大小及其关联层参数大小来实现,关联裁剪的原理可参见[PaddleSlim相关文档](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16)。**一般而言,在同等模型精度前提下,数据复杂度越低,模型可以被裁剪的比例就越高**。 + +## 裁剪方法 +PaddleX提供了两种裁剪方式: + +**1.用户自行计算裁剪配置(推荐),整体流程包含三个步骤:** +> **第一步**: 使用数据集训练原始模型 +> **第二步**:利用第一步训练好的模型,在验证数据集上计算模型中各个参数的敏感度,并将敏感度信息存储至本地文件 +> **第三步**:使用数据集训练裁剪模型(与第一步差异在于需要在`train`接口中,将第二步计算得到的敏感信息文件传给接口的`sensitivities_file`参数),具体可参考文末的代码示意 + +> 在如上三个步骤中,**相当于模型共需要训练两遍**,分别对应第一步和第三步,但其中第三步训练的是裁剪后的模型,因此训练速度较第一步会更快。 +> 第二步会遍历模型中的部分裁剪参数,分别计算各个参数裁剪后对于模型在验证集上效果的影响,**因此会反复在验证集上评估多次**。 + +**2.使用PaddleX内置的裁剪方案** +> PaddleX内置的模型裁剪方案是**基于标准数据集**上计算得到的参数敏感度信息,由于不同数据集特征分布会有较大差异,所以该方案相较于第1种方案训练得到的模型**精度一般而言会更低**(**且用户自定义数据集与标准数据集特征分布差异越大,训练得到的模型精度越低**),仅在用户想节省时间的前提下可以参考使用,使用方式只需一步: + +> **一步**: 使用数据集训练裁剪模型,在训练调用`train`接口时,将接口中的`sensitivities_file`参数设置为'DEFAULT'字符串 + +> 注:各模型内置的裁剪方案分别依据的数据集为: 图像分类——ImageNet数据集、目标检测——PascalVOC数据集、语义分割——CityScape数据集 + +## 裁剪实验 +基于上述两种方案,我们在PaddleX上使用样例数据进行了实验,在Tesla P40上实验指标如下所示: + +### 图像分类 +实验背景:使用MobileNetV2模型,数据集为蔬菜分类示例数据,见[使用教程-模型压缩-图像分类](../tutorials/compress/classification.md) + +| 模型 | 裁剪情况 | 模型大小 | Top1准确率(%) |GPU预测速度 | CPU预测速度 | +| :-----| :--------| :-------- | :---------- |:---------- |:----------| +|MobileNetV2 | 无裁剪(原模型)| 13.0M | 97.50|6.47ms |47.44ms | +|MobileNetV2 | 方案一(eval_metric_loss=0.10) | 2.1M | 99.58 |5.03ms |20.22ms | +|MobileNetV2 | 方案二(eval_metric_loss=0.10) | 6.0M | 99.58 |5.42ms |29.06ms | + +### 目标检测 +实验背景:使用YOLOv3-MobileNetV1模型,数据集为昆虫检测示例数据,见[使用教程-模型压缩-目标检测](../tutorials/compress/detection.md) + + +| 模型 | 裁剪情况 | 模型大小 | MAP(%) |GPU预测速度 | CPU预测速度 | +| :-----| :--------| :-------- | :---------- |:---------- | :---------| +|YOLOv3-MobileNetV1 | 无裁剪(原模型)| 139M | 67.57| 14.88ms |976.42ms | +|YOLOv3-MobileNetV1 | 方案一(eval_metric_loss=0.10) | 34M | 75.49 |10.60ms |558.49ms | +|YOLOv3-MobileNetV1 | 方案二(eval_metric_loss=0.05) | 29M | 50.27| 9.43ms |360.46ms | + +### 语义分割 +实验背景:使用UNet模型,数据集为视盘分割示例数据, 见[使用教程-模型压缩-语义分割](../tutorials/compress/segmentation.md) + +| 模型 | 裁剪情况 | 模型大小 | mIOU(%) |GPU预测速度 | CPU预测速度 | +| :-----| :--------| :-------- | :---------- |:---------- | :---------| +|UNet | 无裁剪(原模型)| 77M | 91.22 |33.28ms |9523.55ms | +|UNet | 方案一(eval_metric_loss=0.10) |26M | 90.37 |21.04ms |3936.20ms | +|UNet | 方案二(eval_metric_loss=0.10) |23M | 91.21 |18.61ms |3447.75ms |
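+
+## 代码示意
+
+以第一种方案、图像分类任务为例,三个步骤的大致代码流程如下(仅为示意:数据读取器`train_dataset`/`eval_dataset`的构建与常规训练一致,此处从略;`cal_params_sensitivities`等接口的具体参数请以[模型压缩API文档](../apis/slim.md)为准):
+
+```python
+import paddlex as pdx
+
+# 第一步:使用数据集正常训练原始模型(过程略),
+# 假设训练产出保存在output/mobilenetv2/best_model
+model = pdx.load_model('output/mobilenetv2/best_model')
+
+# 第二步:在验证集上计算各参数的敏感度,并存储至本地文件
+pdx.slim.cal_params_sensitivities(
+    model,
+    save_file='./sensitivities.data',
+    eval_dataset=eval_dataset,
+    batch_size=8)
+
+# 第三步:重新训练裁剪模型,将敏感度文件传给train接口的sensitivities_file参数
+model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    save_dir='output/mobilenetv2_prune',
+    sensitivities_file='./sensitivities.data',
+    eval_metric_loss=0.10)
+```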
diff --git a/docs/slim/quant.md b/docs/slim/quant.md new file mode 100644 index 0000000000000000000000000000000000000000..1686a9fb8d33e770d55a378ebdf76876058514fb --- /dev/null +++ b/docs/slim/quant.md @@ -0,0 +1,11 @@ +# 模型量化 + +## 原理介绍 +为了满足低内存带宽、低功耗、低计算资源占用以及低模型存储等需求,定点量化被提出。为此我们提供了训练后量化,该量化使用KL散度确定量化比例因子,将FP32模型转成INT8模型,且不需要重新训练,可以快速得到量化模型。 + + +## 使用PaddleX量化模型 +PaddleX提供了`export_quant_model`接口,让用户以接口的形式完成模型以post_quantization方式量化并导出。点击查看[量化接口使用文档](../apis/slim.md)。 + +## 量化性能对比 +模型量化后的性能对比指标请查阅[PaddleSlim模型库](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html) diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md index ebd27b61559eb2e0e4146f3642b2637cb6ab70e2..838195f14ff108de838f04d5514101b17280f4dd 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md @@ -96,6 +96,17 @@ cmake .. \ make ``` +**注意:** Linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML,如果编译环境无法访问外网,可手动下载: + +- [opencv3gcc4.8.tar.bz2](https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2) +- [paddlex-encryption.zip](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip) +- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip) + +opencv3gcc4.8.tar.bz2文件下载后解压,然后在script/build.sh中指定`OPENCV_DIR`为解压后的路径。 + +paddlex-encryption.zip文件下载后解压,然后在script/build.sh中指定`ENCRYPTION_DIR`为解压后的路径。 + +yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。 修改脚本设置好主要参数后,执行`build`脚本: ```shell @@ -104,8 +115,9 @@ make ### Step5: 预测及可视化 -参考[导出inference模型](../../deploy_python.html#inference)将模型导出为inference格式模型。 -**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** +**在加载模型前,请检查你的模型目录,目录中应包含`model.yml`、`__model__`和`__params__`三个文件。如不满足该条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。** + +> **注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** 编译成功后,预测demo的可执行程序分别为`build/demo/detector`,`build/demo/classifer`,`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | | use_trt | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) | | gpu_id | GPU 设备ID, 默认值为0 | -| save_dir | 保存可视化结果的路径, 默认值为"output",classfier无该参数 | +| save_dir | 保存可视化结果的路径, 默认值为"output",**classifer无该参数** | ## 样例 diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md index 0f70f7f90131bfb354b2eee493b6d863b99f3dcc..e319df76ccc7ab4308b0a0b295eb412d9d89c2fe 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md @@ -86,7 +86,14 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens | OPENCV_DIR | OpenCV的安装路径, | | PADDLE_DIR | Paddle c++预测库的路径 | -**注意:** 1. 使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾 2. 如果使用的是`openblas`版本,请把`WITH_MKL`的`值`去掉勾 +**注意:** +1. 使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾 + +2. 如果使用的是`openblas`版本,请把`WITH_MKL`的`值`去掉勾 + +3. Windows环境下编译会自动下载YAML,如果编译环境无法访问外网,可手动下载: [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip) + +yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。 ![step4](../../images/vs2019_step5.png) @@ -99,8 +106,10 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens ### Step5: 预测及可视化 - -参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。 -**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** + +**在加载模型前,请检查你的模型目录,目录中应包含`model.yml`、`__model__`和`__params__`三个文件。如不满足该条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。** + +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: diff --git a/paddlex/__init__.py b/paddlex/__init__.py index de762df7ad7dc01670e795b93f709bb23a08f1c8..972210bdb80c445e59d4a8ed10418ee988bd353c 100644 --- a/paddlex/__init__.py +++ b/paddlex/__init__.py @@ -53,4 +53,4 @@ log_level = 2 from .
import interpret -__version__ = '1.0.2.github' +__version__ = '1.0.4' diff --git a/paddlex/cls.py b/paddlex/cls.py index e440c726b639ac6d78cc3c62dd8ef2df7bf8a327..0dce289d7ee77c9559a4fce2104cca8786b81f52 100644 --- a/paddlex/cls.py +++ b/paddlex/cls.py @@ -36,5 +36,6 @@ DenseNet121 = cv.models.DenseNet121 DenseNet161 = cv.models.DenseNet161 DenseNet201 = cv.models.DenseNet201 ShuffleNetV2 = cv.models.ShuffleNetV2 +HRNet_W18 = cv.models.HRNet_W18 transforms = cv.transforms.cls_transforms diff --git a/paddlex/cv/datasets/dataset.py b/paddlex/cv/datasets/dataset.py index c3bec8997cb0a04590f4946dc87b0eb8bd3a0c43..3cc8b52c10f41ce17bbe8da7fd5289b96aac409e 100644 --- a/paddlex/cv/datasets/dataset.py +++ b/paddlex/cv/datasets/dataset.py @@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data): padding_batch = [] for data in batch_data: im_c, im_h, im_w = data[0].shape[:] - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) + padding_im = np.zeros( + (im_c, max_shape[1], max_shape[2]), dtype=np.float32) padding_im[:, :im_h, :im_w] = data[0] padding_batch.append((padding_im, ) + data[1:]) return padding_batch @@ -226,8 +226,8 @@ class Dataset: if num_workers == 'auto': import multiprocessing as mp num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8 - if platform.platform().startswith( - "Darwin") or platform.platform().startswith("Windows"): + if platform.platform().startswith("Darwin") or platform.platform( + ).startswith("Windows"): parallel_method = 'thread' if transforms is None: raise Exception("transform should be defined.") diff --git a/paddlex/cv/models/__init__.py b/paddlex/cv/models/__init__.py index 6c75179f893c286be9c00dc98d96ee1768e4a3ec..22485f2701e1e06c6e050c0c15238c32ed4a6a02 100644 --- a/paddlex/cv/models/__init__.py +++ b/paddlex/cv/models/__init__.py @@ -34,11 +34,13 @@ from .classifier import DenseNet121 from .classifier import DenseNet161 from .classifier import DenseNet201 from .classifier import ShuffleNetV2 +from .classifier import HRNet_W18 from .base import BaseAPI from .yolo_v3 import YOLOv3 from .faster_rcnn import FasterRCNN from .mask_rcnn import MaskRCNN from .unet import UNet from .deeplabv3p import DeepLabv3p +from .hrnet import HRNet from .load_model import load_model from .slim import prune diff --git a/paddlex/cv/models/base.py b/paddlex/cv/models/base.py index 9d66df7009ff4daf09112b4709e30c39eb38ab67..ac8989ff83980bf45d7705985353435e6e19a9e6 100644 --- a/paddlex/cv/models/base.py +++ b/paddlex/cv/models/base.py @@ -79,9 +79,9 @@ class BaseAPI: return int(batch_size // len(self.places)) else: raise Exception("Please support correct batch_size, \ - which can be divided by available cards({}) in {}". 
- format(paddlex.env_info['num'], - paddlex.env_info['place'])) + which can be divided by available cards({}) in {}" + .format(paddlex.env_info['num'], paddlex.env_info[ + 'place'])) def build_program(self): # 构建训练网络 @@ -198,6 +198,8 @@ class BaseAPI: backbone = self.backbone else: backbone = self.__class__.__name__ + if backbone == "HRNet": + backbone = backbone + "_W{}".format(self.width) pretrain_weights = get_pretrain_weights( pretrain_weights, self.model_type, backbone, pretrain_dir) if startup_prog is None: @@ -210,8 +212,8 @@ class BaseAPI: paddlex.utils.utils.load_pretrain_weights( self.exe, self.train_prog, resume_checkpoint, resume=True) if not osp.exists(osp.join(resume_checkpoint, "model.yml")): - raise Exception( - "There's not model.yml in {}".format(resume_checkpoint)) + raise Exception("There's no model.yml in {}".format( + resume_checkpoint)) with open(osp.join(resume_checkpoint, "model.yml")) as f: info = yaml.load(f.read(), Loader=yaml.Loader) self.completed_epochs = info['completed_epochs'] @@ -269,13 +271,13 @@ class BaseAPI: except: pass - if hasattr(self.test_transforms, 'to_rgb'): - if self.test_transforms.to_rgb: - info['TransformsMode'] = 'RGB' - else: - info['TransformsMode'] = 'BGR' - if hasattr(self, 'test_transforms'): + if hasattr(self.test_transforms, 'to_rgb'): + if self.test_transforms.to_rgb: + info['TransformsMode'] = 'RGB' + else: + info['TransformsMode'] = 'BGR' + if self.test_transforms is not None: info['Transforms'] = list() for op in self.test_transforms.transforms: @@ -362,8 +364,8 @@ class BaseAPI: # 模型保存成功的标志 open(osp.join(save_dir, '.success'), 'w').close() - logging.info( - "Model for inference deploy saved in {}.".format(save_dir)) + logging.info("Model for inference deploy saved in {}.".format( + save_dir)) def train_loop(self, num_epochs, @@ -377,7 +379,8 @@ class BaseAPI: early_stop=False, early_stop_patience=5): if train_dataset.num_samples < train_batch_size: - raise Exception('The amount of training datset must be larger than batch size.') + raise Exception( + 'The amount of training dataset must be larger than batch size.') if not osp.isdir(save_dir): if osp.exists(save_dir): os.remove(save_dir) @@ -415,8 +418,8 @@ class BaseAPI: build_strategy=build_strategy, exec_strategy=exec_strategy) - total_num_steps = math.floor( - train_dataset.num_samples / train_batch_size) + total_num_steps = math.floor(train_dataset.num_samples / + train_batch_size) num_steps = 0 time_stat = list() time_train_one_epoch = None @@ -430,8 +433,8 @@ class BaseAPI: if self.model_type == 'detector': eval_batch_size = self._get_single_card_bs(train_batch_size) if eval_dataset is not None: - total_num_steps_eval = math.ceil( - eval_dataset.num_samples / eval_batch_size) + total_num_steps_eval = math.ceil(eval_dataset.num_samples / + eval_batch_size) if use_vdl: # VisualDL component @@ -473,7 +476,9 @@ class BaseAPI: if use_vdl: for k, v in step_metrics.items(): - log_writer.add_scalar('Metrics/Training(Step): {}'.format(k), v, num_steps) + log_writer.add_scalar( + 'Metrics/Training(Step): {}'.format(k), v, + num_steps) # 估算剩余时间 avg_step_time = np.mean(time_stat) @@ -481,11 +486,12 @@ class BaseAPI: eta = (num_epochs - i - 1) * time_train_one_epoch + ( total_num_steps - step - 1) * avg_step_time else: - eta = ((num_epochs - i) * total_num_steps - step - - 1) * avg_step_time + eta = ((num_epochs - i) * total_num_steps - step - 1 + ) * avg_step_time if time_eval_one_epoch is not None: - eval_eta = (total_eval_times - i // - save_interval_epochs) * time_eval_one_epoch
+ eval_eta = ( + total_eval_times - i // save_interval_epochs + ) * time_eval_one_epoch else: eval_eta = ( total_eval_times - i // save_interval_epochs @@ -495,10 +501,11 @@ class BaseAPI: logging.info( "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}" .format(i + 1, num_epochs, step + 1, total_num_steps, - dict2str(step_metrics), round( - avg_step_time, 2), eta_str)) + dict2str(step_metrics), + round(avg_step_time, 2), eta_str)) train_metrics = OrderedDict( - zip(list(self.train_outputs.keys()), np.mean(records, axis=0))) + zip(list(self.train_outputs.keys()), np.mean( + records, axis=0))) logging.info('[TRAIN] Epoch {} finished, {} .'.format( i + 1, dict2str(train_metrics))) time_train_one_epoch = time.time() - epoch_start_time @@ -534,7 +541,8 @@ class BaseAPI: if isinstance(v, np.ndarray): if v.size > 1: continue - log_writer.add_scalar("Metrics/Eval(Epoch): {}".format(k), v, i+1) + log_writer.add_scalar( + "Metrics/Eval(Epoch): {}".format(k), v, i + 1) self.save_model(save_dir=current_save_dir) time_eval_one_epoch = time.time() - eval_epoch_start_time eval_epoch_start_time = time.time() @@ -545,4 +553,4 @@ class BaseAPI: best_accuracy)) if eval_dataset is not None and early_stop: if earlystop(current_accuracy): - break \ No newline at end of file + break diff --git a/paddlex/cv/models/classifier.py b/paddlex/cv/models/classifier.py index b474ceeb4bf067ecade50acd9da66960216486ad..3b90fdcca16deba85656ff2b478129ce52ae795a 100644 --- a/paddlex/cv/models/classifier.py +++ b/paddlex/cv/models/classifier.py @@ -40,8 +40,8 @@ class BaseClassifier(BaseAPI): self.init_params = locals() super(BaseClassifier, self).__init__('classifier') if not hasattr(paddlex.cv.nets, str.lower(model_name)): - raise Exception( - "ERROR: There's no model named {}.".format(model_name)) + raise Exception("ERROR: There's no model named {}.".format( + model_name)) self.model_name = model_name self.labels = None self.num_classes = num_classes @@ -218,15 +218,14 @@ class BaseClassifier(BaseAPI): num_pad_samples = batch_size - num_samples pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) images = np.concatenate([images, pad_images]) - outputs = self.exe.run( - self.parallel_test_prog, - feed={'image': images}, - fetch_list=list(self.test_outputs.values())) + outputs = self.exe.run(self.parallel_test_prog, + feed={'image': images}, + fetch_list=list(self.test_outputs.values())) outputs = [outputs[0][:num_samples]] true_labels.extend(labels) pred_scores.extend(outputs[0].tolist()) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) pred_top1_label = np.argsort(pred_scores)[:, -1] pred_topk_label = np.argsort(pred_scores)[:, -k:] @@ -263,10 +262,9 @@ class BaseClassifier(BaseAPI): self.arrange_transforms( transforms=self.test_transforms, mode='test') im = self.test_transforms(img_file) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) + result = self.exe.run(self.test_prog, + feed={'image': im}, + fetch_list=list(self.test_outputs.values())) pred_label = np.argsort(result[0][0])[::-1][:true_topk] res = [{ 'category_id': l, @@ -400,3 +398,9 @@ class ShuffleNetV2(BaseClassifier): def __init__(self, num_classes=1000): super(ShuffleNetV2, self).__init__( model_name='ShuffleNetV2', num_classes=num_classes) + + +class HRNet_W18(BaseClassifier): + def __init__(self, num_classes=1000): + super(HRNet_W18, 
self).__init__( + model_name='HRNet_W18', num_classes=num_classes) diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py index a253aa5d1d8c005c7903b57a9b9b36da45982d78..3127bd8549ae221f7f7604613bba2e1437b93605 100644 --- a/paddlex/cv/models/deeplabv3p.py +++ b/paddlex/cv/models/deeplabv3p.py @@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI): if mode == 'train': self.optimizer.minimize(model_out) outputs['loss'] = model_out - elif mode == 'eval': - outputs['loss'] = model_out[0] - outputs['pred'] = model_out[1] - outputs['label'] = model_out[2] - outputs['mask'] = model_out[3] else: outputs['pred'] = model_out[0] outputs['logit'] = model_out[1] @@ -336,18 +331,26 @@ class DeepLabv3p(BaseAPI): for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]) + + _, _, im_h, im_w = images.shape + labels = list() + for d in data: + padding_label = np.zeros( + (1, im_h, im_w)).astype('int64') + self.ignore_index + _, label_h, label_w = d[1].shape + padding_label[:, :label_h, :label_w] = d[1] + labels.append(padding_label) + labels = np.array(labels) - labels = np.array([d[1] for d in data]) + num_samples = images.shape[0] if num_samples < batch_size: num_pad_samples = batch_size - num_samples pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) images = np.concatenate([images, pad_images]) feed_data = {'image': images} - outputs = self.exe.run( - self.parallel_test_prog, - feed=feed_data, - fetch_list=list(self.test_outputs.values()), - return_numpy=True) + outputs = self.exe.run(self.parallel_test_prog, + feed=feed_data, + fetch_list=list(self.test_outputs.values()), + return_numpy=True) pred = outputs[0] if num_samples < batch_size: pred = pred[0:num_samples] @@ -364,8 +367,7 @@ class DeepLabv3p(BaseAPI): metrics = OrderedDict( zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'], - [miou, category_iou, macc, category_acc, - conf_mat.kappa()])) + [miou, category_iou, macc, category_acc, conf_mat.kappa()])) if return_details: eval_details = { 'confusion_matrix': conf_mat.confusion_matrix.tolist() } @@ -394,10 +396,9 @@ class DeepLabv3p(BaseAPI): transforms=self.test_transforms, mode='test') im, im_info = self.test_transforms(im_file) im = np.expand_dims(im, axis=0) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) + result = self.exe.run(self.test_prog, + feed={'image': im}, + fetch_list=list(self.test_outputs.values())) pred = result[0] pred = np.squeeze(pred).astype('uint8') logit = result[1] @@ -413,6 +414,6 @@ class DeepLabv3p(BaseAPI): pred = pred[0:h, 0:w] logit = logit[0:h, 0:w, :] else: - raise Exception("Unexpected info '{}' in im_info".format( - info[0])) + raise Exception("Unexpected info '{}' in im_info".format(info[ + 0])) return {'label_map': pred, 'score_map': logit} diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py index 3b7144f3e9eeeb656940cc480637c6e871fcc210..2c2acdd149d1157edfa5a485108698808b4a9c84 100644 --- a/paddlex/cv/models/faster_rcnn.py +++ b/paddlex/cv/models/faster_rcnn.py @@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI): Args: num_classes (int): 包含了背景类的类别数。默认为81。 backbone (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', - 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 + 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 with_fpn (bool): 是否使用FPN结构。默认为True。 aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 @@ -47,7
+47,8 @@ class FasterRCNN(BaseAPI): self.init_params = locals() super(FasterRCNN, self).__init__('detector') backbones = [ - 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd' + 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', + 'HRNet_W18' ] assert backbone in backbones, "backbone should be one of {}".format( backbones) @@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI): layers = 101 variant = 'd' norm_type = 'affine_channel' + elif backbone_name == 'HRNet_W18': + backbone = paddlex.cv.nets.hrnet.HRNet( + width=18, freeze_norm=True, norm_decay=0., freeze_at=0) + if self.with_fpn is False: + self.with_fpn = True + return backbone if self.with_fpn: backbone = paddlex.cv.nets.resnet.ResNet( norm_type='bn' if norm_type is None else norm_type, @@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI): model_out = model.build_net(inputs) loss = model_out['loss'] self.optimizer.minimize(loss) - outputs = OrderedDict([('loss', model_out['loss']), - ('loss_cls', model_out['loss_cls']), - ('loss_bbox', model_out['loss_bbox']), - ('loss_rpn_cls', model_out['loss_rpn_cls']), - ('loss_rpn_bbox', - model_out['loss_rpn_bbox'])]) + outputs = OrderedDict( + [('loss', model_out['loss']), + ('loss_cls', model_out['loss_cls']), + ('loss_bbox', model_out['loss_bbox']), + ('loss_rpn_cls', model_out['loss_rpn_cls']), ( + 'loss_rpn_bbox', model_out['loss_rpn_bbox'])]) else: outputs = model.build_net(inputs) return inputs, outputs @@ -227,7 +234,9 @@ class FasterRCNN(BaseAPI): # 构建训练、验证、测试网络 self.build_program() fuse_bn = True - if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']: + if self.with_fpn and self.backbone in [ + 'ResNet18', 'ResNet50', 'HRNet_W18' + ]: fuse_bn = False self.net_initialize( startup_prog=fluid.default_startup_program(), @@ -310,11 +319,10 @@ class FasterRCNN(BaseAPI): 'im_info': im_infos, 'im_shape': im_shapes, } - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()) @@ -339,13 +347,13 @@ class FasterRCNN(BaseAPI): res['is_difficult'] = (np.array(res_is_difficult), [res_is_difficult_lod]) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) box_ap_stats, eval_details = eval_results( results, metric, eval_dataset.coco_gt, with_background=True) metrics = OrderedDict( - zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], - box_ap_stats)) + zip(['bbox_mmap' + if metric == 'COCO' else 'bbox_map'], box_ap_stats)) if return_details: return metrics, eval_details return metrics @@ -359,7 +367,8 @@ class FasterRCNN(BaseAPI): Returns: list: 预测结果列表,每个预测结果由预测框类别标签、 - 预测框类别名称、预测框坐标、预测框得分组成。 + 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -373,15 +382,14 @@ class FasterRCNN(BaseAPI): im = np.expand_dims(im, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0) im_shape = np.expand_dims(im_shape, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_info': im_resize_info, - 'im_shape': im_shape - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + 
outputs = self.exe.run(self.test_prog, + feed={ + 'image': im, + 'im_info': im_resize_info, + 'im_shape': im_shape + }, + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) diff --git a/paddlex/cv/models/hrnet.py b/paddlex/cv/models/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..0eec2be561911fd18bed97eef3e49b897c60510a --- /dev/null +++ b/paddlex/cv/models/hrnet.py @@ -0,0 +1,175 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +import paddle.fluid as fluid +import paddlex +from collections import OrderedDict +from .deeplabv3p import DeepLabv3p + + +class HRNet(DeepLabv3p): + """实现HRNet网络的构建并进行训练、评估、预测和模型导出。 + + Args: + num_classes (int): 类别数。 + width (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。 + use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 + use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。 + 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 + class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 + num_classes。当class_weight为str时, class_weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重 + 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1, + 即平时使用的交叉熵损失函数。 + ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。 + + Raises: + ValueError: use_bce_loss或use_dice_loss为真且num_classes > 2。 + ValueError: class_weight为list, 但长度不等于num_classes。 + class_weight为str, 但class_weight.lower()不等于dynamic。 + TypeError: class_weight不为None时,其类型不是list或str。 + """ + + def __init__(self, + num_classes=2, + width=18, + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + ignore_index=255): + self.init_params = locals() + super(DeepLabv3p, self).__init__('segmenter') + # dice_loss或bce_loss只适用于两类分割 + if num_classes > 2 and (use_bce_loss or use_dice_loss): + raise ValueError( + "dice loss and bce loss are only applicable to binary classification" + ) + + if class_weight is not None: + if isinstance(class_weight, list): + if len(class_weight) != num_classes: + raise ValueError( + "Length of class_weight should be equal to number of classes" + ) + elif isinstance(class_weight, str): + if class_weight.lower() != 'dynamic': + raise ValueError( + "if class_weight is string, must be dynamic!") + else: + raise TypeError( + 'Expect class_weight is a list or string but receive {}'.
+ format(type(class_weight))) + self.num_classes = num_classes + self.width = width + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.ignore_index = ignore_index + self.labels = None + + def build_net(self, mode='train'): + model = paddlex.cv.nets.segmentation.HRNet( + self.num_classes, + width=self.width, + mode=mode, + use_bce_loss=self.use_bce_loss, + use_dice_loss=self.use_dice_loss, + class_weight=self.class_weight, + ignore_index=self.ignore_index) + inputs = model.generate_inputs() + model_out = model.build_net(inputs) + outputs = OrderedDict() + if mode == 'train': + self.optimizer.minimize(model_out) + outputs['loss'] = model_out + elif mode == 'eval': + outputs['loss'] = model_out[0] + outputs['pred'] = model_out[1] + outputs['label'] = model_out[2] + outputs['mask'] = model_out[3] + else: + outputs['pred'] = model_out[0] + outputs['logit'] = model_out[1] + return inputs, outputs + + def default_optimizer(self, + learning_rate, + num_epochs, + num_steps_each_epoch, + lr_decay_power=0.9): + decay_step = num_epochs * num_steps_each_epoch + lr_decay = fluid.layers.polynomial_decay( + learning_rate, + decay_step, + end_learning_rate=0, + power=lr_decay_power) + optimizer = fluid.optimizer.Momentum( + lr_decay, + momentum=0.9, + regularization=fluid.regularizer.L2Decay( + regularization_coeff=5e-04)) + return optimizer + + def train(self, + num_epochs, + train_dataset, + train_batch_size=2, + eval_dataset=None, + save_interval_epochs=1, + log_interval_steps=2, + save_dir='output', + pretrain_weights='IMAGENET', + optimizer=None, + learning_rate=0.01, + lr_decay_power=0.9, + use_vdl=False, + sensitivities_file=None, + eval_metric_loss=0.05, + early_stop=False, + early_stop_patience=5, + resume_checkpoint=None): + """训练。 + + Args: + num_epochs (int): 训练迭代轮数。 + train_dataset (paddlex.datasets): 训练数据读取器。 + train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。 + eval_dataset (paddlex.datasets): 评估数据读取器。 + save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。 + log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 + save_dir (str): 模型保存路径。默认'output'。 + pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', + 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 + optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用 + fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 + learning_rate (float): 默认优化器的初始学习率。默认0.01。 + lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 + use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 + sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', + 则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 + eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 + early_stop (bool): 是否使用提前终止训练策略。默认值为False。 + early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 + 连续下降或持平,则终止训练。默认值为5。 + resume_checkpoint (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 + + Raises: + ValueError: 模型从inference model进行加载。 + """ + return super(HRNet, self).train( + num_epochs, train_dataset, train_batch_size, eval_dataset, + save_interval_epochs, log_interval_steps, save_dir, + pretrain_weights, optimizer, learning_rate, lr_decay_power, + use_vdl, sensitivities_file, eval_metric_loss, early_stop, + early_stop_patience, resume_checkpoint) diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py index ba5da33d8b2a660cf23a1d57b9bf97a312e29002..dab9c8c532eed5d5a0fc9842ae9d33be7101c202 100644 --- a/paddlex/cv/models/mask_rcnn.py +++ b/paddlex/cv/models/mask_rcnn.py @@ -32,7
class MaskRCNN(FasterRCNN): Args: num_classes (int): 包含了背景类的类别数。默认为81。 backbone (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', - 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 + 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 with_fpn (bool): 是否使用FPN结构。默认为True。 aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 @@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN): anchor_sizes=[32, 64, 128, 256, 512]): self.init_params = locals() backbones = [ - 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd' + 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', + 'HRNet_W18' ] assert backbone in backbones, "backbone should be one of {}".format( backbones) @@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN): model_out = model.build_net(inputs) loss = model_out['loss'] self.optimizer.minimize(loss) - outputs = OrderedDict([('loss', model_out['loss']), - ('loss_cls', model_out['loss_cls']), - ('loss_bbox', model_out['loss_bbox']), - ('loss_mask', model_out['loss_mask']), - ('loss_rpn_cls', model_out['loss_rpn_cls']), - ('loss_rpn_bbox', - model_out['loss_rpn_bbox'])]) + outputs = OrderedDict( + [('loss', model_out['loss']), + ('loss_cls', model_out['loss_cls']), + ('loss_bbox', model_out['loss_bbox']), + ('loss_mask', model_out['loss_mask']), + ('loss_rpn_cls', model_out['loss_rpn_cls']), ( + 'loss_rpn_bbox', model_out['loss_rpn_bbox'])]) else: outputs = model.build_net(inputs) return inputs, outputs @@ -194,7 +195,9 @@ class MaskRCNN(FasterRCNN): # 构建训练、验证、测试网络 self.build_program() fuse_bn = True - if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']: + if self.with_fpn and self.backbone in [ + 'ResNet18', 'ResNet50', 'HRNet_W18' + ]: fuse_bn = False self.net_initialize( startup_prog=fluid.default_startup_program(), @@ -276,11 +279,10 @@ class MaskRCNN(FasterRCNN): 'im_info': im_infos, 'im_shape': im_shapes, } - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()), @@ -292,8 +294,8 @@ class MaskRCNN(FasterRCNN): res['im_shape'] = (im_shapes, []) res['im_id'] = (np.array(res_im_id), []) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) ap_stats, eval_details = eval_results( results, @@ -302,8 +304,8 @@ class MaskRCNN(FasterRCNN): with_background=True, resolution=self.mask_head_resolution) if metric == 'VOC': - if isinstance(ap_stats[0], np.ndarray) and isinstance( - ap_stats[1], np.ndarray): + if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1], + np.ndarray): metrics = OrderedDict( zip(['bbox_map', 'segm_map'], [ap_stats[0][1], ap_stats[1][1]])) @@ -311,8 +313,8 @@ class MaskRCNN(FasterRCNN): metrics = OrderedDict( zip(['bbox_map', 'segm_map'], [0.0, 0.0])) elif metric == 'COCO': - if isinstance(ap_stats[0], np.ndarray) and isinstance( - ap_stats[1], np.ndarray): + if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1], + np.ndarray): metrics = OrderedDict( zip(['bbox_mmap', 'segm_mmap'], [ap_stats[0][0], ap_stats[1][0]])) @@ -331,8 +333,10 @@ class MaskRCNN(FasterRCNN): transforms (paddlex.det.transforms): 
数据预处理操作。 Returns: - dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、预测框坐标、预测框内的二值图、 - 预测框得分组成。 + dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、 + 预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 原图大小的预测二值图(1表示像素点属于预测类别,0表示像素点是背景)、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -346,15 +350,14 @@ class MaskRCNN(FasterRCNN): im = np.expand_dims(im, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0) im_shape = np.expand_dims(im_shape, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_info': im_resize_info, - 'im_shape': im_shape - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={ + 'image': im, + 'im_info': im_resize_info, + 'im_shape': im_shape + }, + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) @@ -368,8 +371,8 @@ class MaskRCNN(FasterRCNN): import pycocotools.mask as mask_util for index, xywh_res in enumerate(xywh_results): del xywh_res['image_id'] - xywh_res['mask'] = mask_util.decode( - segm_results[index]['segmentation']) + xywh_res['mask'] = mask_util.decode(segm_results[index][ + 'segmentation']) xywh_res['category'] = self.labels[xywh_res['category_id']] results.append(xywh_res) return results diff --git a/paddlex/cv/models/utils/pretrain_weights.py b/paddlex/cv/models/utils/pretrain_weights.py index 81790a20144d8c255601b8a778eebf02c409c55d..3abbdd93d80efd5eb41ead32ac321d758d080104 100644 --- a/paddlex/cv/models/utils/pretrain_weights.py +++ b/paddlex/cv/models/utils/pretrain_weights.py @@ -56,6 +56,20 @@ image_pretrain = { 'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar', 'ShuffleNetV2': 'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar', + 'HRNet_W18': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar', + 'HRNet_W30': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W30_C_pretrained.tar', + 'HRNet_W32': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W32_C_pretrained.tar', + 'HRNet_W40': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W40_C_pretrained.tar', + 'HRNet_W48': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W48_C_pretrained.tar', + 'HRNet_W60': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W60_C_pretrained.tar', + 'HRNet_W64': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar', } coco_pretrain = { diff --git a/paddlex/cv/models/yolo_v3.py b/paddlex/cv/models/yolo_v3.py index e1cd2d684f44a6cb93a0ac3415ccc30c653aa60e..9646c81272e22bccf1390f4a738d13c41cf5a445 100644 --- a/paddlex/cv/models/yolo_v3.py +++ b/paddlex/cv/models/yolo_v3.py @@ -306,11 +306,10 @@ class YOLOv3(BaseAPI): images = np.array([d[0] for d in data]) im_sizes = np.array([d[1] for d in data]) feed_data = {'image': images, 'im_size': im_sizes} - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()) } if metric == 'VOC': res_gt_box = [] res_gt_label = [] res_is_difficult = [] res['gt_box'] = (res_gt_box, []) res['gt_label'] = (res_gt_label, []) res['is_difficult'] = (res_is_difficult, []) results.append(res) -
logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) box_ap_stats, eval_details = eval_results( results, metric, eval_dataset.coco_gt, with_background=False) evaluate_metrics = OrderedDict( - zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], - box_ap_stats)) + zip(['bbox_mmap' + if metric == 'COCO' else 'bbox_map'], box_ap_stats)) if return_details: return evaluate_metrics, eval_details return evaluate_metrics @@ -346,7 +345,8 @@ class YOLOv3(BaseAPI): Returns: list: 预测结果列表,每个预测结果由预测框类别标签、 - 预测框类别名称、预测框坐标、预测框得分组成。 + 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -359,14 +359,11 @@ class YOLOv3(BaseAPI): im, im_size = self.test_transforms(img_file) im = np.expand_dims(im, axis=0) im_size = np.expand_dims(im_size, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_size': im_size - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={'image': im, + 'im_size': im_size}, + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) diff --git a/paddlex/cv/nets/__init__.py b/paddlex/cv/nets/__init__.py index 5948c529493e55aadf7721ab9eb046adf89f3a72..b1441c59395c2f7788dbab937ab5ad629d4aa940 100644 --- a/paddlex/cv/nets/__init__.py +++ b/paddlex/cv/nets/__init__.py @@ -23,6 +23,7 @@ from .segmentation import DeepLabv3p from .xception import Xception from .densenet import DenseNet from .shufflenet_v2 import ShuffleNetV2 +from .hrnet import HRNet def resnet18(input, num_classes=1000): @@ -51,14 +52,20 @@ def resnet50_vd(input, num_classes=1000): def resnet50_vd_ssld(input, num_classes=1000): - model = ResNet(layers=50, num_classes=num_classes, - variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) + model = ResNet( + layers=50, + num_classes=num_classes, + variant='d', + lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) return model(input) def resnet101_vd_ssld(input, num_classes=1000): - model = ResNet(layers=101, num_classes=num_classes, - variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) + model = ResNet( + layers=101, + num_classes=num_classes, + variant='d', + lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) return model(input) @@ -93,14 +100,18 @@ def mobilenetv3_large(input, num_classes=1000): def mobilenetv3_small_ssld(input, num_classes=1000): - model = MobileNetV3(num_classes=num_classes, model_name='small', - lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) + model = MobileNetV3( + num_classes=num_classes, + model_name='small', + lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) return model(input) def mobilenetv3_large_ssld(input, num_classes=1000): - model = MobileNetV3(num_classes=num_classes, model_name='large', - lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) + model = MobileNetV3( + num_classes=num_classes, + model_name='large', + lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) return model(input) @@ -133,6 +144,12 @@ def densenet201(input, num_classes=1000): model = DenseNet(layers=201, num_classes=num_classes) return model(input) + def shufflenetv2(input, num_classes=1000): model = ShuffleNetV2(num_classes=num_classes) return model(input) + + +def hrnet_w18(input, num_classes=1000): + model = HRNet(width=18, num_classes=num_classes) + 
return model(input) diff --git a/paddlex/cv/nets/darknet.py b/paddlex/cv/nets/darknet.py index 345f1c114228fbd402f554df6cdf5d77829a3eab..71c296c3127f855718df85a3a606e10c5cf49e25 100644 --- a/paddlex/cv/nets/darknet.py +++ b/paddlex/cv/nets/darknet.py @@ -68,13 +68,14 @@ class DarkNet(object): bias_attr=False) bn_name = name + ".bn" - + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(float(self.norm_decay)) bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') + regularizer=regularizer, name=bn_name + '.scale') bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') + regularizer=regularizer, name=bn_name + '.offset') out = fluid.layers.batch_norm( input=conv, @@ -182,4 +183,4 @@ class DarkNet(object): bias_attr=ParamAttr(name='fc_offset')) return out - return blocks \ No newline at end of file + return blocks diff --git a/paddlex/cv/nets/densenet.py b/paddlex/cv/nets/densenet.py index 5ec49a2601e3b9104bf50cb201ad30995aa0927c..a7238b2cd8775f20210d04d41f6caa1343c68092 100644 --- a/paddlex/cv/nets/densenet.py +++ b/paddlex/cv/nets/densenet.py @@ -173,4 +173,4 @@ class DenseNet(object): bn_ac_conv = fluid.layers.dropout( x=bn_ac_conv, dropout_prob=dropout) bn_ac_conv = fluid.layers.concat([input, bn_ac_conv], axis=1) - return bn_ac_conv \ No newline at end of file + return bn_ac_conv diff --git a/paddlex/cv/nets/detection/faster_rcnn.py b/paddlex/cv/nets/detection/faster_rcnn.py index 7e7bd63dcb64065a3c7616b2e344d7152cbfdfde..3a7cf75a187e88383a3d72f0a56c5e8758449505 100644 --- a/paddlex/cv/nets/detection/faster_rcnn.py +++ b/paddlex/cv/nets/detection/faster_rcnn.py @@ -21,7 +21,7 @@ import copy from paddle import fluid -from .fpn import FPN +from .fpn import (FPN, HRFPN) from .rpn_head import (RPNHead, FPNRPNHead) from .roi_extractor import (RoIAlign, FPNRoIAlign) from .bbox_head import (BBoxHead, TwoFCHead) @@ -82,7 +82,12 @@ class FasterRCNN(object): self.backbone = backbone self.mode = mode if with_fpn and fpn is None: - fpn = FPN() + if self.backbone.__class__.__name__.startswith('HRNet'): + fpn = HRFPN() + fpn.min_level = 2 + fpn.max_level = 6 + else: + fpn = FPN() self.fpn = fpn self.num_classes = num_classes if rpn_head is None: diff --git a/paddlex/cv/nets/detection/fpn.py b/paddlex/cv/nets/detection/fpn.py index 8fd843b149d38fc2f640aa34df9e26432a25899e..710304812a520f6775c1d241e7958c64a4536768 100644 --- a/paddlex/cv/nets/detection/fpn.py +++ b/paddlex/cv/nets/detection/fpn.py @@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Xavier from paddle.fluid.regularizer import L2Decay -__all__ = ['FPN'] +__all__ = ['FPN', 'HRFPN'] def ConvNorm(input, @@ -219,8 +219,8 @@ class FPN(object): body_name = body_name_list[i] body_input = body_dict[body_name] top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) + fpn_inner_single = self._add_topdown_lateral(body_name, body_input, + top_output) self.fpn_inner_output[i] = fpn_inner_single fpn_dict = {} fpn_name_list = [] @@ -293,3 +293,107 @@ class FPN(object): spatial_scale.insert(0, spatial_scale[0] * 0.5) res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) return res_dict, spatial_scale + + +class HRFPN(object): + """ + HRFPN (FPN for HRNet), see https://arxiv.org/abs/1908.07919 + + Args: + num_chan (int): number of feature channels + pooling_type (str): pooling type of downsampling + share_conv (bool): whether
diff --git a/paddlex/cv/nets/detection/fpn.py b/paddlex/cv/nets/detection/fpn.py index 8fd843b149d38fc2f640aa34df9e26432a25899e..710304812a520f6775c1d241e7958c64a4536768 100644 --- a/paddlex/cv/nets/detection/fpn.py +++ b/paddlex/cv/nets/detection/fpn.py @@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Xavier from paddle.fluid.regularizer import L2Decay -__all__ = ['FPN'] +__all__ = ['FPN', 'HRFPN'] def ConvNorm(input, @@ -219,8 +219,8 @@ class FPN(object): body_name = body_name_list[i] body_input = body_dict[body_name] top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) + fpn_inner_single = self._add_topdown_lateral(body_name, body_input, + top_output) self.fpn_inner_output[i] = fpn_inner_single fpn_dict = {} fpn_name_list = [] @@ -293,3 +293,107 @@ class FPN(object): spatial_scale.insert(0, spatial_scale[0] * 0.5) res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) return res_dict, spatial_scale + + +class HRFPN(object): + """ + HRFPN, the feature pyramid used with HRNet backbones; see https://arxiv.org/abs/1908.07919 + + Args: + num_chan (int): number of feature channels + pooling_type (str): pooling type of downsampling + share_conv (bool): whether to share conv for different layers' reduction + spatial_scale (list): feature map scaling factor + """ + + def __init__( + self, + num_chan=256, + pooling_type="avg", + share_conv=False, + spatial_scale=[1. / 64, 1. / 32, 1. / 16, 1. / 8, 1. / 4], ): + self.num_chan = num_chan + self.pooling_type = pooling_type + self.share_conv = share_conv + self.spatial_scale = spatial_scale + + def get_output(self, body_dict): + num_out = len(self.spatial_scale) + body_name_list = list(body_dict.keys()) + + num_backbone_stages = len(body_name_list) + + outs = [] + outs.append(body_dict[body_name_list[0]]) + + # resize + for i in range(1, len(body_dict)): + resized = self.resize_input_tensor(body_dict[body_name_list[i]], + outs[0], 2**i) + outs.append(resized) + + # concat + out = fluid.layers.concat(outs, axis=1) + + # reduction + out = fluid.layers.conv2d( + input=out, + num_filters=self.num_chan, + filter_size=1, + stride=1, + padding=0, + param_attr=ParamAttr(name='hrfpn_reduction_weights'), + bias_attr=False) + + # conv + outs = [out] + for i in range(1, num_out): + outs.append( + self.pooling( + out, + size=2**i, + stride=2**i, + pooling_type=self.pooling_type)) + outputs = [] + + for i in range(num_out): + conv_name = "shared_fpn_conv" if self.share_conv else "shared_fpn_conv_" + str( + i) + conv = fluid.layers.conv2d( + input=outs[i], + num_filters=self.num_chan, + filter_size=3, + stride=1, + padding=1, + param_attr=ParamAttr(name=conv_name + "_weights"), + bias_attr=False) + outputs.append(conv) + + for idx in range(0, num_out - len(body_name_list)): + body_name_list.append("fpn_res5_sum_subsampled_{}x".format(2**( + idx + 1))) + + outputs = outputs[::-1] + body_name_list = body_name_list[::-1] + + res_dict = OrderedDict([(body_name_list[k], outputs[k]) + for k in range(len(body_name_list))]) + return res_dict, self.spatial_scale + + def resize_input_tensor(self, body_input, ref_output, scale): + shape = fluid.layers.shape(ref_output) + shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4]) + out_shape_ = shape_hw + out_shape = fluid.layers.cast(out_shape_, dtype='int32') + out_shape.stop_gradient = True + body_output = fluid.layers.resize_bilinear( + body_input, scale=scale, out_shape=out_shape) + return body_output + + def pooling(self, input, size, stride, pooling_type): + pool = fluid.layers.pool2d( + input=input, + pool_size=size, + pool_stride=stride, + pool_type=pooling_type) + return pool diff --git a/paddlex/cv/nets/detection/mask_rcnn.py b/paddlex/cv/nets/detection/mask_rcnn.py index 010bacfe921137c69802ff25a405c65ea2141f7f..b67f44a61db87fd858c032617df2f191bbbda69f 100644 --- a/paddlex/cv/nets/detection/mask_rcnn.py +++ b/paddlex/cv/nets/detection/mask_rcnn.py @@ -21,7 +21,7 @@ import copy import paddle.fluid as fluid -from .fpn import FPN +from .fpn import (FPN, HRFPN) from .rpn_head import (RPNHead, FPNRPNHead) from .roi_extractor import (RoIAlign, FPNRoIAlign) from .bbox_head import (BBoxHead, TwoFCHead) @@ -92,11 +92,15 @@ class MaskRCNN(object): self.backbone = backbone self.mode = mode if with_fpn and fpn is None: - fpn = FPN( - num_chan=num_chan, - min_level=min_level, - max_level=max_level, - spatial_scale=spatial_scale) + if self.backbone.__class__.__name__.startswith('HRNet'): + fpn = HRFPN() + fpn.min_level = 2 + fpn.max_level = 6 + else: + fpn = FPN(num_chan=num_chan, + min_level=min_level, + max_level=max_level, + spatial_scale=spatial_scale) self.fpn = fpn self.num_classes = num_classes if rpn_head is None: diff --git a/paddlex/cv/nets/hrnet.py 
b/paddlex/cv/nets/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..19f9cb336bce66a7dc68d65e316440adf46857e4 --- /dev/null +++ b/paddlex/cv/nets/hrnet.py @@ -0,0 +1,474 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay + +from numbers import Integral +from paddle.fluid.initializer import MSRA +import math + +__all__ = ['HRNet'] + + +class HRNet(object): + def __init__(self, + width=40, + has_se=False, + freeze_at=0, + norm_type='bn', + freeze_norm=False, + norm_decay=0., + feature_maps=[2, 3, 4, 5], + num_classes=None): + super(HRNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn'] + + self.width = width + self.has_se = has_se + self.channels = { + 18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]], + 30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]], + 32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]], + 40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]], + 44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]], + 48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]], + 60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]], + 64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]], + } + + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.feature_maps = feature_maps + self.num_classes = num_classes + self.end_points = [] + return + + def net(self, input, class_dim=1000): + width = self.width + channels_2, channels_3, channels_4 = self.channels[width] + num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3 + + x = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=64, + stride=2, + if_act=True, + name='layer1_1') + x = self.conv_bn_layer( + input=x, + filter_size=3, + num_filters=64, + stride=2, + if_act=True, + name='layer1_2') + + la1 = self.layer1(x, name='layer2') + tr1 = self.transition_layer([la1], [256], channels_2, name='tr1') + st2 = self.stage(tr1, num_modules_2, channels_2, name='st2') + tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2') + st3 = self.stage(tr2, num_modules_3, channels_3, name='st3') + tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3') + st4 = self.stage(tr3, num_modules_4, channels_4, name='st4') + + # classification + if self.num_classes: + last_cls = self.last_cls_out(x=st4, name='cls_head') + y = last_cls[0] + last_num_filters = [256, 512, 1024] + for i in range(3): + y = fluid.layers.elementwise_add( + last_cls[i + 1], + 
self.conv_bn_layer( + input=y, + filter_size=3, + num_filters=last_num_filters[i], + stride=2, + name='cls_head_add' + str(i + 1))) + + y = self.conv_bn_layer( + input=y, + filter_size=1, + num_filters=2048, + stride=1, + name='cls_head_last_conv') + pool = fluid.layers.pool2d( + input=y, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name='fc_offset')) + return out + + # segmentation + if self.feature_maps == "stage4": + return st4 + + self.end_points = st4 + return st4[-1] + + def layer1(self, input, name=None): + conv = input + for i in range(4): + conv = self.bottleneck_block( + conv, + num_filters=64, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1)) + return conv + + def transition_layer(self, x, in_channels, out_channels, name=None): + num_in = len(in_channels) + num_out = len(out_channels) + out = [] + for i in range(num_out): + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.conv_bn_layer( + x[i], + filter_size=3, + num_filters=out_channels[i], + name=name + '_layer_' + str(i + 1)) + out.append(residual) + else: + out.append(x[i]) + else: + residual = self.conv_bn_layer( + x[-1], + filter_size=3, + num_filters=out_channels[i], + stride=2, + name=name + '_layer_' + str(i + 1)) + out.append(residual) + return out + + def branches(self, x, block_num, channels, name=None): + out = [] + for i in range(len(channels)): + residual = x[i] + for j in range(block_num): + residual = self.basic_block( + residual, + channels[i], + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1)) + out.append(residual) + return out + + def fuse_layers(self, x, channels, multi_scale_output=True, name=None): + out = [] + for i in range(len(channels) if multi_scale_output else 1): + residual = x[i] + if self.feature_maps == "stage4": + shape = fluid.layers.shape(residual) + width = shape[-1] + height = shape[-2] + for j in range(len(channels)): + if j > i: + y = self.conv_bn_layer( + x[j], + filter_size=1, + num_filters=channels[i], + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + str(j + 1)) + if self.feature_maps == "stage4": + y = fluid.layers.resize_bilinear( + input=y, out_shape=[height, width]) + else: + y = fluid.layers.resize_nearest( + input=y, scale=2**(j - i)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = x[j] + for k in range(i - j): + if k == i - j - 1: + y = self.conv_bn_layer( + y, + filter_size=3, + num_filters=channels[i], + stride=2, + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + else: + y = self.conv_bn_layer( + y, + filter_size=3, + num_filters=channels[j], + stride=2, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + residual = fluid.layers.relu(residual) + out.append(residual) + return out + + def high_resolution_module(self, + x, + channels, + multi_scale_output=True, + name=None): + residual = self.branches(x, 4, channels, name=name) + out = self.fuse_layers( + residual, + channels, + multi_scale_output=multi_scale_output, + name=name) + return out + + def stage(self, + x, + num_modules, + channels, + multi_scale_output=True, + name=None): + out = x + for i in range(num_modules): + if i == num_modules - 1 and 
multi_scale_output == False: + out = self.high_resolution_module( + out, + channels, + multi_scale_output=False, + name=name + '_' + str(i + 1)) + else: + out = self.high_resolution_module( + out, channels, name=name + '_' + str(i + 1)) + + return out + + def last_cls_out(self, x, name=None): + out = [] + num_filters_list = [32, 64, 128, 256] + for i in range(len(x)): + out.append( + self.bottleneck_block( + input=x[i], + num_filters=num_filters_list[i], + name=name + 'conv_' + str(i + 1), + downsample=True)) + return out + + def basic_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv1') + conv = self.conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + if_act=False, + name=name + '_conv2') + if downsample: + residual = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + if_act=False, + name=name + '_downsample') + if self.has_se: + conv = self.squeeze_excitation( + input=conv, + num_channels=num_filters, + reduction_ratio=16, + name=name + '_fc') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def bottleneck_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + name=name + '_conv1') + conv = self.conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv2') + conv = self.conv_bn_layer( + input=conv, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_conv3') + if downsample: + residual = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_downsample') + if self.has_se: + conv = self.squeeze_excitation( + input=conv, + num_channels=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def squeeze_excitation(self, + input, + num_channels, + reduction_ratio, + name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=0, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + squeeze = fluid.layers.fc( + input=pool, + size=num_channels // reduction_ratio, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_sqz_weights'), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) + excitation = fluid.layers.fc( + input=squeeze, + size=num_channels, + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_exc_weights'), + bias_attr=ParamAttr(name=name + '_exc_offset')) + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride=1, + padding=1, + num_groups=1, + if_act=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + act=None, + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = self._bn(input=conv, bn_name=bn_name) + if if_act: + bn = fluid.layers.relu(bn) + return 
bn + + def _bn(self, input, act=None, bn_name=None): + norm_lr = 0. if self.freeze_norm else 1. + norm_decay = self.norm_decay + if self.num_classes or self.feature_maps == "stage4": + regularizer = None + pattr_initializer = fluid.initializer.Constant(1.0) + battr_initializer = fluid.initializer.Constant(0.0) + else: + regularizer = L2Decay(norm_decay) + pattr_initializer = None + battr_initializer = None + pattr = ParamAttr( + name=bn_name + '_scale', + learning_rate=norm_lr, + regularizer=regularizer, + initializer=pattr_initializer) + battr = ParamAttr( + name=bn_name + '_offset', + learning_rate=norm_lr, + regularizer=regularizer, + initializer=battr_initializer) + + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=input, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def __call__(self, input): + assert isinstance(input, Variable) + if isinstance(self.feature_maps, (list, tuple)): + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + out = self.net(input) + if self.num_classes or self.feature_maps == "stage4": + return out + + for i in feature_maps: + res = self.end_points[i - 2] + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + + return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) + for idx, feat in enumerate(res_endpoints)]) diff --git a/paddlex/cv/nets/mobilenet_v1.py b/paddlex/cv/nets/mobilenet_v1.py index 3ee1a8c1561cdc0308c84a1a79ab223763b17148..c9b99255fb36eb9a9b44ea12ba5ed3c099620db4 100755 --- a/paddlex/cv/nets/mobilenet_v1.py +++ b/paddlex/cv/nets/mobilenet_v1.py @@ -79,10 +79,14 @@ class MobileNetV1(object): bn_name = name + "_bn" norm_decay = self.norm_decay + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(norm_decay) bn_param_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_scale') + regularizer=regularizer, name=bn_name + '_scale') bn_bias_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_offset') + regularizer=regularizer, name=bn_name + '_offset') return fluid.layers.batch_norm( input=conv, act=act, @@ -189,12 +193,12 @@ class MobileNetV1(object): if self.num_classes: out = fluid.layers.pool2d( input=out, pool_type='avg', global_pooling=True) - output = fluid.layers.fc( - input=out, - size=self.num_classes, - param_attr=ParamAttr( - initializer=fluid.initializer.MSRA(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) + output = fluid.layers.fc(input=out, + size=self.num_classes, + param_attr=ParamAttr( + initializer=fluid.initializer.MSRA(), + name="fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) return output if not self.with_extra_blocks: @@ -213,4 +217,4 @@ class MobileNetV1(object): module17 = self._extra_block(module16, num_filters[3][0], num_filters[3][1], 1, 2, self.prefix_name + "conv7_4") - return module11, module13, module14, module15, module16, module17 \ No newline at end of file + return module11, module13, module14, module15, module16, module17 diff --git 
a/paddlex/cv/nets/mobilenet_v3.py b/paddlex/cv/nets/mobilenet_v3.py index 5f85ccda1f119a4b14bdf3e114bbe5b88a1b46fe..6adcee03d7bb9c5ffab0ceb7198083e3534e7ab9 100644 --- a/paddlex/cv/nets/mobilenet_v3.py +++ b/paddlex/cv/nets/mobilenet_v3.py @@ -31,6 +31,7 @@ class MobileNetV3(): with_extra_blocks (bool): if extra blocks should be added. extra_block_filters (list): number of filter for each extra block. """ + def __init__(self, scale=1.0, model_name='small', @@ -113,29 +114,36 @@ class MobileNetV3(): lr_idx = self.curr_stage // self.lr_interval lr_idx = min(lr_idx, len(self.lr_mult_list) - 1) lr_mult = self.lr_mult_list[lr_idx] - conv_param_attr = ParamAttr(name=name + '_weights', - learning_rate=lr_mult, - regularizer=L2Decay(self.conv_decay)) - conv = fluid.layers.conv2d(input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=conv_param_attr, - bias_attr=False) + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(self.conv_decay) + conv_param_attr = ParamAttr( + name=name + '_weights', + learning_rate=lr_mult, + regularizer=regularizer) + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=conv_param_attr, + bias_attr=False) bn_name = name + '_bn' - bn_param_attr = ParamAttr(name=bn_name + "_scale", - regularizer=L2Decay(self.norm_decay)) - bn_bias_attr = ParamAttr(name=bn_name + "_offset", - regularizer=L2Decay(self.norm_decay)) - bn = fluid.layers.batch_norm(input=conv, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + bn_param_attr = ParamAttr( + name=bn_name + "_scale", regularizer=L2Decay(self.norm_decay)) + bn_bias_attr = ParamAttr( + name=bn_name + "_offset", regularizer=L2Decay(self.norm_decay)) + bn = fluid.layers.batch_norm( + input=conv, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') if if_act: if act == 'relu': bn = fluid.layers.relu(bn) @@ -152,12 +160,10 @@ class MobileNetV3(): lr_idx = self.curr_stage // self.lr_interval lr_idx = min(lr_idx, len(self.lr_mult_list) - 1) lr_mult = self.lr_mult_list[lr_idx] - + num_mid_filter = int(num_out_filter // ratio) - pool = fluid.layers.pool2d(input=input, - pool_type='avg', - global_pooling=True, - use_cudnn=False) + pool = fluid.layers.pool2d( + input=input, pool_type='avg', global_pooling=True, use_cudnn=False) conv1 = fluid.layers.conv2d( input=pool, filter_size=1, @@ -191,43 +197,46 @@ class MobileNetV3(): use_se=False, name=None): input_data = input - conv0 = self._conv_bn_layer(input=input, - filter_size=1, - num_filters=num_mid_filter, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + '_expand') + conv0 = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_mid_filter, + stride=1, + padding=0, + if_act=True, + act=act, + name=name + '_expand') if self.block_stride == 16 and stride == 2: self.end_points.append(conv0) - conv1 = self._conv_bn_layer(input=conv0, - filter_size=filter_size, - num_filters=num_mid_filter, - stride=stride, - padding=int((filter_size - 1) // 2), - if_act=True, - act=act, - num_groups=num_mid_filter, - use_cudnn=False, - name=name + '_depthwise') + conv1 = self._conv_bn_layer( + input=conv0, + 
filter_size=filter_size, + num_filters=num_mid_filter, + stride=stride, + padding=int((filter_size - 1) // 2), + if_act=True, + act=act, + num_groups=num_mid_filter, + use_cudnn=False, + name=name + '_depthwise') if use_se: - conv1 = self._se_block(input=conv1, - num_out_filter=num_mid_filter, - name=name + '_se') + conv1 = self._se_block( + input=conv1, num_out_filter=num_mid_filter, name=name + '_se') - conv2 = self._conv_bn_layer(input=conv1, - filter_size=1, - num_filters=num_out_filter, - stride=1, - padding=0, - if_act=False, - name=name + '_linear') + conv2 = self._conv_bn_layer( + input=conv1, + filter_size=1, + num_filters=num_out_filter, + stride=1, + padding=0, + if_act=False, + name=name + '_linear') if num_in_filter != num_out_filter or stride != 1: return conv2 else: - return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None) + return fluid.layers.elementwise_add( + x=input_data, y=conv2, act=None) def _extra_block_dw(self, input, @@ -235,29 +244,32 @@ class MobileNetV3(): num_filters2, stride, name=None): - pointwise_conv = self._conv_bn_layer(input=input, - filter_size=1, - num_filters=int(num_filters1), - stride=1, - padding="SAME", - act='relu6', - name=name + "_extra1") - depthwise_conv = self._conv_bn_layer(input=pointwise_conv, - filter_size=3, - num_filters=int(num_filters2), - stride=stride, - padding="SAME", - num_groups=int(num_filters1), - act='relu6', - use_cudnn=False, - name=name + "_extra2_dw") - normal_conv = self._conv_bn_layer(input=depthwise_conv, - filter_size=1, - num_filters=int(num_filters2), - stride=1, - padding="SAME", - act='relu6', - name=name + "_extra2_sep") + pointwise_conv = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=int(num_filters1), + stride=1, + padding="SAME", + act='relu6', + name=name + "_extra1") + depthwise_conv = self._conv_bn_layer( + input=pointwise_conv, + filter_size=3, + num_filters=int(num_filters2), + stride=stride, + padding="SAME", + num_groups=int(num_filters1), + act='relu6', + use_cudnn=False, + name=name + "_extra2_dw") + normal_conv = self._conv_bn_layer( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2), + stride=1, + padding="SAME", + act='relu6', + name=name + "_extra2_sep") return normal_conv def __call__(self, input): @@ -282,36 +294,39 @@ class MobileNetV3(): self.block_stride *= layer_cfg[5] if layer_cfg[5] == 2: blocks.append(conv) - conv = self._residual_unit(input=conv, - num_in_filter=inplanes, - num_mid_filter=int(scale * layer_cfg[1]), - num_out_filter=int(scale * layer_cfg[2]), - act=layer_cfg[4], - stride=layer_cfg[5], - filter_size=layer_cfg[0], - use_se=layer_cfg[3], - name='conv' + str(i + 2)) - + conv = self._residual_unit( + input=conv, + num_in_filter=inplanes, + num_mid_filter=int(scale * layer_cfg[1]), + num_out_filter=int(scale * layer_cfg[2]), + act=layer_cfg[4], + stride=layer_cfg[5], + filter_size=layer_cfg[0], + use_se=layer_cfg[3], + name='conv' + str(i + 2)) + inplanes = int(scale * layer_cfg[2]) i += 1 self.curr_stage = i blocks.append(conv) if self.num_classes: - conv = self._conv_bn_layer(input=conv, - filter_size=1, - num_filters=int(scale * self.cls_ch_squeeze), - stride=1, - padding=0, - num_groups=1, - if_act=True, - act='hard_swish', - name='conv_last') - - conv = fluid.layers.pool2d(input=conv, - pool_type='avg', - global_pooling=True, - use_cudnn=False) + conv = self._conv_bn_layer( + input=conv, + filter_size=1, + num_filters=int(scale * self.cls_ch_squeeze), + stride=1, + padding=0, + num_groups=1, + if_act=True, + 
act='hard_swish', + name='conv_last') + + conv = fluid.layers.pool2d( + input=conv, + pool_type='avg', + global_pooling=True, + use_cudnn=False) conv = fluid.layers.conv2d( input=conv, num_filters=self.cls_ch_expand, @@ -326,22 +341,23 @@ class MobileNetV3(): out = fluid.layers.fc(input=drop, size=self.num_classes, param_attr=ParamAttr(name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) + bias_attr=ParamAttr(name='fc_offset')) return out if not self.with_extra_blocks: return blocks # extra block - conv_extra = self._conv_bn_layer(conv, - filter_size=1, - num_filters=int(scale * cfg[-1][1]), - stride=1, - padding="SAME", - num_groups=1, - if_act=True, - act='hard_swish', - name='conv' + str(i + 2)) + conv_extra = self._conv_bn_layer( + conv, + filter_size=1, + num_filters=int(scale * cfg[-1][1]), + stride=1, + padding="SAME", + num_groups=1, + if_act=True, + act='hard_swish', + name='conv' + str(i + 2)) self.end_points.append(conv_extra) i += 1 for block_filter in self.extra_block_filters: diff --git a/paddlex/cv/nets/resnet.py b/paddlex/cv/nets/resnet.py index 40c6965fccbc823f06abc8270c30d7c3e7e21c74..ff7a8d17ac9862f319d81ddcc5cb938918677692 100644 --- a/paddlex/cv/nets/resnet.py +++ b/paddlex/cv/nets/resnet.py @@ -135,8 +135,10 @@ class ResNet(object): filter_size=filter_size, stride=stride, padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + param_attr=ParamAttr( + initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr( + initializer=Constant(0.0), name=name + ".b_0"), act=act, name=name) return out @@ -151,7 +153,8 @@ class ResNet(object): name=None, dcn_v2=False, use_lr_mult_list=False): - lr_mult = self.lr_mult_list[self.curr_stage] if use_lr_mult_list else 1.0 + lr_mult = self.lr_mult_list[ + self.curr_stage] if use_lr_mult_list else 1.0 _name = self.prefix_name + name if self.prefix_name != '' else name if not dcn_v2: conv = fluid.layers.conv2d( @@ -162,8 +165,8 @@ class ResNet(object): padding=(filter_size - 1) // 2, groups=groups, act=None, - param_attr=ParamAttr(name=_name + "_weights", - learning_rate=lr_mult), + param_attr=ParamAttr( + name=_name + "_weights", learning_rate=lr_mult), bias_attr=False, name=_name + '.conv2d.output.1') else: @@ -202,14 +205,18 @@ class ResNet(object): norm_lr = 0. 
if self.freeze_norm else lr_mult norm_decay = self.norm_decay + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(norm_decay) pattr = ParamAttr( name=bn_name + '_scale', learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) + regularizer=regularizer) battr = ParamAttr( name=bn_name + '_offset', learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) + regularizer=regularizer) if self.norm_type in ['bn', 'sync_bn']: global_stats = True if self.freeze_norm else False @@ -262,8 +269,8 @@ class ResNet(object): pool_padding=0, ceil_mode=True, pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name, - use_lr_mult_list=True) + return self._conv_norm( + input, ch_out, 1, 1, name=name, use_lr_mult_list=True) return self._conv_norm(input, ch_out, 1, stride, name=name) else: return input diff --git a/paddlex/cv/nets/segmentation/__init__.py b/paddlex/cv/nets/segmentation/__init__.py index f5af8c95426abb9c7b181ff8c717fe99edbf9760..9b8f4391102fd41ac2b60c54141af534d390b071 100644 --- a/paddlex/cv/nets/segmentation/__init__.py +++ b/paddlex/cv/nets/segmentation/__init__.py @@ -14,5 +14,6 @@ from .unet import UNet from .deeplabv3p import DeepLabv3p +from .hrnet import HRNet from .model_utils import libs from .model_utils import loss diff --git a/paddlex/cv/nets/segmentation/deeplabv3p.py b/paddlex/cv/nets/segmentation/deeplabv3p.py index 08dad240c0b28d6e6e13845dcc0c9148c442014f..60a34d0128bf271d42fa8658100318ce05e31812 100644 --- a/paddlex/cv/nets/segmentation/deeplabv3p.py +++ b/paddlex/cv/nets/segmentation/deeplabv3p.py @@ -135,7 +135,8 @@ class DeepLabv3p(object): param_attr = fluid.ParamAttr( name=name_scope + 'weights', regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.06)) with scope('encoder'): channel = 256 with scope("image_pool"): @@ -151,8 +152,8 @@ class DeepLabv3p(object): padding=0, param_attr=param_attr)) input_shape = fluid.layers.shape(input) - image_avg = fluid.layers.resize_bilinear( - image_avg, input_shape[2:]) + image_avg = fluid.layers.resize_bilinear(image_avg, + input_shape[2:]) with scope("aspp0"): aspp0 = bn_relu( @@ -244,7 +245,8 @@ class DeepLabv3p(object): param_attr = fluid.ParamAttr( name=name_scope + 'weights', regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.06)) with scope('decoder'): with scope('concat'): decode_shortcut = bn_relu( @@ -326,9 +328,6 @@ class DeepLabv3p(object): if self.mode == 'train': inputs['label'] = fluid.data( dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') return inputs def build_net(self, inputs): @@ -351,7 +350,8 @@ class DeepLabv3p(object): name=name_scope + 'weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.01)) with scope('logit'): with fluid.name_scope('last_conv'): logit = conv( diff --git a/paddlex/cv/nets/segmentation/hrnet.py b/paddlex/cv/nets/segmentation/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..65f3bfbc2514a325da28c022b2ae2c434d7a2eb4 --- /dev/null +++ b/paddlex/cv/nets/segmentation/hrnet.py @@ -0,0 +1,196 @@ +# coding: utf8 +# copyright (c) 2020 
PaddlePaddle Authors. All Rights Reserved. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + + from collections import OrderedDict + + import paddle.fluid as fluid + from paddle.fluid.initializer import MSRA + from paddle.fluid.param_attr import ParamAttr + from .model_utils.libs import sigmoid_to_softmax + from .model_utils.loss import softmax_with_loss + from .model_utils.loss import dice_loss + from .model_utils.loss import bce_loss + import paddlex + import paddlex.utils.logging as logging + + + class HRNet(object): + def __init__(self, + num_classes, + mode='train', + width=18, + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + ignore_index=255): + # dice_loss或bce_loss只适用于两类分割 + if num_classes > 2 and (use_bce_loss or use_dice_loss): + raise ValueError( + "dice loss and bce loss are only applicable to binary classification" + ) + + if class_weight is not None: + if isinstance(class_weight, list): + if len(class_weight) != num_classes: + raise ValueError( + "Length of class_weight should be equal to number of classes" + ) + elif isinstance(class_weight, str): + if class_weight.lower() != 'dynamic': + raise ValueError( + "if class_weight is string, must be dynamic!") + else: + raise TypeError( + 'Expected class_weight to be a list or string but received {}'. 
+ format(type(class_weight))) + + self.num_classes = num_classes + self.mode = mode + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.ignore_index = ignore_index + self.backbone = paddlex.cv.nets.hrnet.HRNet( + width=width, feature_maps="stage4") + + def build_net(self, inputs): + if self.use_dice_loss or self.use_bce_loss: + self.num_classes = 1 + image = inputs['image'] + st4 = self.backbone(image) + # upsample + shape = fluid.layers.shape(st4[0])[-2:] + st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=shape) + st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=shape) + st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape) + + out = fluid.layers.concat(st4, axis=1) + last_channels = sum(self.backbone.channels[self.backbone.width][-1]) + + out = self._conv_bn_layer( + input=out, + filter_size=1, + num_filters=last_channels, + stride=1, + if_act=True, + name='conv-2') + out = fluid.layers.conv2d( + input=out, + num_filters=self.num_classes, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr( + initializer=MSRA(), name='conv-1_weights'), + bias_attr=False) + + input_shape = fluid.layers.shape(image)[-2:] + logit = fluid.layers.resize_bilinear(out, input_shape) + + if self.num_classes == 1: + out = sigmoid_to_softmax(logit) + out = fluid.layers.transpose(out, [0, 2, 3, 1]) + else: + out = fluid.layers.transpose(logit, [0, 2, 3, 1]) + + pred = fluid.layers.argmax(out, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + + if self.mode == 'train': + label = inputs['label'] + mask = label != self.ignore_index + return self._get_loss(logit, label, mask) + elif self.mode == 'eval': + label = inputs['label'] + mask = label != self.ignore_index + loss = self._get_loss(logit, label, mask) + return loss, pred, label, mask + else: + if self.num_classes == 1: + logit = sigmoid_to_softmax(logit) + else: + logit = fluid.layers.softmax(logit, axis=1) + return pred, logit + + def generate_inputs(self): + inputs = OrderedDict() + inputs['image'] = fluid.data( + dtype='float32', shape=[None, 3, None, None], name='image') + if self.mode == 'train': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + elif self.mode == 'eval': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + return inputs + + def _get_loss(self, logit, label, mask): + avg_loss = 0 + if not (self.use_dice_loss or self.use_bce_loss): + avg_loss += softmax_with_loss( + logit, + label, + mask, + num_classes=self.num_classes, + weight=self.class_weight, + ignore_index=self.ignore_index) + else: + if self.use_dice_loss: + avg_loss += dice_loss(logit, label, mask) + if self.use_bce_loss: + avg_loss += bce_loss( + logit, label, mask, ignore_index=self.ignore_index) + + return avg_loss + + def _conv_bn_layer(self, + input, + filter_size, + num_filters, + stride=1, + padding=1, + num_groups=1, + if_act=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + act=None, + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr( + name=bn_name + "_scale", + initializer=fluid.initializer.Constant(1.0)), + bias_attr=ParamAttr( + name=bn_name + "_offset", + initializer=fluid.initializer.Constant(0.0)), + 
moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + bn = fluid.layers.relu(bn) + return bn diff --git a/paddlex/cv/transforms/cls_transforms.py b/paddlex/cv/transforms/cls_transforms.py index bcb8e6d38de9becacce4d80e2ff54588c15352f4..6dc4ea7b95d876ae896c77395ab155bec1727a8a 100644 --- a/paddlex/cv/transforms/cls_transforms.py +++ b/paddlex/cv/transforms/cls_transforms.py @@ -92,6 +92,12 @@ class Compose(ClsTransform): outputs = (im, label) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + self.transforms = augmenters + self.transforms + class RandomCrop(ClsTransform): """对图像进行随机剪裁,模型训练时的数据增强操作。 @@ -461,3 +467,56 @@ class ArrangeClassifier(ClsTransform): else: outputs = (im, ) return outputs + + +class ComposedClsTransforms(Compose): + """ 分类模型的基础Transforms流程,具体如下 + 训练阶段: + 1. 随机从图像中crop一块子图,并resize成crop_size大小 + 2. 将1的输出按0.5的概率随机进行水平翻转 + 3. 将图像进行归一化 + 验证/预测阶段: + 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14 + 2. 从图像中心crop出一个大小为crop_size的图像 + 3. 将图像进行归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + crop_size(int|list): 输入模型里的图像大小 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + crop_size=[224, 224], + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = crop_size + if isinstance(crop_size, list): + if crop_size[0] != crop_size[1]: + raise Exception( + "In classifier model, width and height should be equal, please modify your parameter `crop_size`" + ) + width = crop_size[0] + if width % 32 != 0: + raise Exception( + "In classifier model, width and height should be a multiple of 32, e.g. 224, 256, 320, please modify your parameter `crop_size`" + ) + + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5), + Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [ + ResizeByShort(short_size=int(width * 1.14)), + CenterCrop(crop_size=width), Normalize( + mean=mean, std=std) + ] + + super(ComposedClsTransforms, self).__init__(transforms)
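A quick usage sketch; the numbers follow the docstring above, so for crop_size=[224, 224] evaluation resizes the short side to int(224 * 1.14) = 255 before the center crop:

```python
from paddlex.cv.transforms.cls_transforms import ComposedClsTransforms

# Training pipeline: RandomCrop(224) -> RandomHorizontalFlip(0.5) -> Normalize
train_transforms = ComposedClsTransforms(mode='train', crop_size=[224, 224])
# Eval pipeline: ResizeByShort(255) -> CenterCrop(224) -> Normalize
eval_transforms = ComposedClsTransforms(mode='eval', crop_size=[224, 224])
```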
diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index ca892d75bc7913cd1e238bb96e82e76f9a4fd716..19db33173b87b7cc20b87054cfbc1241176abc58 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -152,6 +152,12 @@ class Compose(DetTransform): outputs = (im, im_info) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + self.transforms = augmenters + self.transforms + class ResizeByShort(DetTransform): """根据图像的短边调整图像大小(resize)。 @@ -1227,3 +1233,108 @@ class ArrangeYOLOv3(DetTransform): im_shape = im_info['image_shape'] outputs = (im, im_shape) return outputs + + +class ComposedRCNNTransforms(Compose): + """ RCNN模型(faster-rcnn/mask-rcnn)图像处理流程,具体如下, + 训练阶段: + 1. 随机以0.5的概率将图像水平翻转 + 2. 图像归一化 + 3. 图像按比例Resize,scale计算方式如下 + scale = min_max_size[0] / short_size_of_image + if max_size_of_image * scale > min_max_size[1]: + scale = min_max_size[1] / max_size_of_image + 4. 将3步骤的长宽进行padding,使得长宽为32的倍数 + 验证阶段: + 1. 图像归一化 + 2. 图像按比例Resize,scale计算方式同上训练阶段 + 3. 将2步骤的长宽进行padding,使得长宽为32的倍数 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + min_max_size(list): 图像在缩放时,最小边和最大边的约束条件 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + min_max_size=[800, 1333], + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomHorizontalFlip(prob=0.5), Normalize( + mean=mean, std=std), ResizeByShort( + short_size=min_max_size[0], max_size=min_max_size[1]), + Padding(coarsest_stride=32) + ] + else: + # 验证/预测时的transforms + transforms = [ + Normalize( + mean=mean, std=std), ResizeByShort( + short_size=min_max_size[0], max_size=min_max_size[1]), + Padding(coarsest_stride=32) + ] + + super(ComposedRCNNTransforms, self).__init__(transforms) + + +class ComposedYOLOTransforms(Compose): + """YOLOv3模型的图像预处理流程,具体如下, + 训练阶段: + 1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage + 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调 + 3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand + 4. 随机裁剪图像 + 5. 将4步骤的输出图像Resize成shape参数的大小 + 6. 随机0.5的概率水平翻转图像 + 7. 图像归一化 + 验证/预测阶段: + 1. 将图像Resize成shape参数大小 + 2. 图像归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小 + mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + shape=[608, 608], + mixup_epoch=250, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = shape + if isinstance(shape, list): + if shape[0] != shape[1]: + raise Exception( + "In YOLOv3 model, width and height should be equal") + width = shape[0] + if width % 32 != 0: + raise Exception( + "In YOLOv3 model, width and height should be a multiple of 32, e.g. 224, 256, 320" + ) + + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + MixupImage(mixup_epoch=mixup_epoch), RandomDistort(), + RandomExpand(), RandomCrop(), Resize( + target_size=width, + interp='RANDOM'), RandomHorizontalFlip(), Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [ + Resize( + target_size=width, interp='CUBIC'), Normalize( + mean=mean, std=std) + ] + super(ComposedYOLOTransforms, self).__init__(transforms)
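The ResizeByShort scale rule quoted in the ComposedRCNNTransforms docstring is easy to sanity-check in isolation; a small illustrative helper (the function name and example values are mine, not part of the patch):

```python
def rcnn_resize_scale(short_size, long_size, min_max_size=(800, 1333)):
    # Scale so the short side reaches min_max_size[0], but cap the long
    # side at min_max_size[1], exactly as the docstring describes.
    scale = min_max_size[0] / short_size
    if long_size * scale > min_max_size[1]:
        scale = min_max_size[1] / long_size
    return scale

# A 600x1400 image: 800/600 ≈ 1.333 would push the long side past 1333,
# so the scale drops back to 1333/1400 ≈ 0.952.
print(rcnn_resize_scale(600, 1400))
```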
diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py index e562ba2601677085fdef23c57a6779ba77143f8d..d3c67648d500d915315c5607cfc5c2f5538a9090 100644 --- a/paddlex/cv/transforms/seg_transforms.py +++ b/paddlex/cv/transforms/seg_transforms.py @@ -108,6 +108,12 @@ class Compose(SegTransform): outputs = (im, im_info) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + self.transforms = augmenters + self.transforms + class RandomHorizontalFlip(SegTransform): """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 @@ -1088,3 +1094,39 @@ class ArrangeSegmenter(SegTransform): return (im, im_info) else: return (im, ) + + +class ComposedSegTransforms(Compose): + """ 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下 + 训练阶段: + 1. 随机对图像以0.5的概率水平翻转 + 2. 按不同的比例随机Resize原图 + 3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小 + 4. 图像归一化 + 验证/预测阶段: + 1. 将图像Resize至512大小 + 2. 图像归一化 + + Args: + mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + train_crop_size(list): 模型训练阶段,随机从原图crop的大小 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + train_crop_size=[769, 769], + mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]): + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomHorizontalFlip(prob=0.5), ResizeStepScaling(), + RandomPaddingCrop(crop_size=train_crop_size), Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [Resize(512), Normalize(mean=mean, std=std)] + + super(ComposedSegTransforms, self).__init__(transforms) diff --git a/paddlex/deploy.py b/paddlex/deploy.py index bb2618c1d844836a4884d93218f7d67434103b8e..0aee491ecdda1609b8827f94d0412a26bf053650 100644 --- a/paddlex/deploy.py +++ b/paddlex/deploy.py @@ -97,8 +97,6 @@ class Predictor: config.disable_glog_info() if memory_optimize: config.enable_memory_optim() - else: - config.diable_memory_optim() # 开启计算图分析优化,包括OP融合等 config.switch_ir_optim(True) diff --git a/paddlex/seg.py b/paddlex/seg.py index 0f92813d45b4e7f5e08ee64fbd6cfa675087ba4a..c19ec30c883e92f91dabea4488d1890495c99934 100644 --- a/paddlex/seg.py +++ b/paddlex/seg.py @@ -17,5 +17,6 @@ from . import cv UNet = cv.models.UNet DeepLabv3p = cv.models.DeepLabv3p +HRNet = cv.models.HRNet transforms = cv.transforms.seg_transforms visualize = cv.models.utils.visualize.visualize_segmentation diff --git a/paddlex/tools/x2coco.py b/paddlex/tools/x2coco.py index 595a6e6d328d08ff9aa8000c5afb465786c31431..4c893dcc9319ffc4353d4e376a802301d047120a 100644 --- a/paddlex/tools/x2coco.py +++ b/paddlex/tools/x2coco.py @@ -110,7 +110,7 @@ class LabelMe2COCO(X2COCO): annotation["segmentation"] = [list(np.asarray(points).flatten())] annotation["iscrowd"] = 0 annotation["image_id"] = image_id + 1 - annotation["bbox"] = list(map(float, get_bbox(height, width, points))) + annotation["bbox"] = list(map(float, self.get_bbox(height, width, points))) annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3] annotation["category_id"] = label_to_num[label] annotation["id"] = object_id + 1 @@ -254,4 +254,4 @@ class EasyData2COCO(X2COCO): segmentation.append(contour_list) self.annotations_list.append( self.generate_polygon_anns_field(points, segmentation, label, image_id, object_id, - label_to_num)) \ No newline at end of file + label_to_num))
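The new `add_augmenters` hook (added to the cls, det, and seg `Compose` classes above) prepends user-supplied augmenters to an existing pipeline. A hedged usage sketch for the segmentation flavour, under the transforms shown in this patch:

```python
from paddlex.seg import transforms

train_transforms = transforms.Compose([
    transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize()
])
# Extra augmenters are inserted ahead of the existing ops, so the flip
# runs before the crop and the normalization.
train_transforms.add_augmenters([transforms.RandomHorizontalFlip(prob=0.5)])
```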
An end-to-end deep learning model development toolkit setuptools.setup( name="paddlex", - version='1.0.2', + version='1.0.4', author="paddlex", author_email="paddlex@baidu.com", description=long_description, diff --git a/tutorials/train/segmentation/hrnet.py b/tutorials/train/segmentation/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..f887b78c3ae16ae66235f1965ada8bd2355d62c6 --- /dev/null +++ b/tutorials/train/segmentation/hrnet.py @@ -0,0 +1,50 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +import paddlex as pdx +from paddlex.seg import transforms + +# 下载和解压视盘分割数据集 +optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' +pdx.utils.download_and_decompress(optic_dataset, path='./') + +# 定义训练和验证时的transforms +train_transforms = transforms.Compose([ + transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(), + transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize() +]) + +eval_transforms = transforms.Compose([ + transforms.ResizeByLong(long_size=512), + transforms.Padding(target_size=512), transforms.Normalize() +]) + +# 定义训练和验证所用的数据集 +train_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/train_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/val_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/hrnet/vdl_log --port 8001 +# 浏览器打开 http://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP +num_classes = len(train_dataset.labels) +model = pdx.seg.HRNet(num_classes=num_classes) +model.train( + num_epochs=20, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + learning_rate=0.01, + save_dir='output/hrnet', + use_vdl=True)
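After training, the saved checkpoints can be reloaded for inference; a short follow-up sketch under the usual PaddleX 1.x API (the image path is illustrative, and `pdx.load_model` / `pdx.seg.visualize` are assumed from the library rather than shown in this patch):

```python
import paddlex as pdx

# load_model restores the best checkpoint written by model.train above
model = pdx.load_model('output/hrnet/best_model')
image_name = 'optic_disc_seg/JPEGImages/H0005.jpg'  # illustrative path
result = model.predict(image_name)
# Overlay the predicted mask on the input image and save it
pdx.seg.visualize(image_name, result, weight=0.4, save_dir='output/hrnet')
```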