diff --git a/PaddleCV/image_classification/README.md b/PaddleCV/image_classification/README.md index 67bb73717df64d02c63df32c78aed07adfa69bb9..3fcfbc7a06364745bc5fe3eb491f3067afdb6e58 100644 --- a/PaddleCV/image_classification/README.md +++ b/PaddleCV/image_classification/README.md @@ -149,21 +149,31 @@ Available top-1/top-5 validation accuracy on ImageNet 2012 are listed in table. - Released models: specify parameter names -|model | top-1/top-5 accuracy(PIL)| top-1/top-5 accuracy(CV2) | -|- |:-: |:-:| -|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.71%/79.18% | 55.88%/78.65% | -|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.22%/89.09% | 69.01%/88.90% | -|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.14%/89.48% | 69.83%/89.13% | -|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.08%/90.63% | 71.65%/90.57% | -|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.83% | 72.32%/90.98% | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.91%/89.54% | 70.51%/89.35% | -|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 71.90%/90.55% | 71.53%/90.41% | -|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.85%/89.89% | 70.65%/89.89% | -|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.41%/92.03% | 74.13%/91.97% | -|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.35%/92.80% | 76.22%/92.92% | -|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.49%/93.57% | 77.56%/93.64% | -|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.12%/93.93% | 77.92%/93.87% | 
-|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.50%/94.01% | 78.44%/93.96% | -|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.26%/94.22% | 79.12%/94.20% | -|[GoogleNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.50%/89.59% | 70.27%/89.58% | -|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | | 69.48%/88.99% | +|model | top-1/top-5 accuracy(CV2) | +|- |:-: | +|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | +|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | +|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | +|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | +|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | +|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | +|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | +|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | +|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | +|[ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_pretrained.tar) |78.35%/94.03% | +|[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | +|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | 
+|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 79.44%/94.47% | +|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | +|[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | +|[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | +|[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | +|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | +|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | +|[SE154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.45%/95.49% | +|[GoogleNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | +|[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.88%/95.28% | + diff --git a/PaddleCV/image_classification/README_cn.md b/PaddleCV/image_classification/README_cn.md index f224c982f649adfa6901518059b4e99ef9473e46..e5a7874d06ca634e4553ab15d4447a9fd210cd63 100644 --- a/PaddleCV/image_classification/README_cn.md +++ b/PaddleCV/image_classification/README_cn.md @@ -85,7 +85,7 @@ python train.py \ 在```run.sh```中有用于训练的脚本. 
-**数据读取器说明:** 数据读取器定义在```reader.py```和```reader_cv2.py```中。一般, CV2可以提高数据读取速度, PIL reader可以得到相对更高的精度, 我们现在默认基于PIL的数据读取器, 在[训练阶段](#模型训练), 默认采用的增广方式是随机裁剪与水平翻转, 而在[模型评估](#模型评估)与[模型预测](#模型预测)阶段用的默认方式是中心裁剪。当前支持的数据增广方式有: +**数据读取器说明:** 数据读取器定义在```reader.py```和```reader_cv2.py```中。一般, CV2可以提高数据读取速度, PIL reader可以得到相对更高的精度, 我们现在默认基于cv2的数据读取器, 在[训练阶段](#模型训练), 默认采用的增广方式是随机裁剪与水平翻转, 而在[模型评估](#模型评估)与[模型预测](#模型预测)阶段用的默认方式是中心裁剪。当前支持的数据增广方式有: * 旋转 * 颜色抖动 * 随机裁剪 @@ -143,23 +143,32 @@ python infer.py \ 表格中列出了在```models```目录下支持的图像分类模型,并且给出了已完成训练的模型在ImageNet-2012验证集合上的top-1/top-5精度, 可以通过点击相应模型的名称下载相应预训练模型。 -- Released models: - -|model | top-1/top-5 accuracy(PIL)| top-1/top-5 accuracy(CV2) | -|- |:-: |:-:| -|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.71%/79.18% | 55.88%/78.65% | -|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.22%/89.09% | 69.01%/88.90% | -|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.14%/89.48% | 69.83%/89.13% | -|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.08%/90.63% | 71.65%/90.57% | -|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.83% | 72.32%/90.98% | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.91%/89.54% | 70.51%/89.35% | -|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 71.90%/90.55% | 71.53%/90.41% | -|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.85%/89.89% | 70.65%/89.89% | -|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.41%/92.03% | 74.13%/91.97% | -|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.35%/92.80% | 76.22%/92.92% | -|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 
77.49%/93.57% | 77.56%/93.64% | -|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.12%/93.93% | 77.92%/93.87% | -|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.50%/94.01% | 78.44%/93.96% | -|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.26%/94.22% | 79.12%/94.20% | -|[GoogleNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.50%/89.59% | 70.27%/89.58% | -|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | | 69.48%/88.99% | +- Released models: specify parameter names + +|model | top-1/top-5 accuracy(CV2) | +|- |:-: | +|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | +|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | +|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | +|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | +|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | +|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | +|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | +|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | +|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | +|[ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_pretrained.tar) |78.35%/94.03% | 
+|[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | +|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | +|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 79.44%/94.47% | +|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | +|[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | +|[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | +|[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | +|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | +|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | +|[SE154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.45%/95.49% | +|[GoogleNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | +|[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.88%/95.28% | diff --git a/PaddleCV/image_classification/models/__init__.py b/PaddleCV/image_classification/models/__init__.py index 458991ca732a22f3774568ffbfa84514ddadfe5c..2661aaa27a47cb2e743649b0620765d816823e71 100644 --- a/PaddleCV/image_classification/models/__init__.py +++ b/PaddleCV/image_classification/models/__init__.py @@ -4,9 +4,15 @@ from .mobilenet_v2 import MobileNetV2 from 
.googlenet import GoogleNet from .vgg import VGG11, VGG13, VGG16, VGG19 from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 +from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc +from .resnet_vd import ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd +from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d +from .resnext_vd import ResNeXt50_vd_64x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_64x4d + from .resnet_dist import DistResNet from .inception_v4 import InceptionV4 from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d +from .se_resnext_vd import SE_ResNeXt50_32x4d_vd, SE_ResNeXt101_32x4d_vd, SE154_vd from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 from .shufflenet_v2 import ShuffleNetV2, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish, ShuffleNetV2_x8_0_swish from .fast_imagenet import FastImageNet diff --git a/PaddleCV/image_classification/models/resnet_dist.py b/PaddleCV/image_classification/models/resnet_dist.py index 3420d790c25534b4a73ea660b2d880ff899ee62f..4656b4a260758d8b24eecee99a3a672d5de671d0 100644 --- a/PaddleCV/image_classification/models/resnet_dist.py +++ b/PaddleCV/image_classification/models/resnet_dist.py @@ -1,3 +1,4 @@ +#NOTE: This is for distributed resnet from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/PaddleCV/image_classification/models/resnet_vc.py b/PaddleCV/image_classification/models/resnet_vc.py new file mode 100644 index 0000000000000000000000000000000000000000..7572660d2f2342bfa65b794235d01e258905b580 --- /dev/null +++ b/PaddleCV/image_classification/models/resnet_vc.py @@ -0,0 +1,150 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import math +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNet", 
"ResNet50_vc", "ResNet101_vc", "ResNet152_vc"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNet(): + def __init__(self, layers=50): + self.params = train_parameters + self.layers = layers + + def net(self, input, class_dim=1000): + layers = self.layers + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [64, 128, 256, 512] + + conv = self.conv_bn_layer( + input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer( + input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name="res"+str(block+2)+"a" + else: + conv_name="res"+str(block+2)+"b"+str(i) + else: + conv_name="res"+str(block+2)+chr(97+i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1,name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, + stdv))) + return out + + def 
conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm(input=conv, + act=act, + name=bn_name+'.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance',) + + def shortcut(self, input, ch_out, stride, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu',name=name+"_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name+"_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") + + short = self.shortcut(input, num_filters * 4, stride, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu',name=name+".add.output.5") + + +def ResNet50_vc(): + model = ResNet(layers=50) + return model + + +def ResNet101_vc(): + model = ResNet(layers=101) + return model + + +def ResNet152_vc(): + model = ResNet(layers=152) + return model diff --git a/PaddleCV/image_classification/models/resnet_vd.py b/PaddleCV/image_classification/models/resnet_vd.py new file mode 100644 index 0000000000000000000000000000000000000000..3b8db164b48d0ea6d1b3ba4fe762ae310de044f5 --- 
/dev/null +++ b/PaddleCV/image_classification/models/resnet_vd.py @@ -0,0 +1,206 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +import math + +__all__ = ["ResNet", "ResNet50_vd","ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNet(): + def __init__(self, layers=50, is_3x3 = False): + self.params = train_parameters + self.layers = layers + self.is_3x3 = is_3x3 + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + if is_3x3 == False: + conv = self.conv_bn_layer( + input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer( + input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer( + input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + 
conv_name="res"+str(block+2)+"a" + else: + conv_name="res"+str(block+2)+"b"+str(i) + else: + conv_name="res"+str(block+2)+chr(97+i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block==0, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + + out = fluid.layers.fc(input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv))) + + + return out + + + + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm(input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + def conv_bn_layer_new(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d(input=input, + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg') + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm(input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + 
moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name+"_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name+"_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") + + short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + + + +def ResNet50_vd(): + model = ResNet(layers=50, is_3x3 = True) + return model + +def ResNet101_vd(): + model = ResNet(layers=101, is_3x3 = True) + return model + +def ResNet152_vd(): + model = ResNet(layers=152, is_3x3 = True) + return model + +def ResNet200_vd(): + model = ResNet(layers=200, is_3x3 = True) + return model + diff --git a/PaddleCV/image_classification/models/resnext.py b/PaddleCV/image_classification/models/resnext.py new file mode 100644 index 0000000000000000000000000000000000000000..f0b5e110d2c753781e6702f177e368df30512670 --- /dev/null +++ b/PaddleCV/image_classification/models/resnext.py @@ -0,0 +1,164 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +import paddle.fluid as fluid +import math +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d"] + +train_parameters = { + 
"input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNeXt(): + def __init__(self, layers=50): + self.params = train_parameters + self.layers = layers + + def net(self, input, class_dim=1000): + layers = self.layers + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [256, 512, 1024, 2048] + cardinality = 64 + + conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="res_conv1") #debug + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = 
fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', ) + + def shortcut(self, input, ch_out, stride, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, cardinality, name): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu', + name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, num_filters, stride, name=name + "_branch1") + + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") + + +def ResNeXt50_64x4d(): + model = ResNeXt(layers=50) + return model + + +def ResNeXt101_64x4d(): + model = ResNeXt(layers=101) + return model + + +def ResNeXt152_64x4d(): + model = ResNeXt(layers=152) + return model diff --git a/PaddleCV/image_classification/models/resnext_vd.py b/PaddleCV/image_classification/models/resnext_vd.py new file mode 100644 index 0000000000000000000000000000000000000000..e05c7735e345bdbfb8961a5ccdc95997f4af3efd --- /dev/null +++ 
b/PaddleCV/image_classification/models/resnext_vd.py @@ -0,0 +1,201 @@ +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +import math + +__all__ = ["ResNeXt", "ResNeXt50_vd_64x4d","ResNeXt101_vd_64x4d", "ResNeXt152_vd_64x4d"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNeXt(): + def __init__(self, layers=50, is_3x3 = False): + self.params = train_parameters + self.layers = layers + self.is_3x3 = is_3x3 + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [256, 512, 1024, 2048] + cardinality = 64 + + if is_3x3 == False: + conv = self.conv_bn_layer( + input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer( + input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer( + input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name="res"+str(block+2)+"a" + else: + conv_name="res"+str(block+2)+"b"+str(i) + else: + conv_name="res"+str(block+2)+chr(97+i) + conv = 
self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + if_first=block==0, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + + return out + + + + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm(input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + def conv_bn_layer_new(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d(input=input, + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg') + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm(input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + 
moving_variance_name=bn_name + '_variance') + + + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, cardinality, name, if_first): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu', name=name+"_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + groups=cardinality, + name=name+"_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, num_filters=num_filters, filter_size=1, act=None, name=name+"_branch2c") + + short = self.shortcut(input, num_filters, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + + + +def ResNeXt50_vd_64x4d(): + model = ResNeXt(layers=50, is_3x3 = True) + return model + +def ResNeXt101_vd_64x4d(): + model = ResNeXt(layers=101, is_3x3 = True) + return model + +def ResNeXt152_vd_64x4d(): + model = ResNeXt(layers=152, is_3x3 = True) + return model + + diff --git a/PaddleCV/image_classification/models/se_resnext_vd.py b/PaddleCV/image_classification/models/se_resnext_vd.py new file mode 100644 index 0000000000000000000000000000000000000000..4c1a37fece78f709f8d026c049b84e8d8eddf913 --- /dev/null +++ b/PaddleCV/image_classification/models/se_resnext_vd.py @@ -0,0 +1,257 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import math +from paddle.fluid.param_attr import ParamAttr + +__all__ = [ + "SE_ResNeXt", "SE_ResNeXt50_32x4d_vd", "SE_ResNeXt101_32x4d_vd", + "SE154_vd" +] + +train_parameters = { + "input_size": [3, 224, 224], + 
"input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [10, 16, 20], + "steps": [0.01, 0.001, 0.0001, 0.00001] + } +} + + +class SE_ResNeXt(): + def __init__(self, layers=50): + self.params = train_parameters + self.layers = layers + + def net(self, input, class_dim=1000): + layers = self.layers + supported_layers = [50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + if layers == 50: + cardinality = 32 + reduction_ratio = 16 + depth = [3, 4, 6, 3] + num_filters = [128, 256, 512, 1024] + + conv = self.conv_bn_layer( + input=input, num_filters=64, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer( + input=conv, num_filters=128, filter_size=3, stride=1, act='relu', name='conv1_3') + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + elif layers == 101: + cardinality = 32 + reduction_ratio = 16 + depth = [3, 4, 23, 3] + num_filters = [128, 256, 512, 1024] + + conv = self.conv_bn_layer( + input=input, num_filters=64, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer( + input=conv, num_filters=128, filter_size=3, stride=1, act='relu', name='conv1_3') + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + elif layers == 152: + cardinality = 64 + reduction_ratio = 16 + depth = [3, 8, 36, 3] + num_filters = [256, 512, 1024, 2048] + + conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') + conv = self.conv_bn_layer( + 
input=conv, num_filters=64, filter_size=3, stride=1, act='relu',name='conv1_2') + conv = self.conv_bn_layer( + input=conv, + num_filters=128, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') + conv = fluid.layers.pool2d( + input=conv, pool_size=3, pool_stride=2, pool_padding=1, \ + pool_type='max') + n = 1 if layers == 50 or layers == 101 else 3 + for block in range(len(depth)): + n += 1 + for i in range(depth[block]): + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio, + if_first=block==0, + name=str(n)+'_'+str(i+1)) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + if layers == 152: + pool = fluid.layers.dropout(x=pool, dropout_prob=0.2) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + + return out + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + filter_size = 1 + if if_first: + return self.conv_bn_layer(input, ch_out, filter_size, stride, name='conv'+name+'_prj') + else: + return self.conv_bn_layer_new(input, ch_out, filter_size, stride, name='conv'+name+'_prj') + else: + return input + + + def bottleneck_block(self, input, num_filters, stride, cardinality, + reduction_ratio,if_first, name=None): + conv0 = self.conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu',name='conv'+name+'_x1') + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu', + name='conv'+name+'_x2') + if cardinality == 64: + num_filters = num_filters / 2 + conv2 = self.conv_bn_layer( + input=conv1, 
num_filters=num_filters * 2, filter_size=1, act=None, name='conv'+name+'_x3') + scale = self.squeeze_excitation( + input=conv2, + num_channels=num_filters * 2, + reduction_ratio=reduction_ratio, + name='fc'+name) + + short = self.shortcut(input, num_filters * 2, stride, if_first=if_first, name=name) + + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + bias_attr=False, + param_attr=ParamAttr(name=name + '_weights'), + ) + bn_name = name + "_bn" + return fluid.layers.batch_norm(input=conv, act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + def conv_bn_layer_new(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d(input=input, + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg') + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + bn_name = name + "_bn" + return fluid.layers.batch_norm(input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + def squeeze_excitation(self, input, num_channels, reduction_ratio, name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=0, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + squeeze = fluid.layers.fc(input=pool, + 
size=num_channels / reduction_ratio, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform( + -stdv, stdv),name=name+'_sqz_weights'), + bias_attr=ParamAttr(name=name+'_sqz_offset')) + stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) + excitation = fluid.layers.fc(input=squeeze, + size=num_channels, + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform( + -stdv, stdv),name=name+'_exc_weights'), + bias_attr=ParamAttr(name=name+'_exc_offset')) + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + + +def SE_ResNeXt50_32x4d_vd(): + model = SE_ResNeXt(layers=50) + return model + + +def SE_ResNeXt101_32x4d_vd(): + model = SE_ResNeXt(layers=101) + return model + + +def SE154_vd(): + model = SE_ResNeXt(layers=152) + return model diff --git a/PaddleCV/image_classification/reader_cv2.py b/PaddleCV/image_classification/reader_cv2.py index 7be5baa8014562d44371372318e4c8c81303c5fe..09fdfb0cd7aa4a34d34d999048e6bbca0fe6e4d3 100644 --- a/PaddleCV/image_classification/reader_cv2.py +++ b/PaddleCV/image_classification/reader_cv2.py @@ -15,11 +15,11 @@ DATA_DIM = 224 THREAD = 8 BUF_SIZE = 102400 -DATA_DIR = 'data/ILSVRC2012' +DATA_DIR = './data/ILSVRC2012' + img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - def rotate_image(img): """ rotate_image """ (h, w) = img.shape[:2] @@ -29,19 +29,22 @@ def rotate_image(img): rotated = cv2.warpAffine(img, M, (w, h)) return rotated - -def random_crop(img, size, scale=None, ratio=None): +def random_crop(img, size, settings, scale=None, ratio=None): """ random_crop """ - scale = [0.08, 1.0] if scale is None else scale - ratio = [3. / 4., 4. / 3.] 
if ratio is None else ratio + lower_scale = settings.lower_scale + lower_ratio = settings.lower_ratio + upper_ratio = settings.upper_ratio + scale = [lower_scale, 1.0] if scale is None else scale + ratio = [lower_ratio, upper_ratio] if ratio is None else ratio + aspect_ratio = math.sqrt(np.random.uniform(*ratio)) w = 1. * aspect_ratio h = 1. / aspect_ratio + bound = min((float(img.shape[0]) / img.shape[1]) / (h**2), + (float(img.shape[1]) / img.shape[0]) / (w**2)) - bound = min((float(img.shape[0]) / img.shape[1]) / (w**2), - (float(img.shape[1]) / img.shape[0]) / (h**2)) scale_max = min(scale[1], bound) scale_min = min(scale[0], bound) @@ -50,27 +53,29 @@ def random_crop(img, size, scale=None, ratio=None): target_size = math.sqrt(target_area) w = int(target_size * w) h = int(target_size * h) - i = np.random.randint(0, img.shape[0] - w + 1) - j = np.random.randint(0, img.shape[1] - h + 1) + i = np.random.randint(0, img.shape[0] - h + 1) + j = np.random.randint(0, img.shape[1] - w + 1) - img = img[i:i + w, j:j + h, :] + img = img[i:i + h, j:j + w, :] - resized = cv2.resize(img, (size, size), interpolation=cv2.INTER_LANCZOS4) + resized = cv2.resize(img, (size, size) + #, interpolation=cv2.INTER_LANCZOS4 + ) return resized def distort_color(img): return img - def resize_short(img, target_size): """ resize_short """ percent = float(target_size) / min(img.shape[0], img.shape[1]) resized_width = int(round(img.shape[1] * percent)) resized_height = int(round(img.shape[0] * percent)) - resized = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_LANCZOS4) + resized = cv2.resize(img, (resized_width, resized_height), + #interpolation=cv2.INTER_LANCZOS4 + ) return resized - def crop_image(img, target_size, center): """ crop_image """ height, width = img.shape[:2] @@ -86,8 +91,51 @@ def crop_image(img, target_size, center): img = img[h_start:h_end, w_start:w_end, :] return img - -def process_image(sample, +def create_mixup_reader(settings, rd): + class 
context: + tmp_mix = [] + tmp_l1 = [] + tmp_l2 = [] + tmp_lam = [] + + batch_size = settings.batch_size + alpha = settings.mixup_alpha + def fetch_data(): + + data_list = [] + for i, item in enumerate(rd()): + data_list.append(item) + if i % batch_size == batch_size - 1: + yield data_list + data_list =[] + + def mixup_data(): + + for data_list in fetch_data(): + if alpha > 0.: + lam = np.random.beta(alpha, alpha) + else: + lam = 1. + l1 = np.array(data_list) + l2 = np.random.permutation(l1) + mixed_l = [l1[i][0] * lam + (1 - lam) * l2[i][0] for i in range(len(l1))] + yield mixed_l, l1, l2, lam + + def mixup_reader(): + + for context.tmp_mix, context.tmp_l1, context.tmp_l2, context.tmp_lam in mixup_data(): + for i in range(len(context.tmp_mix)): + mixed_l = context.tmp_mix[i] + l1 = context.tmp_l1[i] + l2 = context.tmp_l2[i] + lam = context.tmp_lam + yield mixed_l, l1[1], l2[1], lam + + return mixup_reader + +def process_image( + sample, + settings, mode, color_jitter, rotate, @@ -106,14 +154,15 @@ def process_image(sample, if rotate: img = rotate_image(img) if crop_size > 0: - img = random_crop(img, crop_size) + img = random_crop(img, crop_size,settings) if color_jitter: img = distort_color(img) if np.random.randint(0, 2) == 1: img = img[:, ::-1, :] else: if crop_size > 0: - img = resize_short(img, crop_size) + target_size = settings.resize_short_size + img = resize_short(img, 256) img = crop_image(img, target_size=crop_size, center=True) @@ -134,7 +183,8 @@ def image_mapper(**kwargs): return functools.partial(process_image, **kwargs) -def _reader_creator(file_list, +def _reader_creator(settings, + file_list, mode, shuffle=False, color_jitter=False, @@ -165,18 +215,17 @@ def _reader_creator(file_list, for line in lines: if mode == 'train' or mode == 'val': img_path, label = line.split() - img_path = img_path.replace("JPEG", "jpeg") img_path = os.path.join(data_dir, img_path) yield img_path, int(label) elif mode == 'test': img_path, label = line.split() - img_path = 
img_path.replace("JPEG", "jpeg") img_path = os.path.join(data_dir, img_path) yield [img_path] image_mapper = functools.partial( process_image, + settings=settings, mode=mode, color_jitter=color_jitter, rotate=rotate, @@ -185,23 +234,25 @@ def _reader_creator(file_list, image_mapper, reader, THREAD, BUF_SIZE, order=False) return reader - -def train(data_dir=DATA_DIR, pass_id_as_seed=0): - +def train(settings, data_dir=DATA_DIR, pass_id_as_seed=0): file_list = os.path.join(data_dir, 'train_list.txt') - return _reader_creator( + reader = _reader_creator( + settings, file_list, 'train', shuffle=True, color_jitter=False, rotate=False, data_dir=data_dir, - pass_id_as_seed=pass_id_as_seed) - + pass_id_as_seed=pass_id_as_seed, + ) + if settings.use_mixup == True: + reader = create_mixup_reader(settings, reader) + return reader -def val(data_dir=DATA_DIR): +def val(settings,data_dir=DATA_DIR): file_list = os.path.join(data_dir, 'val_list.txt') - return _reader_creator(file_list, 'val', shuffle=False, + return _reader_creator(settings ,file_list, 'val', shuffle=False, data_dir=data_dir) diff --git a/PaddleCV/image_classification/run.sh b/PaddleCV/image_classification/run.sh index cc516a677771c2c22bdf702d0ae77916a1bd8f06..262556f34e3f52def69b3a0248f76af896444334 100755 --- a/PaddleCV/image_classification/run.sh +++ b/PaddleCV/image_classification/run.sh @@ -13,6 +13,146 @@ python train.py \ --num_epochs=200 \ --l2_decay=1.2e-4 \ # >log_SE_ResNeXt50_32x4d.txt 2>&1 & + +#SE_154 +""" +python train.py \ + --model=SE_154_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ + +#ResNeXt101_64x4d +python train.py \ + --model=ResNeXt101_64x4d \ + --batch_size=256 \ + --total_images=1281167 \ 
+ --image_shape=3,224,224 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=15e-5 + +python train.py \ +#ResNeXt101_vd_64x4d + --model=ResNeXt101_vd_64x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 + +#InceptionV4 +python train.py + --model=InceptionV4 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,299,299 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.045 \ + --num_epochs=200 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --resize_short_size=320 \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ +#ResNet152_vd +python train.py + --model=ResNet152_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 + +#ResNet200_vd +python train.py + --model=ResNet200_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 + +#ResNet50_vd +python train.py + --model=ResNet50_vd \ + --batch_size=256 \ + --total_images=1281167 \ + 
--image_shape=3,224,224 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=7e-5 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 +#ResNet50_vc +python train.py + --model=ResNet50_vc \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --input_dtype=float32 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --with_mem_opt=True \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ +""" + #AlexNet: #python train.py \ # --model=AlexNet \ diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py index d53eefb2c93565c14e3511bdea023e34335eab0b..b896e91d9a75af7a224961882a232491bdbb7232 100644 --- a/PaddleCV/image_classification/train.py +++ b/PaddleCV/image_classification/train.py @@ -10,7 +10,7 @@ import math import paddle import paddle.fluid as fluid import paddle.dataset.flowers as flowers -import reader as reader +import reader_cv2 as reader import argparse import functools import subprocess @@ -24,6 +24,7 @@ IMAGENET1000 = 1281167 parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) + # yapf: disable add_arg('batch_size', int, 256, "Minibatch size.") add_arg('use_gpu', bool, True, "Whether to use GPU or not.") @@ -37,13 +38,22 @@ add_arg('pretrained_model', str, None, "Whether to use pretrai add_arg('checkpoint', str, None, "Whether to resume checkpoint.") add_arg('lr', float, 0.1, "set learning rate.") add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.") -add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.") +add_arg('model', str, "ResNet50", "Set the network to use.") add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.") -add_arg('data_dir', str, 
"./data/ILSVRC2012", "The ImageNet dataset root dir.") +add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet dataset root dir.") add_arg('fp16', bool, False, "Enable half precision training with fp16." ) add_arg('scale_loss', float, 1.0, "Scale loss for fp16." ) add_arg('l2_decay', float, 1e-4, "L2_decay parameter.") add_arg('momentum_rate', float, 0.9, "momentum_rate.") +add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing or not") +add_arg('label_smoothing_epsilon', float, 0.2, "Set the label_smoothing_epsilon parameter") +add_arg('lower_scale', float, 0.08, "Set the lower_scale in ramdom_crop") +add_arg('lower_ratio', float, 3./4., "Set the lower_ratio in ramdom_crop") +add_arg('upper_ratio', float, 4./3., "Set the upper_ratio in ramdom_crop") +add_arg('resize_short_size', int, 256, "Set the resize_short_size") +add_arg('use_mixup', bool, False, "Whether to use mixup or not") +add_arg('mixup_alpha', float, 0.2, "Set the mixup_alpha parameter") +add_arg('is_distill', bool, False, "is distill or not") def optimizer_setting(params): ls = params["learning_strategy"] @@ -153,13 +163,25 @@ def optimizer_setting(params): return optimizer -def net_config(image, label, model, args): +def calc_loss(epsilon,label,class_dim,softmax_out,use_label_smoothing): + if use_label_smoothing: + label_one_hot = fluid.layers.one_hot(input=label, depth=class_dim) + smooth_label = fluid.layers.label_smooth(label=label_one_hot, epsilon=epsilon, dtype="float32") + loss = fluid.layers.cross_entropy(input=softmax_out, label=smooth_label, soft_label=True) + else: + loss = fluid.layers.cross_entropy(input=softmax_out, label=label) + return loss + + +def net_config(image, model, args, is_train, label=0, y_a=0, y_b=0, lam=0.0): model_list = [m for m in dir(models) if "__" not in m] assert args.model in model_list, "{} is not lists: {}".format(args.model, model_list) - class_dim = args.class_dim model_name = args.model + use_mixup = args.use_mixup + 
use_label_smoothing = args.use_label_smoothing + epsilon = args.label_smoothing_epsilon if args.enable_ce: assert model_name == "SE_ResNeXt50_32x4d" @@ -178,21 +200,41 @@ def net_config(image, label, model, args): avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2 acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5) + else: - out = model.net(input=image, class_dim=class_dim) - cost, pred = fluid.layers.softmax_with_cross_entropy( - out, label, return_softmax=True) + if not args.is_distill: + out = model.net(input=image, class_dim=class_dim) + softmax_out = fluid.layers.softmax(out, use_cudnn=False) + if is_train: + if use_mixup: + loss_a = calc_loss(epsilon,y_a,class_dim,softmax_out,use_label_smoothing) + loss_b = calc_loss(epsilon,y_b,class_dim,softmax_out,use_label_smoothing) + loss_a_mean = fluid.layers.mean(x = loss_a) + loss_b_mean = fluid.layers.mean(x = loss_b) + cost = lam * loss_a_mean + (1 - lam) * loss_b_mean + avg_cost = fluid.layers.mean(x=cost) + if args.scale_loss > 1: + avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss) + return avg_cost + else: + cost = calc_loss(epsilon,label,class_dim,softmax_out,use_label_smoothing) + + else: + cost = fluid.layers.cross_entropy(input=softmax_out, label=label) + else: + out1, out2 = model.net(input=image, class_dim=args.class_dim) + softmax_out1, softmax_out = fluid.layers.softmax(out1), fluid.layers.softmax(out2) + smooth_out1 = fluid.layers.label_smooth(label=softmax_out1, epsilon=0.0, dtype="float32") + cost = fluid.layers.cross_entropy(input=softmax_out, label=smooth_out1, soft_label=True) + + avg_cost = fluid.layers.mean(cost) if args.scale_loss > 1: avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss) - else: - avg_cost = fluid.layers.mean(x=cost) - - acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5) + acc_top1 = 
fluid.layers.accuracy(input=softmax_out, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5) return avg_cost, acc_top1, acc_top5 - def build_program(is_train, main_prog, startup_prog, args): image_shape = [int(m) for m in args.image_shape.split(",")] model_name = args.model @@ -201,20 +243,40 @@ def build_program(is_train, main_prog, startup_prog, args): model_list) model = models.__dict__[model_name]() with fluid.program_guard(main_prog, startup_prog): - py_reader = fluid.layers.py_reader( - capacity=16, - shapes=[[-1] + image_shape, [-1, 1]], - lod_levels=[0, 0], - dtypes=["float32", "int64"], - use_double_buffer=True) + use_mixup = args.use_mixup + if is_train and use_mixup: + py_reader = fluid.layers.py_reader( + capacity=16, + shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1]], + lod_levels=[0, 0, 0, 0], + dtypes=["float32", "int64", "int64", "float32"], + use_double_buffer=True) + else: + py_reader = fluid.layers.py_reader( + capacity=16, + shapes=[[-1] + image_shape, [-1, 1]], + lod_levels=[0, 0], + dtypes=["float32", "int64"], + use_double_buffer=True) + with fluid.unique_name.guard(): - image, label = fluid.layers.read_file(py_reader) - if args.fp16: - image = fluid.layers.cast(image, "float16") - avg_cost, acc_top1, acc_top5 = net_config(image, label, model, args) - avg_cost.persistable = True - acc_top1.persistable = True - acc_top5.persistable = True + if is_train and use_mixup: + image, y_a, y_b, lam = fluid.layers.read_file(py_reader) + if args.fp16: + image = fluid.layers.cast(image, "float16") + avg_cost = net_config(image=image, y_a=y_a, y_b=y_b, lam=lam, model=model, args=args, label=0, is_train=True) + avg_cost.persistable = True + build_program_out = [py_reader, avg_cost] + else: + image, label = fluid.layers.read_file(py_reader) + if args.fp16: + image = fluid.layers.cast(image, "float16") + avg_cost, acc_top1, acc_top5 = net_config(image, model, args, label=label, is_train=is_train) + 
avg_cost.persistable = True + acc_top1.persistable = True + acc_top5.persistable = True + build_program_out = [py_reader, avg_cost, acc_top1, acc_top5] + if is_train: params = model.params params["total_images"] = args.total_images @@ -236,11 +298,9 @@ def build_program(is_train, main_prog, startup_prog, args): else: optimizer.minimize(avg_cost) global_lr = optimizer._global_learning_rate() + build_program_out.append(global_lr) - if is_train: - return py_reader, avg_cost, acc_top1, acc_top5, global_lr - else: - return py_reader, avg_cost, acc_top1, acc_top5 + return build_program_out def get_device_num(): visible_device = os.getenv('CUDA_VISIBLE_DEVICES') @@ -257,6 +317,7 @@ def train(args): pretrained_model = args.pretrained_model with_memory_optimization = args.with_mem_opt model_save_dir = args.model_save_dir + use_mixup = args.use_mixup startup_prog = fluid.Program() train_prog = fluid.Program() @@ -265,16 +326,25 @@ def train(args): startup_prog.random_seed = 1000 train_prog.random_seed = 1000 - train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program( - is_train=True, - main_prog=train_prog, - startup_prog=startup_prog, - args=args) - test_py_reader, test_cost, test_acc1, test_acc5 = build_program( - is_train=False, - main_prog=test_prog, - startup_prog=startup_prog, - args=args) + b_out = build_program( + is_train=True, + main_prog=train_prog, + startup_prog=startup_prog, + args=args) + if use_mixup: + train_py_reader, train_cost, global_lr = b_out[0], b_out[1], b_out[2] + train_fetch_list = [train_cost.name, global_lr.name] + + else: + train_py_reader, train_cost, train_acc1, train_acc5, global_lr = b_out[0],b_out[1],b_out[2],b_out[3],b_out[4] + train_fetch_list = [train_cost.name, train_acc1.name, train_acc5.name, global_lr.name] + + b_out_test = build_program( + is_train=False, + main_prog=test_prog, + startup_prog=startup_prog, + args=args) + test_py_reader, test_cost, test_acc1, test_acc5 = 
b_out_test[0],b_out_test[1],b_out_test[2],b_out_test[3] test_prog = test_prog.clone(for_test=True) if with_memory_optimization: @@ -305,8 +375,8 @@ def train(args): test_batch_size = 16 if not args.enable_ce: train_reader = paddle.batch( - reader.train(), batch_size=train_batch_size, drop_last=True) - test_reader = paddle.batch(reader.val(), batch_size=test_batch_size) + reader.train(settings=args), batch_size=train_batch_size, drop_last=True) + test_reader = paddle.batch(reader.val(settings=args), batch_size=test_batch_size) else: # use flowers dataset for CE and set use_xmap False to avoid disorder data # but it is time consuming. For faster speed, need another dataset. @@ -333,16 +403,12 @@ def train(args): else: train_exe = exe - train_fetch_list = [ - train_cost.name, train_acc1.name, train_acc5.name, global_lr.name - ] test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name] params = models.__dict__[args.model]().params for pass_id in range(params["num_epochs"]): train_py_reader.start() - train_info = [[], [], []] test_info = [[], [], []] train_time = [] @@ -350,37 +416,48 @@ def train(args): try: while True: t1 = time.time() - - if use_ngraph: - loss, acc1, acc5, lr = train_exe.run( - train_prog, fetch_list=train_fetch_list) + if use_mixup: + if use_ngraph: + loss, lr = train_exe.run(train_prog, fetch_list=train_fetch_list) + else: + loss, lr = train_exe.run(fetch_list=train_fetch_list) else: - loss, acc1, acc5, lr = train_exe.run( - fetch_list=train_fetch_list) + if use_ngraph: + loss, acc1, acc5, lr = train_exe.run(train_prog, fetch_list=train_fetch_list) + else: + loss, acc1, acc5, lr = train_exe.run(fetch_list=train_fetch_list) + + acc1 = np.mean(np.array(acc1)) + acc5 = np.mean(np.array(acc5)) + train_info[1].append(acc1) + train_info[2].append(acc5) + t2 = time.time() period = t2 - t1 + loss = np.mean(np.array(loss)) - acc1 = np.mean(np.array(acc1)) - acc5 = np.mean(np.array(acc5)) train_info[0].append(loss) - train_info[1].append(acc1) - 
train_info[2].append(acc5) lr = np.mean(np.array(lr)) train_time.append(period) if batch_id % 10 == 0: - print("Pass {0}, trainbatch {1}, loss {2}, \ - acc1 {3}, acc5 {4}, lr {5}, time {6}" - .format(pass_id, batch_id, "%.5f"%loss, "%.5f"%acc1, "%.5f"%acc5, "%.5f" % - lr, "%2.2f sec" % period)) + if use_mixup: + print("Pass {0}, trainbatch {1}, loss {2}, lr {3}, time {4}" + .format(pass_id, batch_id, "%.5f"%loss, "%.5f" %lr, "%2.2f sec" % period)) + else: + print("Pass {0}, trainbatch {1}, loss {2}, \ + acc1 {3}, acc5 {4}, lr {5}, time {6}" + .format(pass_id, batch_id, "%.5f"%loss, "%.5f"%acc1, "%.5f"%acc5, "%.5f" % + lr, "%2.2f sec" % period)) sys.stdout.flush() batch_id += 1 except fluid.core.EOFException: train_py_reader.reset() train_loss = np.array(train_info[0]).mean() - train_acc1 = np.array(train_info[1]).mean() - train_acc5 = np.array(train_info[2]).mean() + if not use_mixup: + train_acc1 = np.array(train_info[1]).mean() + train_acc5 = np.array(train_info[2]).mean() train_speed = np.array(train_time).mean() / (train_batch_size * device_num) @@ -414,10 +491,15 @@ def train(args): test_acc1 = np.array(test_info[1]).mean() test_acc5 = np.array(test_info[2]).mean() - print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, " - "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format( - pass_id, "%.5f"%train_loss, "%.5f"%train_acc1, "%.5f"%train_acc5, "%.5f"%test_loss, - "%.5f"%test_acc1, "%.5f"%test_acc5)) + if use_mixup: + print("End pass {0}, train_loss {1}, test_loss {4}, test_acc1 {5}, test_acc5 {6}".format( + pass_id, "%.5f"%train_loss, "%.5f"%test_loss, "%.5f"%test_acc1, "%.5f"%test_acc5)) + else: + + print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, " + "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format( + pass_id, "%.5f"%train_loss, "%.5f"%train_acc1, "%.5f"%train_acc5, "%.5f"%test_loss, + "%.5f"%test_acc1, "%.5f"%test_acc5)) sys.stdout.flush() model_path = os.path.join(model_save_dir + '/' + model_name, diff --git 
a/PaddleCV/image_classification/utility.py b/PaddleCV/image_classification/utility.py deleted file mode 100644 index 5b10a179ac2231cb26ab42993b7300d5e99f44bc..0000000000000000000000000000000000000000 --- a/PaddleCV/image_classification/utility.py +++ /dev/null @@ -1,63 +0,0 @@ -"""Contains common utility functions.""" -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import distutils.util -import numpy as np -import six -from paddle.fluid import core - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs)