From 6896c7cd9c49d05618f50d3930553091c5acc73e Mon Sep 17 00:00:00 2001 From: ceci3 Date: Sun, 26 Apr 2020 16:06:42 +0800 Subject: [PATCH] add metric(fid_score) for gan (#4553) * fix init * update * add fid_score for gan * update readme --- PaddleCV/gan/README.md | 18 +- PaddleCV/gan/metric/compute_fid.py | 183 ++++++++++++++++ PaddleCV/gan/metric/inception.py | 302 +++++++++++++++++++++++++++ PaddleCV/gan/network/base_network.py | 2 +- PaddleCV/gan/requirements.txt | 1 + 5 files changed, 498 insertions(+), 8 deletions(-) create mode 100644 PaddleCV/gan/metric/compute_fid.py create mode 100644 PaddleCV/gan/metric/inception.py diff --git a/PaddleCV/gan/README.md b/PaddleCV/gan/README.md index 60e43c85..e07ff41d 100644 --- a/PaddleCV/gan/README.md +++ b/PaddleCV/gan/README.md @@ -18,12 +18,12 @@ 本图像生成模型库包含CGAN\[[3](#参考文献)\], DCGAN\[[4](#参考文献)\], Pix2Pix\[[5](#参考文献)\], CycleGAN\[[6](#参考文献)\], StarGAN\[[7](#参考文献)\], AttGAN\[[8](#参考文献)\], STGAN\[[9](#参考文献)\], SPADE\[[13](#参考文献)\]。 注意: -1. StarGAN,AttGAN和STGAN由于梯度惩罚所需的操作目前只支持GPU,需使用GPU训练。 -2. GAN模型目前仅仅验证了单机单卡训练和预测结果。 -3. CGAN和DCGAN两个模型训练使用的数据集为MNIST数据集;StarGAN,AttGAN和STGAN的数据集为CelebA数据集。Pix2Pix和CycleGAN支持的数据集可以参考download.py中的cycle_pix_dataset。cityscapes数据集需要从[官方](https://www.cityscapes-dataset.com)下载数据,下载完之后使用`scripts/prepare_cityscapes_dataset.py`处理,处理后的文件夹命名为cityscapes并放入data目录下即可。 -4. PaddlePaddle1.5.1及之前的版本不支持在AttGAN和STGAN模型里的判别器加上的instance norm。如果要在判别器中加上instance norm,请源码编译develop分支并安装。 -5. 中间效果图保存在${output_dir}/test文件夹中。对于Pix2Pix来说,inputA 和inputB 代表输入的两种风格的图片,fakeB表示生成图片;对于CycleGAN来说,inputA表示输入图片,fakeB表示inputA根据生成的图片,cycA表示fakeB经过生成器重构出来的对应于inputA的重构图片;对于StarGAN,AttGAN和STGAN来说,第一行表示原图,之后的每一行都代表一种属性变换。 -6. infer过程使用的test_list文件和训练过程中使用的train_list具有相同格式,第一行为样本数量,第二行为属性,之后的行中第一个表示图片名称,之后的-1和1表示该图片是否拥有该属性(1为有该属性,-1为没有该属性)。 +1. GAN模型目前仅仅验证了单机单卡训练和预测结果。 +2. 
CGAN和DCGAN两个模型训练使用的数据集为MNIST数据集;StarGAN,AttGAN和STGAN的数据集为CelebA数据集。Pix2Pix和CycleGAN支持的数据集可以参考download.py中的cycle_pix_dataset。cityscapes数据集需要从[官方](https://www.cityscapes-dataset.com)下载数据,下载完之后使用`scripts/prepare_cityscapes_dataset.py`处理,处理后的文件夹命名为cityscapes并放入data目录下即可。 +3. PaddlePaddle1.5.1及之前的版本不支持在AttGAN和STGAN模型里的判别器加上的instance norm。如果要在判别器中加上instance norm,请源码编译develop分支并安装。 +4. 中间效果图保存在${output_dir}/test文件夹中。对于Pix2Pix来说,inputA 和inputB 代表输入的两种风格的图片,fakeB表示生成图片;对于CycleGAN来说,inputA表示输入图片,fakeB表示inputA根据生成的图片,cycA表示fakeB经过生成器重构出来的对应于inputA的重构图片;对于StarGAN,AttGAN和STGAN来说,第一行表示原图,之后的每一行都代表一种属性变换。 +5. infer过程使用的test_list文件和训练过程中使用的train_list具有相同格式,第一行为样本数量,第二行为属性,之后的行中第一个表示图片名称,之后的-1和1表示该图片是否拥有该属性(1为有该属性,-1为没有该属性)。 +6. metric中的fid评价指标需要先下载inceptionV3模型参数,模型参数下载链接:[inceptionV3](https://paddle-gan-models.bj.bcebos.com/params_inceptionV3.tar.gz) 图像生成模型库库的目录结构如下: ``` @@ -58,6 +58,10 @@ │ ├── celeba │ ├── ${image_dir} 存放实际图片 │ ├── list 文件 +│ +├── metric 评价指标 +│ ├── compute_fid.py 计算fid_score的文件 +│ ├── inception.py 计算fid_score所需要的inceptionV3网络结构 ``` @@ -71,7 +75,7 @@ 在当前目录下运行样例代码需要PadddlePaddle Fluid的v.1.7.1或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html)中的说明来更新PaddlePaddle。 其他依赖包: -1. `pip install imageio` 或者 `pip install -r requirements.txt` 安装imageio包(保存图片代码中所依赖的包) +1. 
# ---------------------------------------------------------------------------
# Patch framing that shares these physical lines with the code below.
# Kept as comments so no text from the original patch is lost.  The first
# entry is the tail of the README.md hunk (it continues the "+1. " item that
# ends the previous line).
#
#   `pip install -r requirements.txt` 安装imageio包(保存图片代码中所依赖的包) ### 任务简介
#   diff --git a/PaddleCV/gan/metric/compute_fid.py b/PaddleCV/gan/metric/compute_fid.py
#   new file mode 100644
#   index 00000000..70c3926f
#   --- /dev/null
#   +++ b/PaddleCV/gan/metric/compute_fid.py
#   @@ -0,0 +1,183 @@
# ---------------------------------------------------------------------------
import os
import fnmatch
import numpy as np
import cv2
from cv2 import imread
from scipy import linalg
import paddle.fluid as fluid
from inception import InceptionV3


def tqdm(x):
    """No-op stand-in for a progress-bar wrapper: return ``x`` unchanged."""
    return x


# Based on
# https://github.com/mit-han-lab/gan-compression/blob/master/metric/fid_score.py
#
# The InceptionV3 pretrained model was converted from PyTorch; it can be
# downloaded from
# https://paddle-gan-models.bj.bcebos.com/params_inceptionV3.tar.gz


def _calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Return the Frechet distance between two Gaussians.

    The value is
    ``|mu1 - mu2|^2 + Tr(sigma1) + Tr(sigma2) - 2 * Tr(sqrt(sigma1 . sigma2))``.

    Args:
        mu1, mu2: mean vectors (array-likes; promoted to at least 1-D).
        sigma1, sigma2: covariance matrices (promoted to at least 2-D).
        eps: value added to both covariance diagonals when the matrix square
            root of the plain product is not finite.

    Raises:
        AssertionError: if the means or the covariances differ in shape.
        ValueError: if the matrix square root keeps a diagonal imaginary part
            larger than ``1e-3``.
    """
    # Use the promoted arrays for everything below.  The original computed
    # them and then kept using the raw arguments, so list inputs failed.
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, 'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, 'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give a slight imaginary component.
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean)


def _build_program(model):
    """Build the feature-extraction program for ``model``.

    Returns:
        ``(pred, test_program, startup_program)`` where ``pred`` is the first
        requested block output after global pooling and ``test_program`` is
        the main program cloned with ``for_test=True``.
    """
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        images = fluid.data(name='images', shape=[None, 3, None, None])
        output = model.network(images, class_dim=1008)
        # Average pooling, as in the reference FID implementation.  pool2d
        # defaults to 'max', which silently changed the statistics for every
        # feature size whose output is not already 1x1 (dims != 2048).
        pred = fluid.layers.pool2d(output[0], pool_type='avg', global_pooling=True)

    test_program = main_program.clone(for_test=True)
    return pred, test_program, startup_program


def _create_predictor(model, use_gpu, premodel_path):
    """Build the program and load the weights once; return ``predict(batch)``."""
    output, main_program, startup_program = _build_program(model)
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    fluid.load(main_program, os.path.join(premodel_path, 'paddle_inceptionv3'), exe)

    def predict(images):
        return exe.run(main_program, feed={'images': images}, fetch_list=[output])[0]

    return predict


def _get_activations_from_ims(img, model, batch_size, dims, use_gpu, premodel_path):
    """Return Inception activations for an in-memory image array.

    Args:
        img: array of images indexed along axis 0.  When axis 1 is not of
            size 3 the batch is transposed with ``(0, 3, 1, 2)``.
            Pixel values are divided by 255.
        model: object exposing ``network(images, class_dim=...)``.
        batch_size: number of images per forward pass; the final batch may be
            smaller.
        dims: feature size of each activation row.
        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on CPU.
        premodel_path: directory containing the ``paddle_inceptionv3`` weights.

    Returns:
        Array of shape ``(len(img), dims)``.
    """
    n_used_img = len(img)
    n_batches = (n_used_img + batch_size - 1) // batch_size

    pred_arr = np.empty((n_used_img, dims))
    # Loop-invariant: the original rebuilt the program and reloaded the
    # weights for every batch.
    predict = _create_predictor(model, use_gpu, premodel_path)

    for i in tqdm(range(n_batches)):
        start = i * batch_size
        end = min(start + batch_size, n_used_img)
        images = img[start:end]
        if images.shape[1] != 3:
            images = images.transpose((0, 3, 1, 2))
        # Scale into a fresh float32 array.  The original `images /= 255`
        # wrote through the slice into the caller's array and raised on
        # integer dtypes.
        images = images.astype(np.float32) / 255

        pred = predict(images)
        pred_arr[start:end] = pred.reshape(end - start, -1)

    return pred_arr


def _compute_statistic_of_img(img, model, batch_size, dims, use_gpu, premodel_path):
    """Return ``(mu, sigma)``: mean and covariance of the activations of ``img``."""
    act = _get_activations_from_ims(img, model, batch_size, dims, use_gpu, premodel_path)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma


def calculate_fid_given_img(img_fake, img_real, batch_size, use_gpu, dims, premodel_path, model=None):
    """Return the FID between two in-memory image arrays.

    When ``model`` is ``None`` an ``InceptionV3`` is created for the block
    that ``InceptionV3.BLOCK_INDEX_BY_DIM`` maps ``dims`` to.

    Raises:
        AssertionError: if ``premodel_path`` does not exist.
    """
    assert os.path.exists(premodel_path), 'pretrain_model path {} is not exists! Please download it first'.format(premodel_path)
    if model is None:
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
        model = InceptionV3([block_idx])

    m1, s1 = _compute_statistic_of_img(img_fake, model, batch_size, dims, use_gpu, premodel_path)
    m2, s2 = _compute_statistic_of_img(img_real, model, batch_size, dims, use_gpu, premodel_path)

    fid_value = _calculate_frechet_distance(m1, s1, m2, s2)
    return fid_value


def _read_image(path):
    """Read one image as float32; raise if OpenCV cannot decode it."""
    image = imread(str(path))
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise RuntimeError('Failed to read image: %s' % path)
    return image.astype(np.float32)


def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path):
    """Return Inception activations for a list of image files.

    Only ``len(files) // batch_size`` full batches are evaluated; a trailing
    partial batch is dropped (a warning is printed).

    NOTE(review): images are read with ``cv2.imread``, which yields BGR
    channel order; confirm that the converted weights expect this order.

    Args:
        files: list of image paths.
        model: object exposing ``network(images, class_dim=...)``.
        batch_size: images per forward pass; clamped to ``len(files)``.
        dims: feature size of each activation row.
        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on CPU.
        premodel_path: directory containing the ``paddle_inceptionv3`` weights.

    Returns:
        Array of shape ``(n_batches * batch_size, dims)``.

    Raises:
        RuntimeError: if ``files`` is empty, an image cannot be read, or the
            images of one batch differ in shape.
    """
    if not files:
        # Previously this fell through to a ZeroDivisionError.
        raise RuntimeError('No image files to compute activations from')

    # Clamp first so the "ignored samples" warning reflects the batch size
    # that is actually used.
    if batch_size > len(files):
        print(('Warning: batch size is bigger than the datasets size. '
               'Setting batch size to datasets size'))
        batch_size = len(files)
    if len(files) % batch_size != 0:
        print(('Warning: number of images is not a multiple of the '
               'batch size. Some samples are going to be ignored.'))

    n_batches = len(files) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))
    predict = _create_predictor(model, use_gpu, premodel_path)

    for i in tqdm(range(n_batches)):
        start = i * batch_size
        end = start + batch_size
        batch = [_read_image(f) for f in files[start:end]]

        # The original fallback for a non-4-D stack produced a 3-D array and
        # then crashed in the 4-axis transpose; fail with a clear message.
        if len({image.shape for image in batch}) != 1:
            raise RuntimeError(
                'Images %s .. %s do not share one shape; resize them to a '
                'common size first' % (files[start], files[end - 1]))

        images = np.stack(batch).transpose((0, 3, 1, 2))
        images /= 255

        pred = predict(images)
        pred_arr[start:end] = pred.reshape(end - start, -1)

    return pred_arr


def _calculate_activation_statistics(files, model, premodel_path, batch_size=50, dims=2048, use_gpu=False):
    """Return ``(mu, sigma)``: mean and covariance of the activations of ``files``."""
    act = _get_activations(files, model, batch_size, dims, use_gpu, premodel_path)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma


def _compute_statistics_of_path(path, model, batch_size, dims, use_gpu, premodel_path):
    """Return ``(mu, sigma)`` for ``path``.

    ``path`` is either an ``.npz`` archive holding ``mu`` and ``sigma`` or a
    directory that is walked recursively for ``*.jpg`` and ``*.png`` files.
    """
    if path.endswith('.npz'):
        # Context manager closes the archive even if a key is missing.
        with np.load(path) as f:
            m, s = f['mu'][:], f['sigma'][:]
    else:
        files = []
        for root, _dirnames, filenames in os.walk(path):
            # Collect both extensions.  The original `jpg or png` expression
            # skipped every PNG in a directory that also held a JPG.
            for pattern in ('*.jpg', '*.png'):
                for filename in fnmatch.filter(filenames, pattern):
                    files.append(os.path.join(root, filename))
        # Deterministic order, so the samples dropped from a trailing partial
        # batch do not depend on directory listing order.
        files.sort()
        m, s = _calculate_activation_statistics(files, model, premodel_path, batch_size, dims, use_gpu)
    return m, s


def calculate_fid_given_paths(paths, batch_size, use_gpu, dims, premodel_path, model=None):
    """Return the FID between ``paths[0]`` and ``paths[1]``.

    Each path is an image directory or an ``.npz`` statistics archive.  When
    ``model`` is ``None`` an ``InceptionV3`` is created for the block that
    ``InceptionV3.BLOCK_INDEX_BY_DIM`` maps ``dims`` to.

    Raises:
        AssertionError: if ``premodel_path`` does not exist.
        RuntimeError: if any entry of ``paths`` does not exist.
    """
    assert os.path.exists(premodel_path), 'pretrain_model path {} is not exists! Please download it first'.format(premodel_path)
    for p in paths:
        if not os.path.exists(p):
            raise RuntimeError('Invalid path: %s' % p)

    if model is None:
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
        model = InceptionV3([block_idx])

    m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size, dims, use_gpu, premodel_path)
    m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size, dims, use_gpu, premodel_path)

    fid_value = _calculate_frechet_distance(m1, s1, m2, s2)
    return fid_value


# ---------------------------------------------------------------------------
# Patch framing that shares these physical lines with the code above.
# Kept as comments so no text from the original patch is lost.  The license
# header of inception.py continues on the following line.
#
#   diff --git a/PaddleCV/gan/metric/inception.py b/PaddleCV/gan/metric/inception.py
#   new file mode 100644
#   index 00000000..01dec007
#   --- /dev/null
#   +++ b/PaddleCV/gan/metric/inception.py
#   @@ -0,0 +1,302 @@
#   +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#   +#
#   +#Licensed under the Apache License, Version 2.0 (the "License");
#   +#you may not use this file except in compliance with the License.
#   +#You may obtain a copy of the License at
#   +#
#   +# http://www.apache.org/licenses/LICENSE-2.0
#   +#
#   +#Unless required by applicable law or agreed to in writing, software
#   +#distributed under the License is distributed on an "AS IS" BASIS,
#   +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# ---------------------------------------------------------------------------
#See the License for the specific language governing permissions and
#limitations under the License.

import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = ['InceptionV3']


class InceptionV3:
    """Inception-v3 graph builder that returns intermediate feature maps.

    ``output_blocks`` selects which of the four feature blocks are returned
    by :meth:`network`; ``BLOCK_INDEX_BY_DIM`` maps a feature size to the
    index of the block producing it.
    """

    DEFAULT_BLOCK_INDEX = 3
    BLOCK_INDEX_BY_DIM = {
        64: 0,  # First max pooling features
        192: 1,  # Second max pooling features
        768: 2,  # Pre-aux classifier features
        2048: 3  # Final average pooling features
    }

    def __init__(self, output_blocks=None, resize_input=True, normalize_input=True):
        """Configure which blocks to build and how to preprocess the input.

        Args:
            output_blocks: iterable of block indices (0..3) to return;
                defaults to ``[DEFAULT_BLOCK_INDEX]``.
            resize_input: bilinearly resize the input to 299x299 first.
            normalize_input: map the input through ``x * 2 - 1``.

        Raises:
            AssertionError: if the largest requested index exceeds 3.
        """
        # None sentinel instead of a list default shared between instances.
        if output_blocks is None:
            output_blocks = [self.DEFAULT_BLOCK_INDEX]
        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, 'Last possible output block index is 3'

    def network(self, x, class_dim=1000, aux_logits=False):
        """Preprocess ``x`` and return the list of requested block outputs."""
        if self.resize_input:
            x = fluid.layers.resize_bilinear(x, out_shape=[299, 299], align_corners=False, align_mode=0)

        if self.normalize_input:
            x = x * 2 - 1

        out, _, = self.fid_inceptionv3(x, class_dim, aux_logits)
        return out

    def fid_inceptionv3(self, x, num_classes=1000, aux_logits=False):
        """Build the Inception-v3 trunk used for FID computation.

        Layers are only built up to ``self.last_needed_block``.  The
        classification head is not built; only feature maps are returned.

        Returns:
            ``(out, aux)``: ``out`` is the list of requested block outputs in
            block order, ``aux`` is the auxiliary logits when ``aux_logits``
            is true and block 2 is built, otherwise ``None``.
        """
        out = []
        aux = None

        # block0
        x = self.conv_bn_layer(x, 32, 3, stride=2, name='Conv2d_1a_3x3')
        x = self.conv_bn_layer(x, 32, 3, name='Conv2d_2a_3x3')
        x = self.conv_bn_layer(x, 64, 3, padding=1, name='Conv2d_2b_3x3')
        x = fluid.layers.pool2d(x, pool_size=3, pool_stride=2, pool_type='max')
        if 0 in self.output_blocks:
            out.append(x)

        if self.last_needed_block >= 1:
            # block1
            x = self.conv_bn_layer(x, 80, 1, name='Conv2d_3b_1x1')
            x = self.conv_bn_layer(x, 192, 3, name='Conv2d_4a_3x3')
            x = fluid.layers.pool2d(x, pool_size=3, pool_stride=2, pool_type='max')
            if 1 in self.output_blocks:
                out.append(x)

        if self.last_needed_block >= 2:
            # block2: Mixed_5b / 5c / 5d
            x = self.fid_inceptionA(x, pool_features=32, name='Mixed_5b')
            x = self.fid_inceptionA(x, pool_features=64, name='Mixed_5c')
            x = self.fid_inceptionA(x, pool_features=64, name='Mixed_5d')

            # Mixed_6
            x = self.inceptionB(x, name='Mixed_6a')
            x = self.fid_inceptionC(x, c7=128, name='Mixed_6b')
            x = self.fid_inceptionC(x, c7=160, name='Mixed_6c')
            x = self.fid_inceptionC(x, c7=160, name='Mixed_6d')
            x = self.fid_inceptionC(x, c7=192, name='Mixed_6e')
            if 2 in self.output_blocks:
                out.append(x)

            if aux_logits:
                aux = self.inceptionAux(x, num_classes, name='AuxLogits')

        if self.last_needed_block >= 3:
            # block3: Mixed_7
            x = self.inceptionD(x, name='Mixed_7a')
            x = self.fid_inceptionE_1(x, name='Mixed_7b')
            x = self.fid_inceptionE_2(x, name='Mixed_7c')

            x = fluid.layers.pool2d(x, global_pooling=True, pool_type='avg')
            out.append(x)

        return out, aux

    # ------------------------------------------------------------------
    # Shared block bodies.  The public variants below differ only in the
    # keyword arguments given to the pooling branch.
    # ------------------------------------------------------------------
    def _block_a(self, x, pool_features, name, **avg_pool_kwargs):
        """Inception-A body; ``avg_pool_kwargs`` go to the avg-pool branch."""
        branch1x1 = self.conv_bn_layer(x, 64, 1, name=name+'.branch1x1')

        branch5x5 = self.conv_bn_layer(x, 48, 1, name=name+'.branch5x5_1')
        branch5x5 = self.conv_bn_layer(branch5x5, 64, 5, padding=2, name=name+'.branch5x5_2')

        branch3x3dbl = self.conv_bn_layer(x, 64, 1, name=name+'.branch3x3dbl_1')
        branch3x3dbl = self.conv_bn_layer(branch3x3dbl, 96, 3, padding=1, name=name+'.branch3x3dbl_2')
        branch3x3dbl = self.conv_bn_layer(branch3x3dbl, 96, 3, padding=1, name=name+'.branch3x3dbl_3')

        branch_pool = fluid.layers.pool2d(
            x, pool_size=3, pool_stride=1, pool_padding=1, pool_type='avg', **avg_pool_kwargs)
        branch_pool = self.conv_bn_layer(branch_pool, pool_features, 1, name=name+'.branch_pool')

        return fluid.layers.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=1)

    def _block_c(self, x, c7, name, **avg_pool_kwargs):
        """Inception-C body; ``avg_pool_kwargs`` go to the avg-pool branch."""
        branch1x1 = self.conv_bn_layer(x, 192, 1, name=name+'.branch1x1')

        branch7x7 = self.conv_bn_layer(x, c7, 1, name=name+'.branch7x7_1')
        branch7x7 = self.conv_bn_layer(branch7x7, c7, (1, 7), padding=(0, 3), name=name+'.branch7x7_2')
        branch7x7 = self.conv_bn_layer(branch7x7, 192, (7, 1), padding=(3, 0), name=name+'.branch7x7_3')

        branch7x7dbl = self.conv_bn_layer(x, c7, 1, name=name+'.branch7x7dbl_1')
        branch7x7dbl = self.conv_bn_layer(branch7x7dbl, c7, (7, 1), padding=(3, 0), name=name+'.branch7x7dbl_2')
        branch7x7dbl = self.conv_bn_layer(branch7x7dbl, c7, (1, 7), padding=(0, 3), name=name+'.branch7x7dbl_3')
        branch7x7dbl = self.conv_bn_layer(branch7x7dbl, c7, (7, 1), padding=(3, 0), name=name+'.branch7x7dbl_4')
        branch7x7dbl = self.conv_bn_layer(branch7x7dbl, 192, (1, 7), padding=(0, 3), name=name+'.branch7x7dbl_5')

        branch_pool = fluid.layers.pool2d(
            x, pool_size=3, pool_stride=1, pool_padding=1, pool_type='avg', **avg_pool_kwargs)
        branch_pool = self.conv_bn_layer(branch_pool, 192, 1, name=name+'.branch_pool')

        return fluid.layers.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=1)

    def _block_e(self, x, name, **pool_kwargs):
        """Inception-E body; ``pool_kwargs`` select the pooling branch type."""
        branch1x1 = self.conv_bn_layer(x, 320, 1, name=name+'.branch1x1')

        branch3x3 = self.conv_bn_layer(x, 384, 1, name=name+'.branch3x3_1')
        branch3x3_2a = self.conv_bn_layer(branch3x3, 384, (1, 3), padding=(0, 1), name=name+'.branch3x3_2a')
        branch3x3_2b = self.conv_bn_layer(branch3x3, 384, (3, 1), padding=(1, 0), name=name+'.branch3x3_2b')
        branch3x3 = fluid.layers.concat([branch3x3_2a, branch3x3_2b], axis=1)

        branch3x3dbl = self.conv_bn_layer(x, 448, 1, name=name+'.branch3x3dbl_1')
        branch3x3dbl = self.conv_bn_layer(branch3x3dbl, 384, 3, padding=1, name=name+'.branch3x3dbl_2')
        branch3x3dbl_3a = self.conv_bn_layer(branch3x3dbl, 384, (1, 3), padding=(0, 1), name=name+'.branch3x3dbl_3a')
        branch3x3dbl_3b = self.conv_bn_layer(branch3x3dbl, 384, (3, 1), padding=(1, 0), name=name+'.branch3x3dbl_3b')
        branch3x3dbl = fluid.layers.concat([branch3x3dbl_3a, branch3x3dbl_3b], axis=1)

        branch_pool = fluid.layers.pool2d(
            x, pool_size=3, pool_stride=1, pool_padding=1, **pool_kwargs)
        branch_pool = self.conv_bn_layer(branch_pool, 192, 1, name=name+'.branch_pool')

        return fluid.layers.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=1)

    # ------------------------------------------------------------------
    # Public blocks (names and signatures unchanged).
    # ------------------------------------------------------------------
    def inceptionA(self, x, pool_features, name=None):
        """Inception-A block with pool2d's default ``exclusive`` setting."""
        return self._block_a(x, pool_features, name)

    def inceptionB(self, x, name=None):
        """Inception-B (grid reduction) block."""
        branch3x3 = self.conv_bn_layer(x, 384, 3, stride=2, name=name+'.branch3x3')

        branch3x3dbl = self.conv_bn_layer(x, 64, 1, name=name+'.branch3x3dbl_1')
        branch3x3dbl = self.conv_bn_layer(branch3x3dbl, 96, 3, padding=1, name=name+'.branch3x3dbl_2')
        branch3x3dbl = self.conv_bn_layer(branch3x3dbl, 96, 3, stride=2, name=name+'.branch3x3dbl_3')

        branch_pool = fluid.layers.pool2d(x, pool_size=3, pool_stride=2, pool_type='max')

        return fluid.layers.concat([branch3x3, branch3x3dbl, branch_pool], axis=1)

    def inceptionC(self, x, c7, name=None):
        """Inception-C block with pool2d's default ``exclusive`` setting."""
        return self._block_c(x, c7, name)

    def inceptionD(self, x, name=None):
        """Inception-D (grid reduction) block."""
        branch3x3 = self.conv_bn_layer(x, 192, 1, name=name+'.branch3x3_1')
        branch3x3 = self.conv_bn_layer(branch3x3, 320, 3, stride=2, name=name+'.branch3x3_2')

        branch7x7x3 = self.conv_bn_layer(x, 192, 1, name=name+'.branch7x7x3_1')
        branch7x7x3 = self.conv_bn_layer(branch7x7x3, 192, (1, 7), padding=(0, 3), name=name+'.branch7x7x3_2')
        branch7x7x3 = self.conv_bn_layer(branch7x7x3, 192, (7, 1), padding=(3, 0), name=name+'.branch7x7x3_3')
        branch7x7x3 = self.conv_bn_layer(branch7x7x3, 192, 3, stride=2, name=name+'.branch7x7x3_4')

        branch_pool = fluid.layers.pool2d(x, pool_size=3, pool_stride=2, pool_type='max')
        return fluid.layers.concat([branch3x3, branch7x7x3, branch_pool], axis=1)

    def inceptionE(self, x, name=None):
        """Inception-E block with an average-pooling branch."""
        return self._block_e(x, name, pool_type='avg')

    def inceptionAux(self, x, num_classes, name=None):
        """Auxiliary classifier head returning ``num_classes`` logits."""
        x = fluid.layers.pool2d(x, pool_size=5, pool_stride=3, pool_type='avg')
        x = self.conv_bn_layer(x, 128, 1, name=name+'.conv0')
        x = self.conv_bn_layer(x, 768, 5, name=name+'.conv1')
        x = fluid.layers.pool2d(x, global_pooling=True, pool_type='avg')
        x = fluid.layers.flatten(x, axis=1)
        x = fluid.layers.fc(x, size=num_classes)
        return x

    def fid_inceptionA(self, x, pool_features, name=None):
        """FID variant of Inception-A: avg pool with explicit ``exclusive=True``."""
        return self._block_a(x, pool_features, name, exclusive=True)

    def fid_inceptionC(self, x, c7, name=None):
        """FID variant of Inception-C: avg pool with explicit ``exclusive=True``."""
        return self._block_c(x, c7, name, exclusive=True)

    def fid_inceptionE_1(self, x, name=None):
        """First FID Inception-E block: avg pool with explicit ``exclusive=True``."""
        return self._block_e(x, name, exclusive=True, pool_type='avg')

    def fid_inceptionE_2(self, x, name=None):
        """Second FID Inception-E block: max-pooling branch.

        The original author's comment on the max pool reads "same with paper".
        """
        return self._block_e(x, name, pool_type='max')

    def conv_bn_layer(self,
                      data,
                      num_filters,
                      filter_size,
                      stride=1,
                      padding=0,
                      groups=1,
                      act='relu',
                      name=None):
        """Bias-free conv2d followed by batch norm with activation ``act``.

        Parameter names are derived from ``name`` (``<name>.conv.weight``,
        ``<name>.bn.weight``, ``<name>.bn.bias``, ``<name>.bn.running_mean``,
        ``<name>.bn.running_var``), so ``name`` must be a string.
        """
        conv = fluid.layers.conv2d(
            input=data,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + ".conv.weight"),
            bias_attr=False,
            name=name)
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            epsilon=0.001,
            name=name+'.bn',
            param_attr=ParamAttr(name=name + ".bn.weight"),
            bias_attr=ParamAttr(name=name + ".bn.bias"),
            moving_mean_name=name + '.bn.running_mean',
            moving_variance_name=name + '.bn.running_var')


# ---------------------------------------------------------------------------
# Patch framing that shares these physical lines with the code above.
# Kept as comments so no text from the original patch is lost (its whitespace
# was already collapsed).  The requirements.txt "index" entry continues on
# the following line.
#
#   diff --git a/PaddleCV/gan/network/base_network.py b/PaddleCV/gan/network/base_network.py
#   index 50b2d864..ed2fd507 100644
#   --- a/PaddleCV/gan/network/base_network.py
#   +++ b/PaddleCV/gan/network/base_network.py
#   @@ -42,7 +42,7 @@ def norm_layer(input,
#    if norm_type == 'batch_norm':
#    if affine == True:
#    param_attr = fluid.ParamAttr(
#   - name=name + '_w', initializer=fluid.initializer.Constant(1.0))
#   + name=name + '_w', initializer=fluid.initializer.Normal(loc=1.0, scale=0.02))
#    bias_attr = fluid.ParamAttr(
#    name=name + '_b', initializer=fluid.initializer.Constant(value=0.0))
#   diff --git a/PaddleCV/gan/requirements.txt b/PaddleCV/gan/requirements.txt
#   index
# ---------------------------------------------------------------------------
b74edd61..37f7ae2e 100644 --- a/PaddleCV/gan/requirements.txt +++ b/PaddleCV/gan/requirements.txt @@ -1 +1,2 @@ numpy >= 1.15.0 +opencv-python -- GitLab