未验证 提交 2b14b50c 编写于 作者: W whs 提交者: GitHub

Add cycle gan. (#1225)

* Add cycle gan.

* Fix bugs

* Fix program.

* Update cycleGAN
上级 16788c58

运行本目录下的程序示例需要使用PaddlePaddle develop最新版本。如果您的PaddlePaddle安装版本低于此要求,请按照[安装文档](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html)中的说明更新PaddlePaddle安装版本。
## 代码结构
```
├── data_reader.py # 读取、处理数据。
├── layers.py # 封装定义基础的layers。
├── model.py # 定义基础生成网络和判别网络。
├── trainer.py # 构造loss和训练网络。
├── train.py # 训练脚本。
└── infer.py # 预测脚本。
```
## 简介
TODO
## 数据准备
本教程使用 horse2zebra 数据集 来进行模型的训练测试工作,该数据集是用关键字'wild horse'和'zebra'过滤[ImageNet](http://www.image-net.org/)数据集并下载得到的。
horse2zebra训练集包含1069张野马图片,1336张斑马图片。测试集包含121张野马图片和141张斑马图片。
数据下载处理完毕后,需按以下目录结构组织:
```
horse2zebra/
|-- testA
|-- testA.txt
|-- testB
|-- testB.txt
|-- trainA
|-- trainA.txt
|-- trainB
`-- trainB.txt
```
以上数据文件中,‘testA’为存放野马测试图片的文件夹,‘testB’为存放斑马测试图片的文件夹,'testA.txt'和'testB.txt'分别为野马和斑马测试图片路径列表文件,格式如下:
```
testA/n02381460_9243.jpg
testA/n02381460_9244.jpg
testA/n02381460_9245.jpg
```
训练数据组织方式与测试数据相同。
## 模型训练与预测
### 训练
在GPU单卡上训练:
```
env CUDA_VISIBLE_DEVICES=0 python train.py
```
执行`python train.py --help`可查看更多使用方式和参数详细说明。
图1为训练152轮的训练损失示意图,其中横坐标轴为训练轮数,纵轴为在训练集上的损失。其中,'g_A_loss','g_B_loss','d_A_loss'和'd_B_loss'分别为生成器A、生成器B、判别器A和判别器B的训练损失。
<p align="center">
<img src="images/cycleGAN_loss.png" width="620" hspace='10'/> <br/>
<strong>图 1</strong>
</p>
### 预测
执行以下命令读取多张图片进行预测:
```
env CUDA_VISIBLE_DEVICES=0 python infer.py \
--init_model="models/1" --input="./data/inputA/*" \
--output="./output"
```
训练150轮的模型预测效果如图2和图3所示:
<p align="center">
<img src="images/A2B.jpg" width="620" hspace='10'/> <br/>
<strong>图 2</strong>
</p>
<p align="center">
<img src="images/B2A.jpg" width="620" hspace='10'/> <br/>
<strong>图 3</strong>
</p>
import os
from PIL import Image
import numpy as np
from itertools import izip
# Root directory of the horse2zebra dataset; every list-file entry is
# resolved relative to it.
IMAGES_ROOT = "./data/horse2zebra/"
# Text files listing one image path per line for each split / style.
A_LIST_FILE = IMAGES_ROOT + "trainA.txt"
B_LIST_FILE = IMAGES_ROOT + "trainB.txt"
A_TEST_LIST_FILE = IMAGES_ROOT + "testA.txt"
B_TEST_LIST_FILE = IMAGES_ROOT + "testB.txt"
def image_shape():
    """Return the per-image shape as a new list ``[3, 256, 256]``.

    The order is channels first, matching the ``transpose([2, 0, 1])``
    applied to every image by the readers in this module.
    """
    channels, height, width = 3, 256, 256
    return [channels, height, width]
def max_images_num():
    """Return the number of training iterations to run per epoch.

    The training script loops ``range(max_images_num())`` times per epoch.
    NOTE(review): the value 1 looks like a debugging setting rather than
    the real dataset size -- confirm before a full training run.
    """
    iterations_per_epoch = 1
    return iterations_per_epoch
def reader_creater(list_file, cycle=True, shuffle=True, return_name=False):
    """Create a reader (generator function) over the images in a list file.

    Args:
        list_file: Path of a text file with one image path per line; each
            path is resolved relative to ``IMAGES_ROOT``.
        cycle: If True the reader loops over the list forever; otherwise it
            stops after one pass.
        shuffle: If True the path list is shuffled in place (with
            ``np.random.shuffle``) at the start of every pass.
        return_name: If True yield ``(image[np.newaxis, :], basename)``
            tuples; otherwise yield the bare image array.

    Returns:
        A zero-argument function that returns a generator. Images are
        resized to 256x256, scaled from [0, 255] to [-1, 1], cut to their
        first three channels, cast to float32 and transposed to
        channels-first. Images whose array is not 3-dimensional (e.g.
        grayscale) are skipped.

    Raises:
        ValueError: (when the reader is iterated) if ``cycle`` is True and
            the list file contains no paths, which would otherwise spin
            forever without yielding.
    """
    # Read the list eagerly inside ``with`` so the file handle is closed,
    # and drop blank lines so they do not turn into a bare directory path.
    with open(list_file, 'r') as list_fp:
        stripped_lines = [line.strip("\n\r\t ") for line in list_fp]
    images = [IMAGES_ROOT + line for line in stripped_lines if line]

    def reader():
        if cycle and not images:
            raise ValueError("No image paths found in [%s]." % list_file)
        while True:
            if shuffle:
                np.random.shuffle(images)
            for image_path in images:
                image = Image.open(image_path)
                image = image.resize((256, 256))
                # Map pixel values from [0, 255] to [-1, 1].
                image = np.array(image) / 127.5 - 1
                if len(image.shape) != 3:
                    # No channel axis (e.g. grayscale): skip this image.
                    continue
                # Keep at most three channels, then HWC -> CHW.
                image = image[:, :, 0:3].astype("float32")
                image = image.transpose([2, 0, 1])
                if return_name:
                    yield image[np.newaxis, :], os.path.basename(image_path)
                else:
                    yield image
            if not cycle:
                break

    return reader
def a_reader():
    """Return the training reader for style-A images.

    Uses the defaults of ``reader_creater``: cycling, shuffled, and
    yielding bare image arrays.
    """
    return reader_creater(list_file=A_LIST_FILE)
def b_reader():
    """Return the training reader for style-B images.

    Uses the defaults of ``reader_creater``: cycling, shuffled, and
    yielding bare image arrays.
    """
    return reader_creater(list_file=B_LIST_FILE)
def a_test_reader():
    """Return the test reader for style-A images.

    The reader makes a single pass (``cycle=False``) and yields
    ``(image, file name)`` pairs (``return_name=True``).
    """
    return reader_creater(
        list_file=A_TEST_LIST_FILE, return_name=True, cycle=False)
def b_test_reader():
    """Return the test reader for style-B images.

    The reader makes a single pass (``cycle=False``) and yields
    ``(image, file name)`` pairs (``return_name=True``).
    """
    return reader_creater(
        list_file=B_TEST_LIST_FILE, return_name=True, cycle=False)
if __name__ == "__main__":
    # Smoke test: walk both test readers in lockstep and print the shape
    # and file name of every pair. The second reader is the style-B one
    # (the original zipped the style-A reader with itself).
    # Single-argument print(...) behaves the same on Python 2 and 3.
    for A, B in izip(a_test_reader()(), b_test_reader()()):
        print(A[0].shape)
        print(A[1])
        print(B[0].shape)
        print(B[1])
import argparse
import functools
import os
from PIL import Image
from paddle.fluid import core
import paddle.fluid as fluid
import paddle
import numpy as np
from scipy.misc import imsave
from model import *
import glob
from utility import add_arguments, print_arguments
# Command-line interface of the inference script. ``add_arg`` forwards to
# ``utility.add_arguments`` with the parser bound as ``argparser``.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
# Each row is (flag name, type, default value, help text).
for _arg_spec in (
    ('input', str, None, "The images to be infered."),
    ('output', str, "./infer_result", "The directory the infer result to be saved to."),
    ('init_model', str, None, "The init model file of directory."),
    ('input_style', str, "A", "The style of the input, A or B"),
    ('use_gpu', bool, True, "Whether to use GPU to train."),
):
    add_arg(*_arg_spec)
del _arg_spec
# yapf: enable
def infer(args):
    """Translate every image matched by ``args.input`` with a generator.

    Builds the ``g_A`` generator (for ``args.input_style == "A"``) or the
    ``g_B`` generator (for ``"B"``), loads its persistable variables from
    ``args.init_model`` and writes one ``fake_<file name>`` image per input
    into ``args.output``.

    Args:
        args: Parsed command-line namespace with the attributes ``input``
            (glob pattern), ``output`` (directory), ``init_model``,
            ``input_style`` and ``use_gpu``.

    Raises:
        ValueError: If ``args.input_style`` is neither "A" nor "B". (The
            original raised a plain string, which is itself a TypeError.)
    """
    data_shape = [-1, 3, 256, 256]
    input_var = fluid.layers.data(
        name='input', shape=data_shape, dtype='float32')
    if args.input_style == "A":
        fake = build_generator_resnet_9blocks(input_var, name="g_A")
    elif args.input_style == "B":
        fake = build_generator_resnet_9blocks(input_var, name="g_B")
    else:
        raise ValueError(
            "Input with style [%s] is not supported." % args.input_style)

    # prepare environment
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    fluid.io.load_persistables(exe, args.init_model)

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    for image_path in glob.glob(args.input):
        print("read %s" % image_path)
        image_name = os.path.basename(image_path)
        image = Image.open(image_path)
        image = image.resize((256, 256))
        # Map pixel values from [0, 255] to [-1, 1].
        image = np.array(image) / 127.5 - 1
        if len(image.shape) != 3:
            # No channel axis (e.g. grayscale): skip this image.
            continue
        # Keep at most three channels, as data_reader does, so images with
        # an alpha channel still match the 3-channel input layer.
        image = image[:, :, 0:3]
        # HWC -> CHW, then add the batch axis.
        data = image.transpose([2, 0, 1])[np.newaxis, :].astype("float32")
        tensor = core.LoDTensor()
        tensor.set(data, place)
        fake_temp = exe.run(fetch_list=[fake.name], feed={"input": tensor})
        # CHW -> HWC for saving.
        fake_temp = np.squeeze(fake_temp[0]).transpose([1, 2, 0])
        # Map the output from [-1, 1] back to [0, 255].
        imsave(
            os.path.join(args.output, "fake_" + image_name),
            ((fake_temp + 1) * 127.5).astype(np.uint8))
if __name__ == "__main__":
    # Parse the command line, echo the parsed flags, then run inference.
    cli_args = parser.parse_args()
    print_arguments(cli_args)
    infer(cli_args)
from __future__ import division
import paddle.fluid as fluid
import numpy as np
def cal_padding(img_size, stride, filter_size, dilation=1):
    """Calculate the "SAME"-style padding along one spatial dimension.

    Args:
        img_size: Input size along the dimension.
        stride: Convolution stride along the dimension.
        filter_size: Kernel size along the dimension.
        dilation: Kernel dilation. The effective kernel extent is
            ``dilation * (filter_size - 1) + 1``, which equals
            ``filter_size`` for the default dilation of 1.

    Returns:
        A ``(before, after)`` tuple of padding amounts. When the total is
        odd, the extra element goes to ``after``.
    """
    # Use the dilated kernel extent. The original computed this value but
    # then ignored it, so ``dilation`` had no effect.
    valid_filter_size = dilation * (filter_size - 1) + 1
    remainder = img_size % stride
    if remainder == 0:
        total_padding = max(valid_filter_size - stride, 0)
    else:
        total_padding = max(valid_filter_size - remainder, 0)
    before = total_padding // 2
    return before, total_padding - before
def instance_norm(input, name=None):
    """Instance normalization with a learnable per-channel scale and offset.

    The mean and variance are reduced over dimensions 2 and 3 of ``input``
    for every sample and channel. The centered input is multiplied by the
    scale, divided by ``sqrt(var + 1e-5)`` and shifted by the offset.

    Args:
        input: Variable to normalize; dimension 1 is used as the channel
            axis for the scale/offset parameters.
        name: Optional prefix; the parameters are then named
            ``<name>_scale`` and ``<name>_offset``. When None, no explicit
            parameter names are passed (the original hit a NameError here).

    Returns:
        The normalized variable.
    """
    # Same keyword arguments the original passed via ``**locals()``.
    helper = fluid.layer_helper.LayerHelper(
        "instance_norm", input=input, name=name)
    dtype = helper.input_dtype()
    epsilon = 1e-5
    mean = fluid.layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
    var = fluid.layers.reduce_mean(
        fluid.layers.square(input - mean), dim=[2, 3], keep_dim=True)

    # Always define both names so the default ``name=None`` works.
    scale_name = name + "_scale" if name is not None else None
    offset_name = name + "_offset" if name is not None else None

    scale_param = fluid.ParamAttr(
        name=scale_name,
        initializer=fluid.initializer.TruncatedNormal(1.0, 0.02),
        trainable=True)
    offset_param = fluid.ParamAttr(
        name=offset_name,
        initializer=fluid.initializer.Constant(0.0),
        trainable=True)
    # One scale and one offset value per channel (dimension 1).
    scale = helper.create_parameter(
        attr=scale_param, shape=input.shape[1:2], dtype=dtype)
    offset = helper.create_parameter(
        attr=offset_param, shape=input.shape[1:2], dtype=dtype)

    tmp = fluid.layers.elementwise_mul(x=(input - mean), y=scale, axis=1)
    tmp = tmp / fluid.layers.sqrt(var + epsilon)
    tmp = fluid.layers.elementwise_add(tmp, offset, axis=1)
    return tmp
def conv2d(input,
           num_filters=64,
           filter_size=7,
           stride=1,
           stddev=0.02,
           padding="VALID",
           name="conv2d",
           norm=True,
           relu=True,
           relufactor=0.0):
    """Wrapper for conv2d op to support VALID and SAME padding mode.

    Args:
        input: Input variable; dimensions 2 and 3 are treated as height
            and width.
        num_filters: Number of output channels.
        filter_size: Kernel size.
        stride: Convolution stride.
        stddev: Scale of the truncated-normal weight initializer.
        padding: "SAME" computes padding with ``cal_padding``; any other
            value (including the default "VALID") uses zero padding.
        name: Layer name; parameters are named ``<name>_w`` / ``<name>_b``
            and the norm layer ``<name>_norm``.
        norm: Apply ``instance_norm`` after the convolution.
        relu: Apply ``leaky_relu`` with slope ``relufactor`` at the end.
        relufactor: Negative slope for the leaky ReLU.

    Returns:
        The output variable of the last applied layer.
    """
    need_crop = False
    if padding == "SAME":
        top_padding, bottom_padding = cal_padding(input.shape[2], stride,
                                                  filter_size)
        # Width padding comes from the width dimension (index 3). The
        # original reused the height (index 2), which is only right for
        # square inputs.
        left_padding, right_padding = cal_padding(input.shape[3], stride,
                                                  filter_size)
        height_padding = bottom_padding
        width_padding = right_padding
        if top_padding != bottom_padding or left_padding != right_padding:
            # The op is given one padding value per dimension, so on
            # asymmetric padding over-pad by ``stride`` and crop the first
            # output row and column afterwards.
            height_padding = top_padding + stride
            width_padding = left_padding + stride
            need_crop = True
    else:
        height_padding = 0
        width_padding = 0

    padding = [height_padding, width_padding]
    param_attr = fluid.ParamAttr(
        name=name + "_w",
        initializer=fluid.initializer.TruncatedNormal(scale=stddev))
    bias_attr = fluid.ParamAttr(
        name=name + "_b", initializer=fluid.initializer.Constant(0.0))
    conv = fluid.layers.conv2d(
        input,
        num_filters,
        filter_size,
        name=name,
        stride=stride,
        padding=padding,
        use_cudnn=False,
        param_attr=param_attr,
        bias_attr=bias_attr)
    if need_crop:
        # Drop the first row and column produced by the over-padding.
        conv = fluid.layers.crop(
            conv,
            shape=(-1, conv.shape[1], conv.shape[2] - 1, conv.shape[3] - 1),
            offsets=(0, 0, 1, 1))
    if norm:
        conv = instance_norm(input=conv, name=name + "_norm")
    if relu:
        conv = fluid.layers.leaky_relu(conv, relufactor)
    return conv
def deconv2d(input,
             out_shape,
             num_filters=64,
             filter_size=7,
             stride=1,
             stddev=0.02,
             padding="VALID",
             name="conv2d",
             norm=True,
             relu=True,
             relufactor=0.0):
    """Transposed-convolution wrapper with VALID and SAME padding modes.

    Args:
        input: Input variable.
        out_shape: ``[height, width]`` of the intended output; it is only
            used to compute the padding in "SAME" mode.
        num_filters: Number of output channels.
        filter_size: Kernel size.
        stride: Stride of the transposed convolution.
        stddev: Scale of the truncated-normal weight initializer.
        padding: "SAME" computes padding with ``cal_padding``; any other
            value uses zero padding.
        name: Layer name; parameters are named ``<name>_w`` / ``<name>_b``
            and the norm layer ``<name>_norm``.
        norm: Apply ``instance_norm`` after the transposed convolution.
        relu: Apply ``leaky_relu`` with slope ``relufactor`` at the end.
        relufactor: Negative slope for the leaky ReLU.

    Returns:
        The output variable of the last applied layer.
    """
    pad_h, pad_w = 0, 0
    crop_needed = False
    if padding == "SAME":
        top, bottom = cal_padding(out_shape[0], stride, filter_size)
        left, right = cal_padding(out_shape[1], stride, filter_size)
        pad_h, pad_w = top, left
        # Asymmetric padding: the output is trimmed by one row and one
        # column after the op (see the crop below).
        crop_needed = (top != bottom) or (left != right)

    weight_attr = fluid.ParamAttr(
        name=name + "_w",
        initializer=fluid.initializer.TruncatedNormal(scale=stddev))
    offset_attr = fluid.ParamAttr(
        name=name + "_b", initializer=fluid.initializer.Constant(0.0))

    out = fluid.layers.conv2d_transpose(
        input,
        num_filters,
        name=name,
        filter_size=filter_size,
        stride=stride,
        padding=[pad_h, pad_w],
        param_attr=weight_attr,
        bias_attr=offset_attr)

    if crop_needed:
        # Keep everything except the last row and column.
        cropped_shape = (-1, out.shape[1], out.shape[2] - 1, out.shape[3] - 1)
        out = fluid.layers.crop(out, shape=cropped_shape, offsets=(0, 0, 0, 0))
    if norm:
        out = instance_norm(input=out, name=name + "_norm")
    if relu:
        out = fluid.layers.leaky_relu(out, relufactor)
    return out
from layers import *
import paddle.fluid as fluid
def build_resnet_block(inputres, dim, name="resnet"):
    """Build one residual block: two reflect-padded 3x3 convs plus a skip.

    Each conv is preceded by a 1-pixel reflect pad and uses VALID padding.
    The second conv has no activation; the block input is added to its
    output and the sum goes through ReLU.

    Args:
        inputres: Input variable of the block.
        dim: Number of filters of both convolutions.
        name: Prefix for the layer names (``<name>_c1``, ``<name>_c2``).

    Returns:
        The output variable of the block.
    """
    reflect_pad = [1, 1, 1, 1]

    padded = fluid.layers.pad2d(inputres, reflect_pad, mode="reflect")
    first = conv2d(
        padded,
        num_filters=dim,
        filter_size=3,
        stride=1,
        stddev=0.02,
        padding="VALID",
        name=name + "_c1")

    padded = fluid.layers.pad2d(first, reflect_pad, mode="reflect")
    second = conv2d(
        padded,
        num_filters=dim,
        filter_size=3,
        stride=1,
        stddev=0.02,
        padding="VALID",
        name=name + "_c2",
        relu=False)

    return fluid.layers.relu(second + inputres)
def build_generator_resnet_9blocks(inputgen, name="generator"):
    """Build the generator: 3 convs, 9 residual blocks, 2 deconvs, 1 conv.

    The shape of input should be equal to the shape of output. The deconv
    target sizes are hard-coded to 128x128 and 256x256.

    Args:
        inputgen: Input image variable.
        name: Prefix for all layer names of this generator.

    Returns:
        The tanh-activated output variable.
    """
    # Stem: a reflect-padded 7x7 conv, then two stride-2 convs.
    padded = fluid.layers.pad2d(inputgen, [3, 3, 3, 3], mode="reflect")
    feat = conv2d(padded, 32, 7, 1, 0.02, name=name + "_c1")
    feat = conv2d(feat, 64, 3, 2, 0.02, "SAME", name + "_c2")
    feat = conv2d(feat, 128, 3, 2, 0.02, "SAME", name + "_c3")

    # Nine residual blocks named <name>_r1 ... <name>_r9.
    for block_id in range(1, 10):
        feat = build_resnet_block(feat, 128, name + "_r%d" % block_id)

    # Two stride-2 transposed convs, then the 3-channel output conv.
    feat = deconv2d(feat, [128, 128], 64, 3, 2, 0.02, "SAME", name + "_c4")
    feat = deconv2d(feat, [256, 256], 32, 3, 2, 0.02, "SAME", name + "_c5")
    feat = conv2d(feat, 3, 7, 1, 0.02, "SAME", name + "_c6", relu=False)
    return fluid.layers.tanh(feat, name + "_t1")
def build_gen_discriminator(inputdisc, name="discriminator"):
    """Build the discriminator: five 4x4 convolutions with SAME padding.

    The first three convs use stride 2 and the last two stride 1. The
    first and last convs skip instance norm, and the last one also has no
    activation. All others use leaky ReLU with slope 0.2.

    Args:
        inputdisc: Input image variable.
        name: Prefix for the layer names (``<name>_c1`` ... ``<name>_c5``).

    Returns:
        The single-channel output variable of the last convolution.
    """
    leaky_slope = 0.2
    hidden = conv2d(
        inputdisc,
        num_filters=64,
        filter_size=4,
        stride=2,
        stddev=0.02,
        padding="SAME",
        name=name + "_c1",
        norm=False,
        relufactor=leaky_slope)
    hidden = conv2d(
        hidden, 128, 4, 2, 0.02, "SAME", name + "_c2", relufactor=leaky_slope)
    hidden = conv2d(
        hidden, 256, 4, 2, 0.02, "SAME", name + "_c3", relufactor=leaky_slope)
    hidden = conv2d(
        hidden, 512, 4, 1, 0.02, "SAME", name + "_c4", relufactor=leaky_slope)
    return conv2d(
        hidden,
        num_filters=1,
        filter_size=4,
        stride=1,
        stddev=0.02,
        padding="SAME",
        name=name + "_c5",
        norm=False,
        relu=False)
import data_reader
import os
import random
import sys
import paddle
import argparse
import functools
import paddle.fluid as fluid
import numpy as np
from paddle.fluid import core
from trainer import *
from itertools import izip
from scipy.misc import imsave
import paddle.fluid.profiler as profiler
from utility import add_arguments, print_arguments, ImagePool
# Command-line interface of the training script. ``add_arg`` forwards to
# ``utility.add_arguments`` with the parser bound as ``argparser``.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
# Each row is (flag name, type, default value, help text).
for _arg_spec in (
    ('batch_size', int, 1, "Minibatch size."),
    ('epoch', int, 2, "The number of epoched to be trained."),
    ('output', str, "./output_1", "The directory the model and the test result to be saved to."),
    ('init_model', str, None, "The init model file of directory."),
    ('save_checkpoints', bool, True, "Whether to save checkpoints."),
    ('run_test', bool, True, "Whether to run test."),
    ('use_gpu', bool, True, "Whether to use GPU to train."),
    ('profile', bool, False, "Whether to profile."),
):
    add_arg(*_arg_spec)
del _arg_spec
# yapf: enable
def train(args):
    """Train the four CycleGAN sub-networks.

    Every iteration runs, in order, the g_A, d_B, g_B and d_A trainer
    programs. Generated images pass through an ``ImagePool`` before being
    fed to the matching discriminator program. After every epoch the test
    images are optionally translated and saved (``args.run_test``) and a
    checkpoint is optionally written (``args.save_checkpoints``).

    Args:
        args: Parsed command-line namespace with the attributes
            ``batch_size``, ``epoch``, ``output``, ``init_model``,
            ``save_checkpoints``, ``run_test`` and ``use_gpu``.
    """
    data_shape = [-1] + data_reader.image_shape()
    max_images_num = data_reader.max_images_num()

    input_A = fluid.layers.data(
        name='input_A', shape=data_shape, dtype='float32')
    input_B = fluid.layers.data(
        name='input_B', shape=data_shape, dtype='float32')
    fake_pool_A = fluid.layers.data(
        name='fake_pool_A', shape=data_shape, dtype='float32')
    fake_pool_B = fluid.layers.data(
        name='fake_pool_B', shape=data_shape, dtype='float32')

    # Trainer classes come from the ``trainer`` module; each exposes a
    # ``program`` plus the loss / output variables fetched below.
    g_A_trainer = GATrainer(input_A, input_B)
    g_B_trainer = GBTrainer(input_A, input_B)
    d_A_trainer = DATrainer(input_A, fake_pool_A)
    d_B_trainer = DBTrainer(input_B, fake_pool_B)

    # prepare environment
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    A_pool = ImagePool()
    B_pool = ImagePool()

    A_reader = paddle.batch(data_reader.a_reader(), args.batch_size)()
    B_reader = paddle.batch(data_reader.b_reader(), args.batch_size)()
    A_test_reader = data_reader.a_test_reader()
    B_test_reader = data_reader.b_test_reader()

    def test(epoch):
        """Translate the paired test images and save inputs and outputs."""
        out_path = args.output + "/test"
        if not os.path.exists(out_path):
            os.makedirs(out_path)

        def save_image(prefix, image_name, chw_image):
            # CHW in [-1, 1] -> HWC uint8 in [0, 255].
            hwc_image = np.squeeze(chw_image).transpose([1, 2, 0])
            imsave(out_path + "/" + prefix + "_" + str(epoch) + "_" +
                   image_name, ((hwc_image + 1) * 127.5).astype(np.uint8))

        for data_A, data_B in izip(A_test_reader(), B_test_reader()):
            # The test readers yield (image, file name) pairs.
            A_name = data_A[1]
            B_name = data_B[1]
            tensor_A = core.LoDTensor()
            tensor_B = core.LoDTensor()
            tensor_A.set(data_A[0], place)
            tensor_B.set(data_B[0], place)
            fake_A_temp, fake_B_temp, cyc_A_temp, cyc_B_temp = exe.run(
                g_A_trainer.infer_program,
                fetch_list=[
                    g_A_trainer.fake_A, g_A_trainer.fake_B, g_A_trainer.cyc_A,
                    g_A_trainer.cyc_B
                ],
                feed={"input_A": tensor_A,
                      "input_B": tensor_B})
            # fake_B and cyc_A derive from input A, so they carry A's file
            # name; likewise fake_A and cyc_B carry B's file name.
            save_image("fakeB", A_name, fake_B_temp[0])
            save_image("fakeA", B_name, fake_A_temp[0])
            save_image("cycA", A_name, cyc_A_temp[0])
            save_image("cycB", B_name, cyc_B_temp[0])
            save_image("inputA", A_name, data_A[0])
            save_image("inputB", B_name, data_B[0])

    def checkpoints(epoch):
        """Save the persistables of all four programs for this epoch."""
        out_path = args.output + "/checkpoints/" + str(epoch)
        if not os.path.exists(out_path):
            os.makedirs(out_path)
        fluid.io.save_persistables(
            exe, out_path + "/g_a", main_program=g_A_trainer.program)
        fluid.io.save_persistables(
            exe, out_path + "/g_b", main_program=g_B_trainer.program)
        fluid.io.save_persistables(
            exe, out_path + "/d_a", main_program=d_A_trainer.program)
        fluid.io.save_persistables(
            exe, out_path + "/d_b", main_program=d_B_trainer.program)
        print("saved checkpoint to [%s]" % out_path)
        sys.stdout.flush()

    def init_model():
        """Load the four programs from an earlier checkpoint directory."""
        # The original message read ``args.init_mode``, which raised an
        # AttributeError instead of reporting the missing path.
        assert os.path.exists(
            args.init_model), "[%s] cann't be found." % args.init_model
        fluid.io.load_persistables(
            exe, args.init_model + "/g_a", main_program=g_A_trainer.program)
        fluid.io.load_persistables(
            exe, args.init_model + "/g_b", main_program=g_B_trainer.program)
        fluid.io.load_persistables(
            exe, args.init_model + "/d_a", main_program=d_A_trainer.program)
        fluid.io.load_persistables(
            exe, args.init_model + "/d_b", main_program=d_B_trainer.program)
        print("Load model from [%s]" % args.init_model)

    if args.init_model:
        init_model()

    for epoch in range(args.epoch):
        batch_id = 0
        for _ in range(max_images_num):
            # next(...) works on Python 2 and 3, unlike the .next() method.
            data_A = next(A_reader)
            data_B = next(B_reader)
            tensor_A = core.LoDTensor()
            tensor_B = core.LoDTensor()
            tensor_A.set(data_A, place)
            tensor_B.set(data_B, place)

            # optimize the g_A network
            g_A_loss, fake_B_tmp = exe.run(
                g_A_trainer.program,
                fetch_list=[g_A_trainer.g_loss_A, g_A_trainer.fake_B],
                feed={"input_A": tensor_A,
                      "input_B": tensor_B})
            # Kept in its own local so the ``fake_pool_B`` layer variable
            # above is not rebound.
            pooled_fake_B = B_pool.pool_image(fake_B_tmp)

            # optimize the d_B network
            d_B_loss = exe.run(
                d_B_trainer.program,
                fetch_list=[d_B_trainer.d_loss_B],
                feed={"input_B": tensor_B,
                      "fake_pool_B": pooled_fake_B})

            # optimize the g_B network
            g_B_loss, fake_A_tmp = exe.run(
                g_B_trainer.program,
                fetch_list=[g_B_trainer.g_loss_B, g_B_trainer.fake_A],
                feed={"input_A": tensor_A,
                      "input_B": tensor_B})
            pooled_fake_A = A_pool.pool_image(fake_A_tmp)

            # optimize the d_A network
            d_A_loss = exe.run(
                d_A_trainer.program,
                fetch_list=[d_A_trainer.d_loss_A],
                feed={"input_A": tensor_A,
                      "fake_pool_A": pooled_fake_A})

            print(
                "epoch[%d]; batch[%d]; g_A_loss: %s; d_B_loss: %s; g_B_loss: %s; d_A_loss: %s;"
                % (epoch, batch_id, g_A_loss[0], d_B_loss[0], g_B_loss[0],
                   d_A_loss[0]))
            sys.stdout.flush()
            batch_id += 1

        if args.run_test:
            test(epoch)
        if args.save_checkpoints:
            checkpoints(epoch)
if __name__ == "__main__":
    # Parse and echo the flags, then train -- optionally under the CUDA or
    # CPU profiler, depending on --profile and --use_gpu.
    args = parser.parse_args()
    print_arguments(args)
    if not args.profile:
        train(args)
    elif args.use_gpu:
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
            train(args)
    else:
        with profiler.profiler("CPU", sorted_key='total') as cpuprof:
            train(args)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册