Unverified Commit 4f244555 authored by whs, committed by GitHub

Add ce for gan. (#1421)

* Add ce for gan.

* Add demo data.
Parent 33e68ab4
#!/bin/bash
# This file is only used for continuous evaluation.
export FLAGS_cudnn_deterministic=True
export ce_mode=1
(CUDA_VISIBLE_DEVICES=6 python c_gan.py --batch_size=121 --epoch=1 --run_ce=True --use_gpu=True & \
CUDA_VISIBLE_DEVICES=7 python dc_gan.py --batch_size=121 --epoch=1 --run_ce=True --use_gpu=True) | python _ce.py
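# Both runs print KPI records to stdout in the form "kpis,<kpi_name>,<value>",
# which the piped _ce.py below parses. Hypothetical example output:
#   kpis,cgan_d_train_cost,1.386
#   kpis,cgan_duration,42.7
#   kpis,dcgan_g_train_cost,0.693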
#### This file is only used for the continuous evaluation test!
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!
cgan_d_train_cost_kpi = CostKpi('cgan_d_train_cost', 0.02, 0, actived=True, desc='train cost of discriminator')
cgan_g_train_cost_kpi = CostKpi('cgan_g_train_cost', 0.02, 0, actived=True, desc='train cost of generator')
cgan_train_speed_kpi = DurationKpi(
'cgan_duration',
0.05,
0,
actived=True,
unit_repr='second',
desc='train time used in one GPU card')
dcgan_d_train_cost_kpi = CostKpi('dcgan_d_train_cost', 0.02, 0, actived=True, desc='train cost of discriminator')
dcgan_g_train_cost_kpi = CostKpi('dcgan_g_train_cost', 0.02, 0, actived=True, desc='train cost of generator')
dcgan_train_speed_kpi = DurationKpi(
'dcgan_duration',
0.05,
0,
actived=True,
unit_repr='second',
desc='train time used in one GPU card')
tracking_kpis = [dcgan_d_train_cost_kpi, dcgan_g_train_cost_kpi,
dcgan_train_speed_kpi, cgan_d_train_cost_kpi, cgan_g_train_cost_kpi, cgan_train_speed_kpi]
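# The numeric arguments to the Kpi constructors above are, presumably, the
# allowed relative diff against the stored baseline (0.02/0.05) and the number
# of leading records to skip (0); kpi.py itself comes from the CE environment
# ($ceroot), not from this repo.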
def parse_log(log):
'''
This method should be implemented by model developers.
Suggested format: each KPI line in the log should be a comma-separated
"kpis,<kpi_name>,<value>" record, for example:
"
kpis,cgan_d_train_cost,1.0
kpis,cgan_g_train_cost,1.2
kpis,cgan_duration,42.7
"
'''
for line in log.split('\n'):
fs = line.strip().split(',')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
kpi_name = fs[1]
kpi_value = float(fs[2])
print("kpi {}={}".format(kpi_name, kpi_value))
yield kpi_name, kpi_value
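# A minimal, hypothetical walk-through: given the log
#   "kpis,cgan_d_train_cost,1.38\nsome other output"
# parse_log prints both split results but yields only
# ('cgan_d_train_cost', 1.38); any line that does not split into exactly
# three comma-separated fields with 'kpis' first is ignored.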
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
# print("*****")
# print(log)
# print("****")
log_to_ce(log)
......@@ -23,6 +23,7 @@ import functools
import matplotlib
import numpy as np
import paddle
import time
import paddle.fluid as fluid
from utility import get_parent_function_name, plot, check, add_arguments, print_arguments
from network import G_cond, D_cond
......@@ -30,6 +31,7 @@ matplotlib.use('agg')
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
NOISE_SIZE = 100
LEARNING_RATE = 2e-4
......@@ -40,6 +42,7 @@ add_arg('batch_size', int, 121, "Minibatch size.")
add_arg('epoch', int, 20, "The number of epochs to train.")
add_arg('output', str, "./output", "The directory where the model and test results are saved.")
add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
add_arg('run_ce', bool, False, "Whether to run in continuous evaluation (CE) mode.")
# yapf: enable
......@@ -51,6 +54,10 @@ def loss(x, label):
def train(args):
if args.run_ce:
np.random.seed(10)
fluid.default_startup_program().random_seed = 90
d_program = fluid.Program()
dg_program = fluid.Program()
......@@ -89,7 +96,11 @@ def train(args):
if args.use_gpu:
exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
if args.run_ce:
train_reader = paddle.batch(
paddle.dataset.mnist.train(),
batch_size=args.batch_size)
else:
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=60000),
......@@ -99,6 +110,8 @@ def train(args):
const_n = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
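# CE bookkeeping added by this commit: t_time accumulates time spent in the
# training steps of each batch, and losses[0]/losses[1] collect per-batch
# discriminator/generator losses so their means can be reported as KPIs.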
t_time = 0
losses = [[], []]
for pass_id in range(args.epoch):
for batch_id, data in enumerate(train_reader()):
if len(data) != args.batch_size:
......@@ -115,7 +128,7 @@ def train(args):
fake_labels = np.zeros(
shape=[real_image.shape[0], 1], dtype='float32')
total_label = np.concatenate([real_labels, fake_labels])
s_time = time.time()
generated_image = exe.run(
g_program,
feed={'noise': noise_data,
......@@ -130,7 +143,7 @@ def train(args):
'label': fake_labels,
'conditions': conditions_data
},
- fetch_list={d_loss})
+ fetch_list={d_loss})[0][0]
d_loss_2 = exe.run(d_program,
feed={
......@@ -138,20 +151,25 @@ def train(args):
'label': real_labels,
'conditions': conditions_data
},
- fetch_list={d_loss})
- d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]]
+ fetch_list={d_loss})[0][0]
+ d_loss_n = d_loss_1 + d_loss_2
+ losses[0].append(d_loss_n)
for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG):
noise_data = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
- dg_loss_np = exe.run(
+ dg_loss_n = exe.run(
dg_program,
feed={'noise': noise_data,
'conditions': conditions_data},
- fetch_list={dg_loss})[0]
- if batch_id % 10 == 0:
+ fetch_list={dg_loss})[0][0]
+ losses[1].append(dg_loss_n)
+ t_time += (time.time() - s_time)
+ if batch_id % 10 == 0 and not args.run_ce:
if not os.path.exists(args.output):
os.makedirs(args.output)
# generate sample images every 10 batches
......@@ -163,9 +181,7 @@ def train(args):
total_images = np.concatenate([real_image, generated_images])
fig = plot(total_images)
msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}".format(
- pass_id, batch_id,
- np.sum(d_loss_np),
- np.sum(dg_loss_np), check(generated_images))
+ pass_id, batch_id, d_loss_n, dg_loss_n, check(generated_images))
print(msg)
plt.title(msg)
plt.savefig(
......@@ -174,6 +190,11 @@ def train(args):
bbox_inches='tight')
plt.close(fig)
if args.run_ce:
print("kpis,cgan_d_train_cost,{}".format(np.mean(losses[0])))
print("kpis,cgan_g_train_cost,{}".format(np.mean(losses[1])))
print("kpis,cgan_duration,{}".format(t_time / args.epoch))
if __name__ == "__main__":
args = parser.parse_args()
......
......@@ -23,6 +23,7 @@ import matplotlib
import six
import numpy as np
import paddle
import time
import paddle.fluid as fluid
from utility import get_parent_function_name, plot, check, add_arguments, print_arguments
from network import G, D
......@@ -40,6 +41,7 @@ add_arg('batch_size', int, 128, "Minibatch size.")
add_arg('epoch', int, 20, "The number of epochs to train.")
add_arg('output', str, "./output_dcgan", "The directory where the model and test results are saved.")
add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
add_arg('run_ce', bool, False, "Whether to run in continuous evaluation (CE) mode.")
# yapf: enable
......@@ -51,6 +53,9 @@ def loss(x, label):
def train(args):
if args.run_ce:
np.random.seed(10)
fluid.default_startup_program().random_seed = 90
d_program = fluid.Program()
dg_program = fluid.Program()
......@@ -86,6 +91,11 @@ def train(args):
exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
if args.run_ce:
train_reader = paddle.batch(
paddle.dataset.mnist.train(),
batch_size=args.batch_size)
else:
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=60000),
......@@ -95,6 +105,9 @@ def train(args):
const_n = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
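# Same CE bookkeeping as in c_gan.py: accumulate per-batch step time and
# discriminator/generator losses for the KPI summary below.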
t_time = 0
losses = [[], []]
for pass_id in range(args.epoch):
for batch_id, data in enumerate(train_reader()):
if len(data) != args.batch_size:
......@@ -109,7 +122,7 @@ def train(args):
fake_labels = np.zeros(
shape=[real_image.shape[0], 1], dtype='float32')
total_label = np.concatenate([real_labels, fake_labels])
s_time = time.time()
generated_image = exe.run(g_program,
feed={'noise': noise_data},
fetch_list={g_img})[0]
......@@ -121,25 +134,27 @@ def train(args):
'img': generated_image,
'label': fake_labels,
},
- fetch_list={d_loss})
+ fetch_list={d_loss})[0][0]
d_loss_2 = exe.run(d_program,
feed={
'img': real_image,
'label': real_labels,
},
- fetch_list={d_loss})
- d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]]
+ fetch_list={d_loss})[0][0]
+ d_loss_n = d_loss_1 + d_loss_2
+ losses[0].append(d_loss_n)
for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG):
noise_data = np.random.uniform(
low=-1.0, high=1.0,
size=[args.batch_size, NOISE_SIZE]).astype('float32')
- dg_loss_np = exe.run(dg_program,
+ dg_loss_n = exe.run(dg_program,
feed={'noise': noise_data},
- fetch_list={dg_loss})[0]
- if batch_id % 10 == 0:
+ fetch_list={dg_loss})[0][0]
+ losses[1].append(dg_loss_n)
+ t_time += (time.time() - s_time)
+ if batch_id % 10 == 0 and not args.run_ce:
if not os.path.exists(args.output):
os.makedirs(args.output)
# generate sample images every 10 batches
......@@ -150,8 +165,7 @@ def train(args):
fig = plot(total_images)
msg = "Epoch ID={0} Batch ID={1} D-Loss={2} DG-Loss={3}\n gen={4}".format(
pass_id, batch_id,
- np.sum(d_loss_np),
- np.sum(dg_loss_np), check(generated_images))
+ d_loss_n, dg_loss_n, check(generated_images))
print(msg)
plt.title(msg)
plt.savefig(
......@@ -159,6 +173,10 @@ def train(args):
batch_id),
bbox_inches='tight')
plt.close(fig)
if args.run_ce:
print("kpis,dcgan_d_train_cost,{}".format(np.mean(losses[0])))
print("kpis,dcgan_g_train_cost,{}".format(np.mean(losses[1])))
print("kpis,dcgan_duration,{}".format(t_time / args.epoch))
if __name__ == "__main__":
......
......@@ -4,6 +4,7 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
from utility import get_parent_function_name
import os
gf_dim = 64
df_dim = 64
......@@ -16,6 +17,9 @@ y_dim = 1
output_height = 28
output_width = 28
use_cudnn = True
if 'ce_mode' in os.environ:
use_cudnn = False
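# Presumably cuDNN is disabled under CE because some cuDNN kernels are not
# bit-reproducible even with FLAGS_cudnn_deterministic set, and CE compares
# results against a fixed baseline.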
def bn(x, name=None, act='relu'):
if name is None:
......@@ -42,6 +46,7 @@ def conv(x, num_filters, name=None, act=None):
pool_stride=2,
param_attr=name + 'w',
bias_attr=name + 'b',
use_cudnn=use_cudnn,
act=act)
......@@ -76,6 +81,7 @@ def deconv(x,
stride=stride,
dilation=dilation,
padding=padding,
use_cudnn=use_cudnn,
act=act)
......
#!/bin/bash
# This file is only used for continuous evaluation.
export FLAGS_cudnn_deterministic=True
export ce_mode=1
CUDA_VISIBLE_DEVICES=0 python train.py --batch_size=1 --epoch=10 --run_ce=True --use_gpu=True | python _ce.py
#### This file is only used for the continuous evaluation test!
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across models in some way!
d_train_cost_kpi = CostKpi('d_train_cost', 0.05, 0, actived=True, desc='train cost of discriminator')
g_train_cost_kpi = CostKpi('g_train_cost', 0.05, 0, actived=True, desc='train cost of generator')
train_speed_kpi = DurationKpi(
'duration',
0.05,
0,
actived=True,
unit_repr='second',
desc='train time used in one GPU card')
tracking_kpis = [d_train_cost_kpi, g_train_cost_kpi, train_speed_kpi]
def parse_log(log):
'''
This method should be implemented by model developers.
Suggested format: each KPI line in the log should be a comma-separated
"kpis,<kpi_name>,<value>" record, for example:
"
kpis,d_train_cost,1.0
kpis,g_train_cost,1.2
kpis,duration,42.7
"
'''
for line in log.split('\n'):
fs = line.strip().split(',')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
kpi_name = fs[1]
kpi_value = float(fs[2])
print("kpi {}={}".format(kpi_name, kpi_value))
yield kpi_name, kpi_value
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
# print("*****")
# print(log)
# print("****")
log_to_ce(log)
......@@ -46,18 +46,18 @@ def reader_creater(list_file, cycle=True, shuffle=True, return_name=False):
return reader
- def a_reader():
+ def a_reader(shuffle=True):
"""
Reader of images with A style for training.
"""
- return reader_creater(A_LIST_FILE)
+ return reader_creater(A_LIST_FILE, shuffle=shuffle)
- def b_reader():
+ def b_reader(shuffle=True):
"""
Reader of images with B style for training.
"""
- return reader_creater(B_LIST_FILE)
+ return reader_creater(B_LIST_FILE, shuffle=shuffle)
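# A minimal usage sketch (batch size taken from run_ce.sh): in CE mode
# train.py passes shuffle=False so batches arrive in a fixed order, e.g.
#   A_reader = paddle.batch(a_reader(shuffle=False), batch_size=1)()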
def a_test_reader():
......
from __future__ import division
import paddle.fluid as fluid
import numpy as np
import os
use_cudnn = True
if 'ce_mode' in os.environ:
use_cudnn = False
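# Mirrors the toggle in network.py: CE runs avoid cuDNN kernels so results
# stay reproducible across runs.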
def cal_padding(img_size, stride, filter_size, dilation=1):
"""Calculate padding size."""
......@@ -82,7 +86,7 @@ def conv2d(input,
name=name,
stride=stride,
padding=padding,
- use_cudnn=False,
+ use_cudnn=use_cudnn,
param_attr=param_attr,
bias_attr=bias_attr)
if need_crop:
......@@ -137,6 +141,7 @@ def deconv2d(input,
filter_size=filter_size,
stride=stride,
padding=padding,
use_cudnn=use_cudnn,
param_attr=param_attr,
bias_attr=bias_attr)
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
import sys
import argparse
import functools
import time
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from paddle.fluid import core
from scipy.misc import imsave
import data_reader
from utility import add_arguments, print_arguments, ImagePool
from trainer import *
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('epoch', int, 2, "The number of epochs to train.")
- add_arg('output', str, "./output_1", "The directory the model and the test result to be saved to.")
+ add_arg('output', str, "./output_0", "The directory where the model and test results are saved.")
add_arg('init_model', str, None, "The directory of the initial model.")
add_arg('save_checkpoints', bool, True, "Whether to save checkpoints.")
add_arg('run_test', bool, True, "Whether to run test.")
add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
add_arg('profile', bool, False, "Whether to profile.")
add_arg('run_ce', bool, False, "Whether to run in continuous evaluation (CE) mode.")
# yapf: enable
def train(args):
- data_shape = [-1] + data_reader.image_shape()
max_images_num = data_reader.max_images_num()
shuffle = True
if args.run_ce:
np.random.seed(10)
fluid.default_startup_program().random_seed = 90
max_images_num = 1
shuffle = False
+ data_shape = [-1] + data_reader.image_shape()
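# In CE mode the run is deterministic and tiny: fixed NumPy/Paddle seeds, a
# single image pair per epoch, and no shuffling, so the loss and duration
# KPIs are comparable across commits.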
input_A = fluid.layers.data(
name='input_A', shape=data_shape, dtype='float32')
......@@ -57,9 +67,9 @@ def train(args):
A_pool = ImagePool()
B_pool = ImagePool()
- A_reader = paddle.batch(data_reader.a_reader(), args.batch_size)()
- B_reader = paddle.batch(data_reader.b_reader(), args.batch_size)()
+ A_reader = paddle.batch(data_reader.a_reader(shuffle=shuffle), args.batch_size)()
+ B_reader = paddle.batch(data_reader.b_reader(shuffle=shuffle), args.batch_size)()
if not args.run_ce:
A_test_reader = data_reader.a_test_reader()
B_test_reader = data_reader.b_test_reader()
......@@ -109,13 +119,13 @@ def train(args):
if not os.path.exists(out_path):
os.makedirs(out_path)
fluid.io.save_persistables(
exe, out_path + "/g_a", main_program=g_A_trainer.program)
exe, out_path + "/g_a", main_program=g_A_trainer.program, filename="params")
fluid.io.save_persistables(
exe, out_path + "/g_b", main_program=g_B_trainer.program)
exe, out_path + "/g_b", main_program=g_B_trainer.program, filename="params")
fluid.io.save_persistables(
exe, out_path + "/d_a", main_program=d_A_trainer.program)
exe, out_path + "/d_a", main_program=d_A_trainer.program, filename="params")
fluid.io.save_persistables(
exe, out_path + "/d_b", main_program=d_B_trainer.program)
exe, out_path + "/d_b", main_program=d_B_trainer.program, filename="params")
print("saved checkpoint to {}".format(out_path))
sys.stdout.flush()
......@@ -134,7 +144,8 @@ def train(args):
if args.init_model:
init_model()
losses = [[], []]
t_time = 0
for epoch in range(args.epoch):
batch_id = 0
for i in range(max_images_num):
......@@ -144,6 +155,7 @@ def train(args):
tensor_B = core.LoDTensor()
tensor_A.set(data_A, place)
tensor_B.set(data_B, place)
s_time = time.time()
# optimize the g_A network
g_A_loss, fake_B_tmp = exe.run(
g_A_trainer.program,
......@@ -158,7 +170,7 @@ def train(args):
d_B_trainer.program,
fetch_list=[d_B_trainer.d_loss_B],
feed={"input_B": tensor_B,
"fake_pool_B": fake_pool_B})
"fake_pool_B": fake_pool_B})[0]
# optimize the g_B network
g_B_loss, fake_A_tmp = exe.run(
......@@ -174,18 +186,24 @@ def train(args):
d_A_trainer.program,
fetch_list=[d_A_trainer.d_loss_A],
feed={"input_A": tensor_A,
"fake_pool_A": fake_pool_A})
"fake_pool_A": fake_pool_A})[0]
t_time += (time.time() - s_time)
print("epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {};".format(
epoch, batch_id, g_A_loss[0], d_B_loss[0], g_B_loss[0],
d_A_loss[0]))
losses[0].append(g_A_loss[0])
losses[1].append(d_A_loss[0])
sys.stdout.flush()
batch_id += 1
- if args.run_test:
+ if args.run_test and not args.run_ce:
test(epoch)
- if args.save_checkpoints:
+ if args.save_checkpoints and not args.run_ce:
checkpoints(epoch)
if args.run_ce:
print("kpis,g_train_cost,{}".format(np.mean(losses[0])))
print("kpis,d_train_cost,{}".format(np.mean(losses[1])))
print("kpis,duration,{}".format(t_time / args.epoch))
if __name__ == "__main__":
......