diff --git a/fluid/PaddleCV/gan/c_gan/.run_ce.sh b/fluid/PaddleCV/gan/c_gan/.run_ce.sh new file mode 100755 index 0000000000000000000000000000000000000000..7dee419d90a9719f6c9790f0ffc0b50c69870815 --- /dev/null +++ b/fluid/PaddleCV/gan/c_gan/.run_ce.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# This file is only used for continuous evaluation. +export FLAGS_cudnn_deterministic=True +export ce_mode=1 +(CUDA_VISIBLE_DEVICES=6 python c_gan.py --batch_size=121 --epoch=1 --run_ce=True --use_gpu=True & \ +CUDA_VISIBLE_DEVICES=7 python dc_gan.py --batch_size=121 --epoch=1 --run_ce=True --use_gpu=True) | python _ce.py + + diff --git a/fluid/PaddleCV/gan/c_gan/_ce.py b/fluid/PaddleCV/gan/c_gan/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..c391a390d0be07449b72acbc7672ef82ecf8b3fc --- /dev/null +++ b/fluid/PaddleCV/gan/c_gan/_ce.py @@ -0,0 +1,78 @@ +####this file is only used for continuous evaluation test! +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +#### NOTE kpi.py should be shared across models in some way!!!! 
+ +cgan_d_train_cost_kpi = CostKpi('cgan_d_train_cost', 0.02, 0, actived=True, desc='train cost of discriminator') +cgan_g_train_cost_kpi = CostKpi('cgan_g_train_cost', 0.02, 0, actived=True, desc='train cost of generator') +cgan_train_speed_kpi = DurationKpi( + 'cgan_duration', + 0.05, + 0, + actived=True, + unit_repr='second', + desc='train time used in one GPU card') +dcgan_d_train_cost_kpi = CostKpi('dcgan_d_train_cost', 0.02, 0, actived=True, desc='train cost of discriminator') +dcgan_g_train_cost_kpi = CostKpi('dcgan_g_train_cost', 0.02, 0, actived=True, desc='train cost of generator') +dcgan_train_speed_kpi = DurationKpi( + 'dcgan_duration', + 0.05, + 0, + actived=True, + unit_repr='second', + desc='train time used in one GPU card') + + +tracking_kpis = [dcgan_d_train_cost_kpi, dcgan_g_train_cost_kpi, + dcgan_train_speed_kpi, cgan_d_train_cost_kpi, cgan_g_train_cost_kpi, cgan_train_speed_kpi] + + +def parse_log(log): + ''' + This method should be implemented by model developers. + + The suggestion: + + each KPI line in the log should be kpis,<name>,<value>, for example: + + " + kpis,cgan_d_train_cost,1.0 + kpis,cgan_g_train_cost,1.0 + kpis,cgan_duration,1.0 + kpis,dcgan_d_train_cost,1.0 + kpis,dcgan_g_train_cost,1.2 + " + ''' + for line in log.split('\n'): + fs = line.strip().split(',') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + kpi_name = fs[1] + kpi_value = float(fs[2]) + print("kpi {}={}".format(kpi_name, kpi_value)) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() +# print("*****") +# print(log) +# print("****") + log_to_ce(log) diff --git a/fluid/PaddleCV/gan/c_gan/c_gan.py b/fluid/PaddleCV/gan/c_gan/c_gan.py index a5b27fe35a8674e08a63d1bbd77a14fde86daf65..18c6e5df232d5077126001b0fe17ca098c8e6c4b 100644 --- 
a/fluid/PaddleCV/gan/c_gan/c_gan.py +++ b/fluid/PaddleCV/gan/c_gan/c_gan.py @@ -23,6 +23,7 @@ import functools import matplotlib import numpy as np import paddle +import time import paddle.fluid as fluid from utility import get_parent_function_name, plot, check, add_arguments, print_arguments from network import G_cond, D_cond @@ -30,6 +31,7 @@ matplotlib.use('agg') import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec + NOISE_SIZE = 100 LEARNING_RATE = 2e-4 @@ -40,6 +42,7 @@ add_arg('batch_size', int, 121, "Minibatch size.") add_arg('epoch', int, 20, "The number of epoched to be trained.") add_arg('output', str, "./output", "The directory the model and the test result to be saved to.") add_arg('use_gpu', bool, True, "Whether to use GPU to train.") +add_arg('run_ce', bool, False, "Whether to run for model ce.") # yapf: enable @@ -51,6 +54,10 @@ def loss(x, label): def train(args): + if args.run_ce: + np.random.seed(10) + fluid.default_startup_program().random_seed = 90 + d_program = fluid.Program() dg_program = fluid.Program() @@ -89,16 +96,22 @@ def train(args): if args.use_gpu: exe = fluid.Executor(fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) - - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=60000), - batch_size=args.batch_size) + if args.run_ce: + train_reader = paddle.batch( + paddle.dataset.mnist.train(), + batch_size=args.batch_size) + else: + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=60000), + batch_size=args.batch_size) NUM_TRAIN_TIMES_OF_DG = 2 const_n = np.random.uniform( low=-1.0, high=1.0, size=[args.batch_size, NOISE_SIZE]).astype('float32') + t_time = 0 + losses = [[],[]] for pass_id in range(args.epoch): for batch_id, data in enumerate(train_reader()): if len(data) != args.batch_size: @@ -115,7 +128,7 @@ def train(args): fake_labels = np.zeros( shape=[real_image.shape[0], 1], dtype='float32') total_label = 
np.concatenate([real_labels, fake_labels]) - + s_time = time.time() generated_image = exe.run( g_program, feed={'noise': noise_data, @@ -130,7 +143,7 @@ def train(args): 'label': fake_labels, 'conditions': conditions_data }, - fetch_list={d_loss}) + fetch_list={d_loss})[0][0] d_loss_2 = exe.run(d_program, feed={ @@ -138,20 +151,25 @@ def train(args): 'label': real_labels, 'conditions': conditions_data }, - fetch_list={d_loss}) - - d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]] + fetch_list={d_loss})[0][0] + d_loss_n = d_loss_1 + d_loss_2 + losses[0].append(d_loss_n) for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG): noise_data = np.random.uniform( low=-1.0, high=1.0, size=[args.batch_size, NOISE_SIZE]).astype('float32') - dg_loss_np = exe.run( + dg_loss_n = exe.run( dg_program, feed={'noise': noise_data, 'conditions': conditions_data}, - fetch_list={dg_loss})[0] - if batch_id % 10 == 0: + fetch_list={dg_loss})[0][0] + losses[1].append(dg_loss_n) + t_time += (time.time() - s_time) + + + + if batch_id % 10 == 0 and not args.run_ce: if not os.path.exists(args.output): os.makedirs(args.output) # generate image each batch @@ -163,9 +181,7 @@ def train(args): total_images = np.concatenate([real_image, generated_images]) fig = plot(total_images) msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}".format( - pass_id, batch_id, - np.sum(d_loss_np), - np.sum(dg_loss_np), check(generated_images)) + pass_id, batch_id, d_loss_n, dg_loss_n, check(generated_images)) print(msg) plt.title(msg) plt.savefig( @@ -174,6 +190,11 @@ def train(args): bbox_inches='tight') plt.close(fig) + if args.run_ce: + print("kpis,cgan_d_train_cost,{}".format(np.mean(losses[0]))) + print("kpis,cgan_g_train_cost,{}".format(np.mean(losses[1]))) + print("kpis,cgan_duration,{}".format(t_time / args.epoch)) + if __name__ == "__main__": args = parser.parse_args() diff --git a/fluid/PaddleCV/gan/c_gan/dc_gan.py b/fluid/PaddleCV/gan/c_gan/dc_gan.py index 
b21992da6a4c92cd650853ed04a400f44c012f3b..4601243f2e48c2a89ad5729649024c23d48453b1 100644 --- a/fluid/PaddleCV/gan/c_gan/dc_gan.py +++ b/fluid/PaddleCV/gan/c_gan/dc_gan.py @@ -23,6 +23,7 @@ import matplotlib import six import numpy as np import paddle +import time import paddle.fluid as fluid from utility import get_parent_function_name, plot, check, add_arguments, print_arguments from network import G, D @@ -40,6 +41,7 @@ add_arg('batch_size', int, 128, "Minibatch size.") add_arg('epoch', int, 20, "The number of epoched to be trained.") add_arg('output', str, "./output_dcgan", "The directory the model and the test result to be saved to.") add_arg('use_gpu', bool, True, "Whether to use GPU to train.") +add_arg('run_ce', bool, False, "Whether to run for model ce.") # yapf: enable @@ -51,6 +53,9 @@ def loss(x, label): def train(args): + if args.run_ce: + np.random.seed(10) + fluid.default_startup_program().random_seed = 90 d_program = fluid.Program() dg_program = fluid.Program() @@ -86,15 +91,23 @@ def train(args): exe = fluid.Executor(fluid.CUDAPlace(0)) exe.run(fluid.default_startup_program()) - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=60000), - batch_size=args.batch_size) + if args.run_ce: + train_reader = paddle.batch( + paddle.dataset.mnist.train(), + batch_size=args.batch_size) + else: + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=60000), + batch_size=args.batch_size) NUM_TRAIN_TIMES_OF_DG = 2 const_n = np.random.uniform( low=-1.0, high=1.0, size=[args.batch_size, NOISE_SIZE]).astype('float32') + + t_time = 0 + losses = [[], []] for pass_id in range(args.epoch): for batch_id, data in enumerate(train_reader()): if len(data) != args.batch_size: @@ -109,7 +122,7 @@ def train(args): fake_labels = np.zeros( shape=[real_image.shape[0], 1], dtype='float32') total_label = np.concatenate([real_labels, fake_labels]) - + s_time = time.time() generated_image = 
exe.run(g_program, feed={'noise': noise_data}, fetch_list={g_img})[0] @@ -121,25 +134,27 @@ def train(args): 'img': generated_image, 'label': fake_labels, }, - fetch_list={d_loss}) + fetch_list={d_loss})[0][0] d_loss_2 = exe.run(d_program, feed={ 'img': real_image, 'label': real_labels, }, - fetch_list={d_loss}) - - d_loss_np = [d_loss_1[0][0], d_loss_2[0][0]] + fetch_list={d_loss})[0][0] + d_loss_n = d_loss_1 + d_loss_2 + losses[0].append(d_loss_n) for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG): noise_data = np.random.uniform( low=-1.0, high=1.0, size=[args.batch_size, NOISE_SIZE]).astype('float32') - dg_loss_np = exe.run(dg_program, + dg_loss_n = exe.run(dg_program, feed={'noise': noise_data}, - fetch_list={dg_loss})[0] - if batch_id % 10 == 0: + fetch_list={dg_loss})[0][0] + losses[1].append(dg_loss_n) + t_time += (time.time() - s_time) + if batch_id % 10 == 0 and not args.run_ce: if not os.path.exists(args.output): os.makedirs(args.output) # generate image each batch @@ -150,8 +165,7 @@ def train(args): fig = plot(total_images) msg = "Epoch ID={0} Batch ID={1} D-Loss={2} DG-Loss={3}\n gen={4}".format( pass_id, batch_id, - np.sum(d_loss_np), - np.sum(dg_loss_np), check(generated_images)) + d_loss_n, dg_loss_n, check(generated_images)) print(msg) plt.title(msg) plt.savefig( @@ -159,7 +173,11 @@ def train(args): batch_id), bbox_inches='tight') plt.close(fig) - + if args.run_ce: + print("kpis,dcgan_d_train_cost,{}".format(np.mean(losses[0]))) + print("kpis,dcgan_g_train_cost,{}".format(np.mean(losses[1]))) + print("kpis,dcgan_duration,{}".format(t_time / args.epoch)) + if __name__ == "__main__": args = parser.parse_args() diff --git a/fluid/PaddleCV/gan/c_gan/network.py b/fluid/PaddleCV/gan/c_gan/network.py index 4a13a1f21edccfa28a017b43b1ae288229f52cdc..3cb48d1aca02d8c0180b5a10f4c876f62b6e7909 100644 --- a/fluid/PaddleCV/gan/c_gan/network.py +++ b/fluid/PaddleCV/gan/c_gan/network.py @@ -4,6 +4,7 @@ from __future__ import print_function import paddle import 
paddle.fluid as fluid from utility import get_parent_function_name +import os gf_dim = 64 df_dim = 64 @@ -16,6 +17,9 @@ y_dim = 1 output_height = 28 output_width = 28 +use_cudnn = True +if 'ce_mode' in os.environ: + use_cudnn = False def bn(x, name=None, act='relu'): if name is None: @@ -42,6 +46,7 @@ def conv(x, num_filters, name=None, act=None): pool_stride=2, param_attr=name + 'w', bias_attr=name + 'b', + use_cudnn=use_cudnn, act=act) @@ -76,6 +81,7 @@ def deconv(x, stride=stride, dilation=dilation, padding=padding, + use_cudnn=use_cudnn, act=act) diff --git a/fluid/PaddleCV/gan/cycle_gan/.run_ce.sh b/fluid/PaddleCV/gan/cycle_gan/.run_ce.sh new file mode 100755 index 0000000000000000000000000000000000000000..0da265e0526cbd9bf3efa84ab1ab8161117cb891 --- /dev/null +++ b/fluid/PaddleCV/gan/cycle_gan/.run_ce.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# This file is only used for continuous evaluation. +export FLAGS_cudnn_deterministic=True +export ce_mode=1 +CUDA_VISIBLE_DEVICES=0 python train.py --batch_size=1 --epoch=10 --run_ce=True --use_gpu=True | python _ce.py + + diff --git a/fluid/PaddleCV/gan/cycle_gan/_ce.py b/fluid/PaddleCV/gan/cycle_gan/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..df206ec62e6c1b64b3efe0f40bfdcd78ba9fab46 --- /dev/null +++ b/fluid/PaddleCV/gan/cycle_gan/_ce.py @@ -0,0 +1,68 @@ +####this file is only used for continuous evaluation test! +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +#### NOTE kpi.py should be shared across models in some way!!!! 
+ +d_train_cost_kpi = CostKpi('d_train_cost', 0.05, 0, actived=True, desc='train cost of discriminator') +g_train_cost_kpi = CostKpi('g_train_cost', 0.05, 0, actived=True, desc='train cost of generator') +train_speed_kpi = DurationKpi( + 'duration', + 0.05, + 0, + actived=True, + unit_repr='second', + desc='train time used in one GPU card') + + +tracking_kpis = [d_train_cost_kpi, g_train_cost_kpi, train_speed_kpi] + + +def parse_log(log): + ''' + This method should be implemented by model developers. + + The suggestion: + + each KPI line in the log should be kpis,<name>,<value>, for example: + + " + kpis,g_train_cost,1.0 + kpis,d_train_cost,1.0 + kpis,duration,1.0 + kpis,g_train_cost,1.0 + kpis,d_train_cost,1.2 + " + ''' + for line in log.split('\n'): + fs = line.strip().split(',') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + kpi_name = fs[1] + kpi_value = float(fs[2]) + print("kpi {}={}".format(kpi_name, kpi_value)) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() +# print("*****") +# print(log) +# print("****") + log_to_ce(log) diff --git a/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainA.txt b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainA.txt new file mode 100644 index 0000000000000000000000000000000000000000..abfcac8fee9b8b289e20e2bbc89525cc7ebbe84f --- /dev/null +++ b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainA.txt @@ -0,0 +1 @@ +trainA/n02381460_1001.jpg diff --git a/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainA/n02381460_1001.jpg b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainA/n02381460_1001.jpg new file mode 100755 index 0000000000000000000000000000000000000000..0c88b45c17dc4d0026e26f779bb1a0f3415cab41 Binary files /dev/null and 
b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainA/n02381460_1001.jpg differ diff --git a/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainB.txt b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainB.txt new file mode 100644 index 0000000000000000000000000000000000000000..fbf8ae902a8332e07f26a3af45d2fe0ed2b9016a --- /dev/null +++ b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainB.txt @@ -0,0 +1 @@ +trainB/n02391049_10007.jpg diff --git a/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainB/n02391049_10007.jpg b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainB/n02391049_10007.jpg new file mode 100755 index 0000000000000000000000000000000000000000..2858b0e8e36bcda966c5536da27b6bbc8c89301c Binary files /dev/null and b/fluid/PaddleCV/gan/cycle_gan/data/horse2zebra/trainB/n02391049_10007.jpg differ diff --git a/fluid/PaddleCV/gan/cycle_gan/data_reader.py b/fluid/PaddleCV/gan/cycle_gan/data_reader.py index 4cbf81c031a72bf66d4b356533e4e14fedbcc7c2..50b5de7877ab6da62fb9286f9c5ff5c4f746ff80 100644 --- a/fluid/PaddleCV/gan/cycle_gan/data_reader.py +++ b/fluid/PaddleCV/gan/cycle_gan/data_reader.py @@ -46,18 +46,18 @@ def reader_creater(list_file, cycle=True, shuffle=True, return_name=False): return reader -def a_reader(): +def a_reader(shuffle=True): """ Reader of images with A style for training. """ - return reader_creater(A_LIST_FILE) + return reader_creater(A_LIST_FILE, shuffle=shuffle) -def b_reader(): +def b_reader(shuffle=True): """ Reader of images with B style for training. 
""" - return reader_creater(B_LIST_FILE) + return reader_creater(B_LIST_FILE, shuffle=shuffle) def a_test_reader(): diff --git a/fluid/PaddleCV/gan/cycle_gan/layers.py b/fluid/PaddleCV/gan/cycle_gan/layers.py index 8d5f01afe667c29441d554ce3f3ceb1092796922..0cbd5af6ebccc140104b81a75717fa3555c12d5a 100644 --- a/fluid/PaddleCV/gan/cycle_gan/layers.py +++ b/fluid/PaddleCV/gan/cycle_gan/layers.py @@ -1,7 +1,11 @@ from __future__ import division import paddle.fluid as fluid import numpy as np +import os +use_cudnn = True +if 'ce_mode' in os.environ: + use_cudnn = False def cal_padding(img_size, stride, filter_size, dilation=1): """Calculate padding size.""" @@ -82,7 +86,7 @@ def conv2d(input, name=name, stride=stride, padding=padding, - use_cudnn=False, + use_cudnn=use_cudnn, param_attr=param_attr, bias_attr=bias_attr) if need_crop: @@ -137,6 +141,7 @@ def deconv2d(input, filter_size=filter_size, stride=stride, padding=padding, + use_cudnn=use_cudnn, param_attr=param_attr, bias_attr=bias_attr) diff --git a/fluid/PaddleCV/gan/cycle_gan/train.py b/fluid/PaddleCV/gan/cycle_gan/train.py index b9ee2a08a3446c6a2369f4148a0a644aadad4d37..1cc2fa090b3c35d61071f7ce1b7caedbd18226f9 100644 --- a/fluid/PaddleCV/gan/cycle_gan/train.py +++ b/fluid/PaddleCV/gan/cycle_gan/train.py @@ -1,38 +1,48 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import data_reader import os import random import sys import paddle import argparse import functools -import paddle.fluid as fluid +import time import numpy as np -from paddle.fluid import core -from trainer import * from scipy.misc import imsave +import paddle.fluid as fluid import paddle.fluid.profiler as profiler +from paddle.fluid import core +import data_reader from utility import add_arguments, print_arguments, ImagePool +from trainer import * + parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable 
add_arg('batch_size', int, 1, "Minibatch size.") add_arg('epoch', int, 2, "The number of epoched to be trained.") -add_arg('output', str, "./output_1", "The directory the model and the test result to be saved to.") +add_arg('output', str, "./output_0", "The directory the model and the test result to be saved to.") add_arg('init_model', str, None, "The init model file of directory.") add_arg('save_checkpoints', bool, True, "Whether to save checkpoints.") add_arg('run_test', bool, True, "Whether to run test.") add_arg('use_gpu', bool, True, "Whether to use GPU to train.") add_arg('profile', bool, False, "Whether to profile.") +add_arg('run_ce', bool, False, "Whether to run for model ce.") # yapf: enable def train(args): - data_shape = [-1] + data_reader.image_shape() + max_images_num = data_reader.max_images_num() + shuffle=True + if args.run_ce: + np.random.seed(10) + fluid.default_startup_program().random_seed = 90 + max_images_num = 1 + shuffle = False + data_shape = [-1] + data_reader.image_shape() input_A = fluid.layers.data( name='input_A', shape=data_shape, dtype='float32') @@ -56,12 +66,12 @@ def train(args): exe.run(fluid.default_startup_program()) A_pool = ImagePool() B_pool = ImagePool() - - A_reader = paddle.batch(data_reader.a_reader(), args.batch_size)() - B_reader = paddle.batch(data_reader.b_reader(), args.batch_size)() - - A_test_reader = data_reader.a_test_reader() - B_test_reader = data_reader.b_test_reader() + + A_reader = paddle.batch(data_reader.a_reader(shuffle=shuffle), args.batch_size)() + B_reader = paddle.batch(data_reader.b_reader(shuffle=shuffle), args.batch_size)() + if not args.run_ce: + A_test_reader = data_reader.a_test_reader() + B_test_reader = data_reader.b_test_reader() def test(epoch): out_path = args.output + "/test" @@ -109,13 +119,13 @@ def train(args): if not os.path.exists(out_path): os.makedirs(out_path) fluid.io.save_persistables( - exe, out_path + "/g_a", main_program=g_A_trainer.program) + exe, out_path + "/g_a", 
main_program=g_A_trainer.program, filename="params") fluid.io.save_persistables( - exe, out_path + "/g_b", main_program=g_B_trainer.program) + exe, out_path + "/g_b", main_program=g_B_trainer.program, filename="params") fluid.io.save_persistables( - exe, out_path + "/d_a", main_program=d_A_trainer.program) + exe, out_path + "/d_a", main_program=d_A_trainer.program, filename="params") fluid.io.save_persistables( - exe, out_path + "/d_b", main_program=d_B_trainer.program) + exe, out_path + "/d_b", main_program=d_B_trainer.program, filename="params") print("saved checkpoint to {}".format(out_path)) sys.stdout.flush() @@ -134,7 +144,8 @@ def train(args): if args.init_model: init_model() - + losses=[[], []] + t_time = 0 for epoch in range(args.epoch): batch_id = 0 for i in range(max_images_num): @@ -144,6 +155,7 @@ def train(args): tensor_B = core.LoDTensor() tensor_A.set(data_A, place) tensor_B.set(data_B, place) + s_time = time.time() # optimize the g_A network g_A_loss, fake_B_tmp = exe.run( g_A_trainer.program, @@ -158,7 +170,7 @@ def train(args): d_B_trainer.program, fetch_list=[d_B_trainer.d_loss_B], feed={"input_B": tensor_B, - "fake_pool_B": fake_pool_B}) + "fake_pool_B": fake_pool_B})[0] # optimize the g_B network g_B_loss, fake_A_tmp = exe.run( @@ -174,18 +186,24 @@ def train(args): d_A_trainer.program, fetch_list=[d_A_trainer.d_loss_A], feed={"input_A": tensor_A, - "fake_pool_A": fake_pool_A}) - + "fake_pool_A": fake_pool_A})[0] + t_time += (time.time() - s_time) print("epoch{}; batch{}; g_A_loss: {}; d_B_loss: {}; g_B_loss: {}; d_A_loss: {};".format( epoch, batch_id, g_A_loss[0], d_B_loss[0], g_B_loss[0], d_A_loss[0])) + losses[0].append(g_A_loss[0]) + losses[1].append(d_A_loss[0]) sys.stdout.flush() batch_id += 1 - if args.run_test: + if args.run_test and not args.run_ce: test(epoch) - if args.save_checkpoints: + if args.save_checkpoints and not args.run_ce: checkpoints(epoch) + if args.run_ce: + print("kpis,g_train_cost,{}".format(np.mean(losses[0]))) + 
print("kpis,d_train_cost,{}".format(np.mean(losses[1]))) + print("kpis,duration,{}".format(t_time / args.epoch)) if __name__ == "__main__":