提交 4b5d8b42 编写于 作者: D Divano 提交者: Hongyu Liu

Add ce to CycleGAN (#2807)

* Update mnist_dygraph.py

fix bug

* add multi-card support for se_resnext

* add some description to readme.md

* add ce for cyclegan

* fix code style

* add ce for ptb_lm
上级 0f4ef113
#!/bin/bash
# This file is only used for continuous evaluation (CE).
# It runs the dygraph model on a single card.
# NOTE(review): presumably makes cuDNN pick deterministic kernels so the KPI
# values are reproducible between runs -- confirm against the Paddle flag docs.
export FLAGS_cudnn_deterministic=True
# Expose only GPU 0 to the training process.
export CUDA_VISIBLE_DEVICES=0
# Run train.py with --ce and --epoch 1 and pipe its stdout into _ce.py, which
# reads the whole log from stdin and records the lines starting with "kpis".
# NOTE(review): without `set -o pipefail` the exit status of this pipeline is
# the one of _ce.py only; a crash in train.py is not reflected -- confirm this
# is intended.
python train.py --ce --epoch 1 | python _ce.py
#### This file is only used for continuous evaluation (CE) tests.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across the models in some way.
# One CostKpi tracker per loss that the training script prints in CE mode.
# The first argument is the KPI name; it has to match the name field of the
# "kpis\t<name>\t<value>" log lines, because log_to_ce() looks the tracker up
# by that name.
# NOTE(review): 0.3 and 0 are presumably the tolerated deviation and the
# number of leading records to skip -- confirm against the CostKpi signature
# in kpi.py, which is not visible from this file.
g_loss = CostKpi('g_loss', 0.3, 0, actived=True, desc="g loss")
g_A_loss = CostKpi('g_A_loss', 0.3, 0, actived=True, desc="g A loss")
g_B_loss = CostKpi('g_B_loss', 0.3, 0, actived=True, desc="g B loss")
d_A_loss = CostKpi('d_A_loss', 0.3, 0, actived=True, desc="d A loss")
d_B_loss = CostKpi('d_B_loss', 0.3, 0, actived=True, desc="d B loss")
# Every KPI listed here is registered by log_to_ce(); a KPI name that shows up
# in the log but is missing from this list cannot be recorded.
tracking_kpis = [g_loss, g_A_loss, g_B_loss,
d_A_loss, d_B_loss]
def parse_log(log):
    '''
    Extract the KPI records from a captured training log.

    A line is treated as a record only when, after stripping surrounding
    whitespace, it consists of exactly three tab-separated fields and the
    first field is the literal marker "kpis":

        kpis<TAB>g_loss<TAB>1.0
        kpis<TAB>d_A_loss<TAB>0.5

    Every other line (including two-field "name<TAB>value" lines) is
    ignored.

    Args:
        log: the complete log as one string, lines separated by newlines.

    Yields:
        (kpi_name, kpi_value) tuples in log order; kpi_value is a float.

    Raises:
        ValueError: if the value field of a "kpis" line is not a number.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        # Debug echo of every tokenised line, kept so the CE console output
        # stays the same as before.
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            try:
                kpi_value = float(fs[2])
            except ValueError:
                # Same exception type as a bare float() failure, but the
                # message names the KPI so the offending line can be found.
                raise ValueError("kpi %r has a non-numeric value %r" %
                                 (kpi_name, fs[2]))
            yield kpi_name, kpi_value
def log_to_ce(log):
    '''
    Record every KPI found in ``log`` with its tracker and persist it.

    The trackers are taken from the module-level ``tracking_kpis`` list and
    indexed by their ``name`` attribute. For each (name, value) pair produced
    by ``parse_log`` the pair is printed, the value is added to the matching
    tracker, and that tracker is persisted immediately.

    Args:
        log: the complete training log as one string.

    Raises:
        KeyError: if the log reports a KPI name that is not in
            ``tracking_kpis``.
        ValueError: propagated from ``parse_log`` for a non-numeric value.
    '''
    kpi_tracker = {kpi.name: kpi for kpi in tracking_kpis}

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        if kpi_name not in kpi_tracker:
            # Still a KeyError, as a plain dict lookup would raise, but the
            # message now says which name is unknown and which are tracked.
            raise KeyError("log reports untracked kpi %r; tracked kpis: %s" %
                           (kpi_name, sorted(kpi_tracker)))
        tracker = kpi_tracker[kpi_name]
        tracker.add_record(kpi_value)
        tracker.persist()
if __name__ == '__main__':
    # The training log arrives on stdin (the launcher pipes it in). Echo it
    # between two marker lines, then hand it over for KPI extraction.
    log = sys.stdin.read()
    print("*****", log, "****", sep="\n")
    log_to_ce(log)
...@@ -17,6 +17,7 @@ from trainer import * ...@@ -17,6 +17,7 @@ from trainer import *
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
import six import six
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--ce", action="store_true", help="run ce")
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
add_arg('batch_size', int, 1, "Minibatch size.") add_arg('batch_size', int, 1, "Minibatch size.")
...@@ -26,6 +27,7 @@ add_arg('init_model', str, None, "The init model file of director ...@@ -26,6 +27,7 @@ add_arg('init_model', str, None, "The init model file of director
add_arg('save_checkpoints', bool, True, "Whether to save checkpoints.") add_arg('save_checkpoints', bool, True, "Whether to save checkpoints.")
# yapf: enable # yapf: enable
lambda_A = 10.0 lambda_A = 10.0
lambda_B = 10.0 lambda_B = 10.0
lambda_identity = 0.5 lambda_identity = 0.5
...@@ -51,10 +53,17 @@ def train(args): ...@@ -51,10 +53,17 @@ def train(args):
shuffle = True shuffle = True
data_shape = [-1] + data_reader.image_shape() data_shape = [-1] + data_reader.image_shape()
print(data_shape) print(data_shape)
if args.ce:
print("ce mode")
seed = 33
random.seed(seed)
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
shuffle = False
A_pool = ImagePool() A_pool = ImagePool()
B_pool = ImagePool() B_pool = ImagePool()
A_reader = paddle.batch( A_reader = paddle.batch(
data_reader.a_reader(shuffle=shuffle), args.batch_size)() data_reader.a_reader(shuffle=shuffle), args.batch_size)()
B_reader = paddle.batch( B_reader = paddle.batch(
...@@ -154,6 +163,14 @@ def train(args): ...@@ -154,6 +163,14 @@ def train(args):
losses[1].append(d_loss_A[0]) losses[1].append(d_loss_A[0])
sys.stdout.flush() sys.stdout.flush()
batch_id += 1 batch_id += 1
if args.ce and batch_id == 500:
print("kpis\tg_loss\t%0.3f" % g_loss_out[0])
print("kpis\tg_A_loss\t%0.3f" % g_A_loss.numpy()[0])
print("kpis\tg_B_loss\t%0.3f" % g_B_loss.numpy()[0])
print("kpis\td_A_loss\t%0.3f" % d_loss_A.numpy()[0])
print("kpis\td_B_loss\t%0.3f" % d_loss_B.numpy()[0])
break
if args.save_checkpoints: if args.save_checkpoints:
fluid.dygraph.save_persistables(cycle_gan.state_dict(),args.output+"/checkpoints/{}".format(epoch)) fluid.dygraph.save_persistables(cycle_gan.state_dict(),args.output+"/checkpoints/{}".format(epoch))
......
#!/bin/bash
# This file is only used for continuous evaluation (CE).
# It runs the dygraph model on a single card.
# NOTE(review): presumably makes cuDNN pick deterministic kernels so the KPI
# values are reproducible between runs -- confirm against the Paddle flag docs.
export FLAGS_cudnn_deterministic=True
# Expose only GPU 0 to the training process.
export CUDA_VISIBLE_DEVICES=0
# Run ptb_dy.py with --ce on the "small" model type and pipe its stdout into
# _ce.py, which reads the whole log from stdin and records the lines starting
# with "kpis".
# NOTE(review): without `set -o pipefail` the exit status of this pipeline is
# the one of _ce.py only; a crash in ptb_dy.py is not reflected -- confirm
# this is intended.
python ptb_dy.py --data_path data/simple-examples/data/ \
--ce --model_type small | python _ce.py
#### This file is only used for continuous evaluation (CE) tests.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE: kpi.py should be shared across the models in some way.
# One tracker per perplexity value that the training script prints in CE
# mode. The first argument is the KPI name; it has to match the name field of
# the "kpis\t<name>\t<value>" log lines, because log_to_ce() looks the tracker
# up by that name.
# NOTE(review): 3 and 0 are presumably the tolerated deviation and the number
# of leading records to skip -- confirm against the AccKpi signature in kpi.py.
# NOTE(review): perplexity is a lower-is-better metric, while the AccKpi name
# suggests an accuracy-style (higher-is-better) check -- confirm that AccKpi
# rather than CostKpi is really intended here.
train_ppl = AccKpi('train_ppl', 3, 0, actived=True, desc="train ppl")
test_ppl = AccKpi('test_ppl', 3, 0, actived=True, desc='test ppl')
# Disabled training-speed KPI, kept for reference.
#train_speed_kpi = DurationKpi(
# 'train_speed',
# 0.05,
# 0,
# actived=True,
# unit_repr='seconds/image',
# desc='train speed in one GPU card')
# Every KPI listed here is registered by log_to_ce(); a KPI name that shows up
# in the log but is missing from this list cannot be recorded.
tracking_kpis = [train_ppl, test_ppl]
def parse_log(log):
    '''
    Extract the KPI records from a captured training log.

    A line is treated as a record only when, after stripping surrounding
    whitespace, it consists of exactly three tab-separated fields and the
    first field is the literal marker "kpis":

        kpis<TAB>train_ppl<TAB>120.5
        kpis<TAB>test_ppl<TAB>131.2

    Every other line (including two-field "name<TAB>value" lines) is
    ignored.

    Args:
        log: the complete log as one string, lines separated by newlines.

    Yields:
        (kpi_name, kpi_value) tuples in log order; kpi_value is a float.

    Raises:
        ValueError: if the value field of a "kpis" line is not a number.
    '''
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        # Debug echo of every tokenised line, kept so the CE console output
        # stays the same as before.
        print(fs)
        if len(fs) == 3 and fs[0] == 'kpis':
            print("-----%s" % fs)
            kpi_name = fs[1]
            try:
                kpi_value = float(fs[2])
            except ValueError:
                # Same exception type as a bare float() failure, but the
                # message names the KPI so the offending line can be found.
                raise ValueError("kpi %r has a non-numeric value %r" %
                                 (kpi_name, fs[2]))
            yield kpi_name, kpi_value
def log_to_ce(log):
    '''
    Record every KPI found in ``log`` with its tracker and persist it.

    The trackers are taken from the module-level ``tracking_kpis`` list and
    indexed by their ``name`` attribute. For each (name, value) pair produced
    by ``parse_log`` the pair is printed, the value is added to the matching
    tracker, and that tracker is persisted immediately.

    Args:
        log: the complete training log as one string.

    Raises:
        KeyError: if the log reports a KPI name that is not in
            ``tracking_kpis``.
        ValueError: propagated from ``parse_log`` for a non-numeric value.
    '''
    kpi_tracker = {kpi.name: kpi for kpi in tracking_kpis}

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        if kpi_name not in kpi_tracker:
            # Still a KeyError, as a plain dict lookup would raise, but the
            # message now says which name is unknown and which are tracked.
            raise KeyError("log reports untracked kpi %r; tracked kpis: %s" %
                           (kpi_name, sorted(kpi_tracker)))
        tracker = kpi_tracker[kpi_name]
        tracker.add_record(kpi_value)
        tracker.persist()
if __name__ == '__main__':
    # The training log arrives on stdin (the launcher pipes it in). Echo it
    # between two marker lines, then hand it over for KPI extraction.
    log = sys.stdin.read()
    print("*****", log, "****", sep="\n")
    log_to_ce(log)
...@@ -40,6 +40,6 @@ def parse_args(): ...@@ -40,6 +40,6 @@ def parse_args():
parser.add_argument( parser.add_argument(
'--log_path', '--log_path',
help='path of the log file. If not set, logs are printed to console') help='path of the log file. If not set, logs are printed to console')
parser.add_argument('--enable_ce', action='store_true') parser.add_argument('--ce', action='store_true', help="run ce")
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -292,6 +292,13 @@ def train_ptb_lm(): ...@@ -292,6 +292,13 @@ def train_ptb_lm():
return return
with fluid.dygraph.guard(core.CUDAPlace(0)): with fluid.dygraph.guard(core.CUDAPlace(0)):
if args.ce:
print("ce mode")
seed = 33
np.random.seed(seed)
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
max_epoch = 1
ptb_model = PtbModel( ptb_model = PtbModel(
"ptb_model", "ptb_model",
hidden_size=hidden_size, hidden_size=hidden_size,
...@@ -315,7 +322,7 @@ def train_ptb_lm(): ...@@ -315,7 +322,7 @@ def train_ptb_lm():
batch_len = len(train_data) // batch_size batch_len = len(train_data) // batch_size
total_batch_size = (batch_len - 1) // num_steps total_batch_size = (batch_len - 1) // num_steps
log_interval = total_batch_size // 10 log_interval = total_batch_size // 20
bd = [] bd = []
lr_arr = [1.0] lr_arr = [1.0]
...@@ -361,6 +368,8 @@ def train_ptb_lm(): ...@@ -361,6 +368,8 @@ def train_ptb_lm():
print("eval finished") print("eval finished")
ppl = np.exp(total_loss / iters) ppl = np.exp(total_loss / iters)
print("ppl ", batch_id, ppl[0]) print("ppl ", batch_id, ppl[0])
if args.ce:
print("kpis\ttest_ppl\t%0.3f" % ppl[0])
grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm) grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
for epoch_id in range(max_epoch): for epoch_id in range(max_epoch):
...@@ -407,6 +416,8 @@ def train_ptb_lm(): ...@@ -407,6 +416,8 @@ def train_ptb_lm():
print("time cost ", time.time() - start_time) print("time cost ", time.time() - start_time)
ppl = np.exp(total_loss / iters) ppl = np.exp(total_loss / iters)
print("ppl ", epoch_id, ppl[0]) print("ppl ", epoch_id, ppl[0])
if args.ce:
print("kpis\ttrain_ppl\t%0.3f" % ppl[0])
eval(ptb_model, test_data) eval(ptb_model, test_data)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册