test loss随模型迭代而增加
Created by: snowsteper
如题，训练日志如下:

```
Test in epoch 0, Loss: 4.757947, Acc: 0.354800
Test in epoch 0, Loss: 4.865430, Acc: 0.354230
Test in epoch 0, Loss: 4.937966, Acc: 0.356729
Test in epoch 1, Loss: 5.124760, Acc: 0.356336
Test in epoch 1, Loss: 5.165730, Acc: 0.355902
Test in epoch 1, Loss: 5.279254, Acc: 0.354999
Test in epoch 2, Loss: 5.431847, Acc: 0.354028
Test in epoch 2, Loss: 5.450323, Acc: 0.352916
Test in epoch 2, Loss: 5.486628, Acc: 0.355298
Test in epoch 3, Loss: 5.596898, Acc: 0.354946
Test in epoch 3, Loss: 5.595257, Acc: 0.352510
Test in epoch 3, Loss: 5.728043, Acc: 0.356146
Test in epoch 4, Loss: 5.937518, Acc: 0.347627
Test in epoch 4, Loss: 5.786941, Acc: 0.353117
Test in epoch 4, Loss: 5.885833, Acc: 0.354856
Test in epoch 5, Loss: 5.934639, Acc: 0.355649
Test in epoch 5, Loss: 5.919194, Acc: 0.353639
Test in epoch 5, Loss: 6.017453, Acc: 0.355212
Test in epoch 6, Loss: 6.062719, Acc: 0.355775
Test in epoch 6, Loss: 6.049249, Acc: 0.352780
Test in epoch 6, Loss: 6.193391, Acc: 0.354625
Test in epoch 7, Loss: 6.236372, Acc: 0.355126
Test in epoch 7, Loss: 6.229053, Acc: 0.351142
Test in epoch 7, Loss: 6.237000, Acc: 0.356059
Test in epoch 8, Loss: 6.331445, Acc: 0.354839
Test in epoch 8, Loss: 6.329272, Acc: 0.351808
Test in epoch 8, Loss: 6.363647, Acc: 0.355070
Test in epoch 9, Loss: 6.449283, Acc: 0.354044
Test in epoch 9, Loss: 6.400001, Acc: 0.351498
Test in epoch 9, Loss: 6.482876, Acc: 0.354735
```
模型使用 CNN + softmax + cross_entropy loss，训练程序如下:

```
# Network
class_num = len(label_dict) + 1
char_dict_num = len(char_dict) + 1
word_dict_num = len(word_dict) + 1
logger.info("char dict dim:\t%s, word dict dim:\t%s, class dim:\t%s", char_dict_num, word_dict_num, class_num)
pred, loss, label = network(char_dict_dim=char_dict_num, word_dict_dim=word_dict_num, class_dim=class_num, is_indep_char=is_indep_char)
pred.persistable = True
loss.persistable = True
label.persistable = True
# Program
# Take handles to Fluid's default main and startup programs.
main_program = fluid.default_main_program()
start_program = fluid.default_startup_program()
# The evaluation program is cloned here, BEFORE optimizer.minimize(loss) is
# called below; keep this ordering.
# NOTE(review): presumably this keeps backward/optimizer ops out of
# test_program -- confirm against the Program.clone(for_test=True) docs.
test_program = main_program.clone(for_test=True)
# Optimizer
# Adam with explicit hyper-parameters, minimizing `loss`.
# NOTE(review): learning_rate=0.01 is larger than the 1e-3 commonly used with
# Adam; given the steadily rising test loss reported with this script, a
# smaller value is worth trying -- confirm experimentally.
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=0.01,
beta1=0.9,
beta2=0.999,
epsilon=1e-8)
optimizer.minimize(loss)
# Memory optimization is applied to the training program only; test_program
# was cloned before this call.
# NOTE(review): presumably this is why pred/loss/label are marked persistable
# before being fetched -- confirm.
fluid.memory_optimize(main_program)
# Executor
# A plain executor on `place` runs the startup program once; it is also the
# executor later handed to save_inference_model.
exe = fluid.Executor(place)
exe.run(start_program)

# Training runs through a ParallelExecutor over main_program.
# (A CPU-only variant using fluid.ExecutionStrategy with num_threads = 1 was
# tried previously and is intentionally left disabled.)
parallel_executor = fluid.ParallelExecutor(
    main_program=main_program,
    loss_name=loss.name,
    use_cuda=use_gpu)

# Evaluation runs through a second ParallelExecutor over test_program that
# shares its variables with the training executor.
# (A single fluid.Executor(place) was the previously tried alternative.)
test_executor = fluid.ParallelExecutor(
    main_program=test_program,
    share_vars_from=parallel_executor,
    use_cuda=use_gpu)
# Feeder
# Names of the input variables, in the order each sample's fields are fed.
feed_order = ["char_data", "basic_data", "phrase_data", "label"]

# Look each input variable up by name in the main program's global block.
feed_var_list = []
for var_name in feed_order:
    feed_var_list.append(main_program.global_block().var(var_name))

feeder = fluid.DataFeeder(feed_list=feed_var_list, place=place)
def run_test(num_epoch, batch_size=256):
    """Evaluate the current parameters on the test set and log loss/accuracy.

    Runs every batch of ``test_data_reader`` through ``test_executor``,
    accumulates top-1 accuracy via ``Accuracy_TopN`` and averages the
    per-batch loss over the number of batches.

    Args:
        num_epoch: Epoch index, used only in the log line.
        batch_size: Number of samples per evaluation batch (default 256,
            the value that was previously hard-coded).

    Returns:
        ``(test_avg_loss, top1_acc)`` for the evaluated test set, or ``None``
        when the test reader yields no batches (previously this case raised
        ZeroDivisionError).
    """
    # drop_last=False so the trailing partial batch is evaluated as well.
    test_batch = paddle.batch(test_data_reader, batch_size, drop_last=False)

    comp = fluid.metrics.CompositeMetric()
    comp.add_metric(Accuracy_TopN(topN=1))
    comp.reset()

    tot_loss = 0.0
    tot_iter = 0
    for data in test_batch():
        preds, batch_avg_loss, labels = test_executor.run(
            fetch_list=[pred.name, loss.name, label.name],
            feed=feeder.feed(data))
        comp.update(preds=preds, labels=labels)
        # np.mean collapses whatever the executor returned for the loss
        # (presumably one value per device) into a single scalar.
        # NOTE(review): every batch gets equal weight, so a short final batch
        # is slightly over-weighted in the reported average -- confirm whether
        # weighting by len(data) is wanted.
        tot_loss += np.mean(batch_avg_loss)
        tot_iter += 1

    if tot_iter == 0:
        # Nothing was evaluated: there is no meaningful loss or accuracy to
        # report, and dividing by tot_iter would raise ZeroDivisionError.
        logger.warning(
            "Test in epoch %3d skipped: test reader yielded no batches",
            num_epoch)
        return None

    np_acc = comp.eval()
    test_avg_loss = tot_loss / tot_iter
    logger.info("Test in epoch %3d, Loss:\t%.6f, Acc:\t%.6f", num_epoch, test_avg_loss, np_acc[0])
    return test_avg_loss, np_acc[0]
# Train Model
# Cadence, in batches, of the training-loss log line and of the mid-epoch
# evaluation.
log_interval = 1000
test_interval = 12000

# The exported inference model is fed the three input features and produces
# `pred`; neither list depends on the epoch, so both are built once.
feeded_var_names = ["char_data", "basic_data", "phrase_data"]
target_vars = [pred]

logger.info("Training...total pass:\t%s", pass_num)
for epoch in xrange(pass_num):
    for batch_id, batch in enumerate(batch_data()):
        avg_loss = parallel_executor.run([loss.name], feed=feeder.feed(batch))

        if batch_id % log_interval == 0:
            logger.info("Train epoch %3d, batch %6d, loss %f", epoch, batch_id, np.mean(avg_loss[0]))

        # Periodic evaluation inside the epoch; batch 0 is skipped.
        if batch_id > 0 and batch_id % test_interval == 0:
            run_test(epoch)

    # Evaluate once more at the end of every epoch.
    run_test(epoch)

    # Export an inference model for this epoch into its own directory.
    model_name = model_name_prefix + "_pass_%04d" % epoch
    model_path = os.path.join(model_dir, model_name)
    logger.info("Saving model, path:\t%s", model_path)
    fluid.io.save_inference_model(
        dirname=model_path,
        model_filename=model_name,
        feeded_var_names=feeded_var_names,
        target_vars=target_vars,
        executor=exe,
        main_program=test_program)