Paddle训练loss超出合理范围
Created by: kanchangcheng
问题描述: paddle训练数据:已经归一化到[-1,1],样例-0.8498901635342934 -0.5887234561874543 -0.7468879668049793 -0.7144251891628021。 paddle模型代码:main_output = paddle.layer.addto([main_output, h1], act=paddle.activation.Tanh(), bias_attr=True),模型的最后一行代码中,激活函数使用了Tanh,结果输出应该在[-1,1]范围内。 paddle训练设置:batch_size=128,cost使用cost = paddle.layer.square_error_cost(input=main_output, label=lbl)平方误差。
推理过程:loss最大为2的平方*128,即为512。但是我训练的结果Pass 0, Batch 0, Cost 17173.087891。
问题:训练得到loss为17173,远远大于最大loss 512,麻烦paddle的同学帮忙解答,谢谢!
附件: 模型代码
import paddle.v2 as paddle
# Public API of this module: only the top-level model builder is exported.
__all__ = ['STResNet']
def _bn_relu_conv(input,
                  ch_out,
                  filter_size,
                  stride=1,
                  padding=1,
                  ch_in=None):
    """Pre-activation conv block: batch-norm + ReLU, then a linear convolution.

    ch_in may be None, in which case paddle infers the channel count from
    the previous layer.
    """
    normed = paddle.layer.batch_norm(input=input, act=paddle.activation.Relu())
    conv = paddle.layer.img_conv(
        input=normed,
        num_channels=ch_in,
        num_filters=ch_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        bias_attr=True,
        act=paddle.activation.Linear())
    return conv
def _shortcut(input, residual):
    """Merge the identity branch with the residual branch by element-wise sum."""
    print('_shortcut add to')
    merged = paddle.layer.addto(
        input=[input, residual],
        act=paddle.activation.Linear(),
        bias_attr=True)
    return merged
def _residual_unit(input, nb_filter):
    """One residual unit: two stacked bn-relu-conv blocks plus a shortcut add.

    The original wrapped this body in an inner function that was invoked
    immediately; it is inlined here with identical behavior.
    """
    branch = _bn_relu_conv(input=input, ch_out=nb_filter, filter_size=3)
    branch = _bn_relu_conv(input=branch, ch_out=nb_filter, filter_size=3)
    return _shortcut(input, branch)
def ResUnits(input, residual_unit, nb_filter, repetations=1):
    """Chain `repetations` residual units, feeding each one's output forward.

    The original wrapped the loop in an immediately-invoked inner function;
    it is inlined here with identical behavior.
    """
    out = input
    for _ in range(repetations):
        out = residual_unit(input=out, nb_filter=nb_filter)
    return out
def interResNet(input, num_channels=2, repetations=1):
    """Single-view sub-network: conv -> residual stack -> ReLU -> 2-channel conv.

    Args:
        input: image-shaped data layer for one temporal view.
        num_channels: input channel count of the first convolution.
        repetations: number of residual units in the middle stack.

    Returns:
        A linear 2-filter convolution layer over the activated residual output.
    """
    head = paddle.layer.img_conv(
        input=input,
        filter_size=3,
        filter_size_y=3,
        num_channels=num_channels,
        num_filters=64,
        stride=1,
        padding=1,
        bias_attr=True,
        act=paddle.activation.Linear())
    body = ResUnits(head, _residual_unit, nb_filter=64,
                    repetations=repetations)
    # Single-input addto with bias_attr=True applies a bias then the ReLU.
    activated = paddle.layer.addto(
        input=body, act=paddle.activation.Relu(), bias_attr=True)
    tail = paddle.layer.img_conv(
        input=activated,
        filter_size=3,
        filter_size_y=3,
        num_channels=None,
        num_filters=2,
        stride=1,
        padding=1,
        bias_attr=True,
        act=paddle.activation.Linear())
    return tail
def STResNet(input_c, input_p, input_t, input_ex, external_dim=8, nb_residual_unit=3):
    """ST-ResNet fusion model.

    Three interResNet branches (closeness: 6 channels, period and trend:
    2 channels each) are merged by learned element-wise weights in a mixed
    layer, then optionally fused with an external-feature branch and
    squashed by a final Tanh.

    Args:
        input_c, input_p, input_t: dense image inputs for the three views.
        input_ex: external feature vector input.
        external_dim: when a positive number, enables the external branch.
        nb_residual_unit: residual units per branch.

    Returns:
        The output layer (size 2*72*72; Tanh-activated when the external
        branch is enabled).
    """
    conv2_c = interResNet(input=input_c, num_channels=6, repetations=nb_residual_unit)
    conv2_p = interResNet(input=input_p, num_channels=2, repetations=nb_residual_unit)
    conv2_t = interResNet(input=input_t, num_channels=2, repetations=nb_residual_unit)
    # NOTE(review): each branch emits 2*72*72 values per sample but is
    # resized down to 72*72 before the mixed layer and resized back up
    # afterwards. paddle's resize layer appears to keep total element count
    # and reinterpret the batch boundary — confirm this round-trip is
    # intended; it looks like a likely cause of anomalous training costs.
    # Also, square_error_cost presumably sums the squared error over all
    # 2*72*72 output dims per sample — verify; if so, per-sample costs far
    # above 4 are expected even with the Tanh output confined to [-1, 1].
    with paddle.layer.mixed(size=72*72) as main_output:
        main_output += paddle.layer.dotmul_projection(input=paddle.layer.resize(input=conv2_c, size=72*72))
        main_output += paddle.layer.dotmul_projection(input=paddle.layer.resize(input=conv2_p, size=72*72))
        main_output += paddle.layer.dotmul_projection(input=paddle.layer.resize(input=conv2_t, size=72*72))
    print('inter merge begin')
    main_output = paddle.layer.resize(input=main_output, size=2*72*72)
    print('inter finish')
    # Fuse with the external component: embed the external features through
    # two ReLU fc layers, then add to the main output under a final Tanh.
    # (Fix: compare against None with `is not`, per PEP 8, instead of `!=`.)
    if external_dim is not None and external_dim > 0:
        embedding = paddle.layer.fc(
            input=input_ex, size=30, act=paddle.activation.Relu())
        h1 = paddle.layer.fc(
            input=embedding, size=2*72*72, act=paddle.activation.Relu())
        print('ex merge begin')
        main_output = paddle.layer.addto([main_output, h1], act=paddle.activation.Tanh(), bias_attr=True)
        print('ex merge finish')
    else:
        print('external_dim:', external_dim)
    return main_output
训练代码
import sys, os
import paddle.v2 as paddle
import time
# Learning rate for the Adam optimizer created in main().
lr = 0.0002 # learning rate
#lr = 0.002 # learning rate
from STResNet_v2 import STResNet
from readData_v1 import readTrainData, readTestData
def main():
    """Build the ST-ResNet network and train it with Adam for 200 passes.

    Expects one CLI argument: the number of residual units per branch.
    Reads data via readTrainData/readTestData, prints per-batch cost and
    timing, evaluates on the test set after every pass, and serializes the
    inference topology to inference_topology.pkl before training starts.
    """
    # Input dimensionalities over a 72x72 grid: closeness has 6 channels,
    # period and trend 2 channels each; external features are a 31-d vector.
    datadim_c = 6 * 72 * 72
    datadim_p = 2 * 72 * 72
    datadim_t = 2 * 72 * 72
    datadim_ex = 31
    #datadim_all = datadim_c * datadim_p * datadim_t
    # Size of both the model output and the training label.
    resdim = 2 * 72 * 72
    if len(sys.argv) == 1:
        print('self def errror: input fomat error')
        print('need to specify residual units')
        sys.exit(-1)
        # nb_residual_unit = 2 # number of residual units
    else:
        nb_residual_unit = int(sys.argv[1]) # number of residual units
    # PaddlePaddle init
    #paddle.init(use_gpu=with_gpu, trainer_count=1)
    paddle.init(use_gpu=True)
    # Data layers for the three temporal views plus the external features.
    input_c = paddle.layer.data(
        name="input_c", type=paddle.data_type.dense_vector(datadim_c))
    input_p = paddle.layer.data(
        name="input_p", type=paddle.data_type.dense_vector(datadim_p))
    input_t = paddle.layer.data(
        name="input_t", type=paddle.data_type.dense_vector(datadim_t))
    input_ex = paddle.layer.data(
        name="input_ex", type=paddle.data_type.dense_vector(datadim_ex))
    # Add neural network config
    # option 1. resnet
    main_output = STResNet(input_c, input_p, input_t, input_ex, external_dim=datadim_ex, nb_residual_unit=nb_residual_unit)
    lbl = paddle.layer.data(
        name="in_label", type=paddle.data_type.dense_vector(resdim))
    # NOTE(review): square_error_cost presumably sums the squared error over
    # all `resdim` (=10368) dims per sample — confirm against paddle docs;
    # if so, the reported cost scale is per-sample-sum, not per-element,
    # which explains costs far above 4 despite the Tanh-bounded output.
    cost = paddle.layer.square_error_cost(input=main_output, label=lbl)
    # Create parameters
    parameters = paddle.parameters.create(cost)
    # Create optimizer
    Adam_optimizer = paddle.optimizer.Adam(
        learning_rate=lr,
        beta1=0.9, beta2=0.999)
    # Create trainer
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=Adam_optimizer)
    # End batch and end pass event handler
    def event_handler(event):
        # Per-batch: print cost/metrics and time elapsed since the previous
        # batch, using the module-level ts_batch timer.
        if isinstance(event, paddle.event.EndIteration):
            #print "\nPass %d, Batch %d, Cost %f, %s" % (
            #    event.pass_id, event.batch_id, event.cost ** 0.5, event.metrics)
            #if event.batch_id % 10 == 0:
            global ts_batch
            if event.batch_id % 1 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
                print "\nelapsed time(Pass %d, Batch %d): %.3f seconds" % (
                    event.pass_id, event.batch_id, (time.time() - ts_batch))
                ts_batch = time.time()
        # Per-pass: print pass timing and evaluate on the test reader.
        if isinstance(event, paddle.event.EndPass):
            global ts_pass
            print "\npass elapsed time(Pass %d): %.3f seconds" % (
                event.pass_id, (time.time() - ts_pass))
            ts_pass = time.time()
            # save parameters
            if event.pass_id % 1 == 0:
                #with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                #    trainer.save_parameter_to_tar(f)
                result = trainer.test(
                    reader=paddle.batch(
                        readTestData(), batch_size=128),
                    feeding={'input_c': 0,
                             'input_p': 1,
                             'input_t': 2,
                             'input_ex': 3,
                             'in_label': 4})
                print "\nTest with Pass %d, %s" % (event.pass_id, result.cost)
    # Save the inference topology to protobuf.
    inference_topology = paddle.topology.Topology(layers=main_output)
    with open("inference_topology.pkl", 'wb') as f:
        inference_topology.serialize_for_inference(f)
    trainer.train(
        reader=paddle.batch(
            #paddle.reader.shuffle(
            #    readTrainData(), buf_size=4),
            readTrainData(),
            batch_size=128),
        num_passes=200,
        event_handler=event_handler,
        feeding={'input_c': 0,
                 'input_p': 1,
                 'input_t': 2,
                 'input_ex': 3,
                 'in_label': 4})
if __name__ == '__main__':
    # Seed the global batch/pass timers that event_handler reads and
    # updates via `global` before training begins.
    ts_batch = time.time()
    ts_pass = time.time()
    main()