Skip to content

  • 体验新版
    • 正在加载...
  • 登录
  • PaddlePaddle
  • Paddle
  • Issue
  • #10714

P
Paddle
  • 项目概览

PaddlePaddle / Paddle
大约 2 年 前同步成功

通知 2325
Star 20933
Fork 5424
  • 代码
    • 文件
    • 提交
    • 分支
    • Tags
    • 贡献者
    • 分支图
    • Diff
  • Issue 1423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
  • Wiki 0
    • Wiki
  • 分析
    • 仓库
    • DevOps
  • 项目成员
  • Pages
P
Paddle
  • 项目概览
    • 项目概览
    • 详情
    • 发布
  • 仓库
    • 仓库
    • 文件
    • 提交
    • 分支
    • 标签
    • 贡献者
    • 分支图
    • 比较
  • Issue 1,423
    • Issue 1,423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
    • 合并请求 543
  • Pages
  • 分析
    • 分析
    • 仓库分析
    • DevOps
  • Wiki 0
    • Wiki
  • 成员
    • 成员
  • 收起侧边栏
  • 动态
  • 分支图
  • 创建新Issue
  • 提交
  • Issue看板
已关闭
开放中
Opened 5月 16, 2018 by saxon_zh@saxon_zhGuest

Paddle训练loss超出合理范围

Created by: kanchangcheng

问题描述: paddle训练数据:已经归一化到[-1,1],样例-0.8498901635342934 -0.5887234561874543 -0.7468879668049793 -0.7144251891628021。 paddle模型代码:main_output = paddle.layer.addto([main_output, h1], act=paddle.activation.Tanh(), bias_attr=True),模型的最后一行代码中,激活函数使用了Tanh,结果输出应该在[-1,1]范围内。 paddle训练设置:batch_size=128,cost使用cost = paddle.layer.square_error_cost(input=main_output, label=lbl)平方误差。

推理过程:loss最大为2的平方*128,即为512。但是我训练的结果Pass 0, Batch 0, Cost 17173.087891。

问题:训练得到loss为17173,远远大于最大loss 512,麻烦paddle的同学帮忙解答,谢谢!

附件: 模型代码

import paddle.v2 as paddle

__all__ = ['STResNet']

def _bn_relu_conv(input,
                  ch_out,
                  filter_size,
                  stride=1,
                  padding=1,
                  ch_in=None):
    """Pre-activation conv block: batch-norm + ReLU, then a linear conv.

    The convolution itself has no activation (Linear) so that the next
    block's batch-norm/ReLU provides the non-linearity.
    """
    normalized = paddle.layer.batch_norm(
        input=input, act=paddle.activation.Relu())
    conv = paddle.layer.img_conv(
        input=normalized,
        filter_size=filter_size,
        num_channels=ch_in,
        num_filters=ch_out,
        stride=stride,
        padding=padding,
        act=paddle.activation.Linear(),
        bias_attr=True)
    return conv

def _shortcut(input, residual):
    """Identity shortcut: element-wise add of input and residual paths."""
    print('_shortcut add to')
    summed = paddle.layer.addto(
        input=[input, residual],
        act=paddle.activation.Linear(),
        bias_attr=True)
    return summed

def _residual_unit(input, nb_filter):
    """One residual unit: two 3x3 bn-relu-conv layers plus a shortcut add.

    The original wrapped the body in an inner closure that was invoked
    immediately; this is the same computation written flat.
    """
    branch = _bn_relu_conv(input=input, ch_out=nb_filter, filter_size=3)
    branch = _bn_relu_conv(input=branch, ch_out=nb_filter, filter_size=3)
    return _shortcut(input, branch)

def ResUnits(input, residual_unit, nb_filter, repetations=1):
    """Chain `repetations` residual units, feeding each one's output forward.

    Args:
        input: layer (or value) fed to the first unit.
        residual_unit: callable invoked as residual_unit(input=..., nb_filter=...).
        nb_filter: filter count forwarded to every unit.
        repetations: number of chained units (spelling kept for API compat).

    Returns:
        Output of the last unit, or `input` unchanged when repetations == 0.
    """
    # BUG FIX: the original `return input` was indented with 4 spaces + a TAB
    # (a TabError on Python 3, tab-width-dependent on Python 2). With the
    # standard tab stop it sat *after* the loop, which is the intended
    # behavior — units must chain, not short-circuit on the first iteration.
    for _ in range(repetations):
        input = residual_unit(input=input, nb_filter=nb_filter)
    return input
     
def interResNet(input, num_channels=2, repetations=1):
    """One branch of ST-ResNet: stem conv, residual stack, ReLU, 2-ch head.

    Returns a 2-filter convolutional output with linear activation.
    """
    # Stem: 3x3 linear conv lifting the input to 64 feature maps.
    stem = paddle.layer.img_conv(
        input=input,
        filter_size=3,
        filter_size_y=3,
        num_channels=num_channels,
        num_filters=64,
        stride=1,
        bias_attr=True,
        padding=1,
        act=paddle.activation.Linear())
    # Residual tower over the 64-channel features.
    features = ResUnits(
        stem, _residual_unit, nb_filter=64, repetations=repetations)
    # Final activation of the tower output (single-input addto acts as ReLU).
    activated = paddle.layer.addto(
        input=features, act=paddle.activation.Relu(), bias_attr=True)
    # Head: project down to 2 output channels.
    head = paddle.layer.img_conv(
        input=activated,
        filter_size=3,
        filter_size_y=3,
        num_channels=None,
        num_filters=2,
        stride=1,
        bias_attr=True,
        padding=1,
        act=paddle.activation.Linear())
    return head


def STResNet(input_c, input_p, input_t, input_ex, external_dim=8, nb_residual_unit=3):
    """Spatio-temporal ResNet: fuse closeness/period/trend branches + external.

    Args:
        input_c: closeness input (6 channels over a 72x72 grid).
        input_p: period input (2 channels over a 72x72 grid).
        input_t: trend input (2 channels over a 72x72 grid).
        input_ex: external-feature vector.
        external_dim: size of the external features; the external fusion
            branch is skipped when this is None or <= 0.
        nb_residual_unit: residual units per branch.

    Returns:
        The fused output layer (Tanh-activated when external fusion runs,
        otherwise the resized branch merge).
    """
    conv2_c = interResNet(input=input_c, num_channels=6, repetations=nb_residual_unit)
    conv2_p = interResNet(input=input_p, num_channels=2, repetations=nb_residual_unit)
    conv2_t = interResNet(input=input_t, num_channels=2, repetations=nb_residual_unit)

    # Parametric fusion: per-cell learned weights (dotmul) for each branch,
    # summed inside a mixed layer of grid size 72*72.
    with paddle.layer.mixed(size=72*72) as main_output:
        main_output += paddle.layer.dotmul_projection(input=paddle.layer.resize(input=conv2_c, size=72*72))
        main_output += paddle.layer.dotmul_projection(input=paddle.layer.resize(input=conv2_p, size=72*72))
        main_output += paddle.layer.dotmul_projection(input=paddle.layer.resize(input=conv2_t, size=72*72))
    print('inter merge begin')
    # NOTE(review): resizing a 72*72-long output to length 2*72*72 doubles the
    # vector; presumably this re-slices to match the 2-channel label layout —
    # confirm this is intended and not a shape mismatch.
    main_output = paddle.layer.resize(input=main_output, size=2*72*72)
    print('inter finish')

    # Fuse with the external component through a two-layer FC embedding.
    # FIX: `external_dim != None` replaced with the idiomatic `is not None`.
    if external_dim is not None and external_dim > 0:
        embedding = paddle.layer.fc(
            input=input_ex, size=30, act=paddle.activation.Relu())
        h1 = paddle.layer.fc(
            input=embedding, size=2*72*72, act=paddle.activation.Relu())
        print('ex merge begin')
        main_output = paddle.layer.addto([main_output, h1], act=paddle.activation.Tanh(), bias_attr=True)
        print('ex merge finish')
    else:
        print('external_dim:', external_dim)
    return main_output

训练代码
import sys, os

import paddle.v2 as paddle
import time

lr = 0.0002  # learning rate
#lr = 0.002  # learning rate


from STResNet_v2 import STResNet
from readData_v1 import readTrainData, readTestData

def main():
    """Build the ST-ResNet graph and train it with Adam for 200 passes.

    Expects the residual-unit count as argv[1]; reads data via
    readTrainData/readTestData and prints per-batch cost and timing.
    Python 2 only (uses print statements).
    """
    # Flattened input sizes: closeness (6 ch), period (2 ch), trend (2 ch)
    # over a 72x72 grid, plus a 31-dim external-feature vector.
    datadim_c = 6 * 72 * 72
    datadim_p = 2 * 72 * 72
    datadim_t = 2 * 72 * 72
    datadim_ex = 31
    #datadim_all = datadim_c * datadim_p * datadim_t
    
    # Label/prediction size: 2 channels over the 72x72 grid (10368 values).
    resdim = 2 * 72 * 72
    
    if len(sys.argv) == 1:
        print('self def errror: input fomat error')
        print('need to specify  residual units')
        sys.exit(-1)
        # nb_residual_unit = 2  # number of residual units
    else:
        nb_residual_unit = int(sys.argv[1])  # number of residual units

    # PaddlePaddle init
    #paddle.init(use_gpu=with_gpu, trainer_count=1)
    paddle.init(use_gpu=True)

    # Declare the four dense-vector inputs matching the dims above.
    input_c = paddle.layer.data(
        name="input_c", type=paddle.data_type.dense_vector(datadim_c))
    input_p = paddle.layer.data(
        name="input_p", type=paddle.data_type.dense_vector(datadim_p))
    input_t = paddle.layer.data(
        name="input_t", type=paddle.data_type.dense_vector(datadim_t))
    input_ex = paddle.layer.data(
        name="input_ex", type=paddle.data_type.dense_vector(datadim_ex))

    # Add neural network config
    # option 1. resnet
    main_output = STResNet(input_c, input_p, input_t, input_ex, external_dim=datadim_ex, nb_residual_unit=nb_residual_unit)
    
    lbl = paddle.layer.data(
        name="in_label", type=paddle.data_type.dense_vector(resdim))
    # NOTE(review): square_error_cost sums the squared error over ALL
    # resdim = 2*72*72 = 10368 output elements per sample (then the reported
    # cost is presumably averaged over the batch — confirm). With outputs
    # and labels in [-1, 1] the per-sample bound is 4 * 10368 ≈ 41472, so a
    # reported cost of ~17173 is within range; the issue's "max 512"
    # reasoning ignores the per-element summation.
    cost = paddle.layer.square_error_cost(input=main_output, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)
    
    # Create optimizer
    Adam_optimizer = paddle.optimizer.Adam(
        learning_rate=lr,
        beta1=0.9, beta2=0.999)
    # Create trainer
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=Adam_optimizer)
    
    # End batch and end pass event handler
    def event_handler(event):
        # Prints cost + elapsed time per batch, and runs the test reader at
        # the end of every pass. Timing state lives in the module-level
        # globals ts_batch / ts_pass (initialized in the __main__ guard).
        if isinstance(event, paddle.event.EndIteration):
            #print "\nPass %d, Batch %d, Cost %f, %s" % (
            #    event.pass_id, event.batch_id, event.cost ** 0.5, event.metrics)
            #if event.batch_id % 10 == 0:
            global ts_batch
            if event.batch_id % 1 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
                print "\nelapsed time(Pass %d, Batch %d): %.3f seconds" % (
                    event.pass_id, event.batch_id, (time.time() - ts_batch))
                ts_batch = time.time()
        if isinstance(event, paddle.event.EndPass):
                        
            global ts_pass
            print "\npass elapsed time(Pass %d): %.3f seconds" % (
                event.pass_id, (time.time() - ts_pass))
            ts_pass = time.time()
            # save parameters
            if event.pass_id % 1 == 0: 
                #with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                        #trainer.save_parameter_to_tar(f)
                result = trainer.test(
                    reader=paddle.batch(
                       readTestData(), batch_size=128),
                        feeding={'input_c': 0,
                                 'input_p': 1,
                                 'input_t': 2,
                                 'input_ex': 3,
                                 'in_label': 4})
                print "\nTest with Pass %d, %s" % (event.pass_id, result.cost)
    
    # Save the inference topology to protobuf.
    inference_topology = paddle.topology.Topology(layers=main_output)
    with open("inference_topology.pkl", 'wb') as f:
        inference_topology.serialize_for_inference(f)
    
    # Train without shuffling (shuffle reader left commented out);
    # feeding maps each data-layer name to its tuple index in the reader.
    trainer.train(
        reader=paddle.batch(
            #paddle.reader.shuffle(
            #    readTrainData(), buf_size=4),
            readTrainData(),
            batch_size=128),
        num_passes=200,
        event_handler=event_handler,
        feeding={'input_c': 0,
                 'input_p': 1,
                 'input_t': 2,
                 'input_ex': 3,
                 'in_label': 4})

if __name__ == '__main__':
    # Seed the batch/pass timers that event_handler updates via `global`.
    ts_batch = time.time()
    ts_pass = time.time()
    main()
指派人
分配到
无
里程碑
无
分配里程碑
工时统计
无
截止日期
无
标识: paddlepaddle/Paddle#10714
渝ICP备2023009037号

京公网安备11010502055752号

网络110报警服务 Powered by GitLab CE v13.7
开源知识
Git 入门 Pro Git 电子书 在线学 Git
Markdown 基础入门 IT 技术知识开源图谱
帮助
使用手册 反馈建议 博客
《GitCode 隐私声明》 《GitCode 服务条款》 关于GitCode
Powered by GitLab CE v13.7