Skip to content

  • 体验新版
    • 正在加载...
  • 登录
  • PaddlePaddle
  • Paddle
  • Issue
  • #9878

P
Paddle
  • 项目概览

PaddlePaddle / Paddle
大约 2 年 前同步成功

通知 2325
Star 20933
Fork 5424
  • 代码
    • 文件
    • 提交
    • 分支
    • Tags
    • 贡献者
    • 分支图
    • Diff
  • Issue 1423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
  • Wiki 0
    • Wiki
  • 分析
    • 仓库
    • DevOps
  • 项目成员
  • Pages
P
Paddle
  • 项目概览
    • 项目概览
    • 详情
    • 发布
  • 仓库
    • 仓库
    • 文件
    • 提交
    • 分支
    • 标签
    • 贡献者
    • 分支图
    • 比较
  • Issue 1,423
    • Issue 1,423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
    • 合并请求 543
  • Pages
  • 分析
    • 分析
    • 仓库分析
    • DevOps
  • Wiki 0
    • Wiki
  • 成员
    • 成员
  • 收起侧边栏
  • 动态
  • 分支图
  • 创建新Issue
  • 提交
  • Issue看板
已关闭
开放中
Opened 4月 12, 2018 by saxon_zh@saxon_zhGuest

save inference model error

Created by: gmcather

I want to save the prediction in an inference model. The program runs well when parallel_do is not used; however, the error below occurs as soon as it is combined with parallel_do. How can I solve this problem?

# no errors
train(train_reader, word_dict, bow_net, use_cuda=False,
                parallel=False, save_dirname="bow_model", lr=0.002,
                pass_num=30, batch_size=128)  
# error happens
train(train_reader, word_dict, bow_net, use_cuda=False,
                parallel=True, save_dirname="bow_model", lr=0.002,
                pass_num=30, batch_size=128)  
Traceback (most recent call last):
  File "train.py", line 116, in <module>
    train_net()
  File "train.py", line 98, in train_net
    pass_num=30, batch_size=128)
  File "train.py", line 73, in train
    fetch_list=[cost, acc])
  File "/root/.jumbo/lib/python2.7/site-packages/paddle/fluid/executor.py", line 336, in run
    self.executor.run(program.desc, scope, 0, True, True)
paddle.fluid.core.EnforceNotMet: Cannot find variable fc_2.tmp_2@GRAD in the parent scope at [/paddle/paddle/fluid/operators/detail/safe_ref.h:28]
PaddlePaddle Call Stacks:
0       0x7fa43a6ae606p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 486
1       0x7fa43afec81ap
2       0x7fa43aff545fp paddle::operators::ParallelDoGradOp::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 431
3       0x7fa43a744e20p paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool) + 336
4       0x7fa43a745a34p paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool) + 100
5       0x7fa43a6c479bp _ZZN8pybind1112cpp_function10initializeIZNS0_C1IvN6paddle9framework8ExecutorEIRKNS4_11ProgramDescEPNS4_5ScopeEibbEINS_4nameENS_9is_methodENS_7siblingEEEEMT0_FT_DpT1_EDpRKT2_EUlPS5_S8_SA_ibbE_vISO_S8_SA_ibbEISB_SC_SD_EEEvOSF_PFSE_SH_ESN_ENUlRNS_6detail13function_callEE1_4_FUNESV_ + 555
6       0x7fa43a6be164p pybind11::cpp_function::dispatcher(_object*, _object*, _object*) + 2596
7       0x7fa474d8f3d4p PyEval_EvalFrameEx + 25956
8       0x7fa474d90120p PyEval_EvalCodeEx + 2240
9       0x7fa474d8e491p PyEval_EvalFrameEx + 22049
10      0x7fa474d90120p PyEval_EvalCodeEx + 2240
11      0x7fa474d8e491p PyEval_EvalFrameEx + 22049
12      0x7fa474d8ec46p PyEval_EvalFrameEx + 24022
13      0x7fa474d90120p PyEval_EvalCodeEx + 2240
14      0x7fa474d90232p PyEval_EvalCode + 50
15      0x7fa474daa61cp
16      0x7fa474daa6f0p PyRun_FileExFlags + 144
17      0x7fa474dabbfcp PyRun_SimpleFileExFlags + 220
18      0x7fa474dbd4bcp Py_Main + 3164
19      0x7fa474090d1dp __libc_start_main + 253
20            0x400659p

here is my code

"""
For http://wiki.baidu.com/display/LegoNet/Text+Classification
"""
import paddle.fluid as fluid
import paddle.v2 as paddle
import numpy as np
import sys
import time
import unittest
import contextlib
import utils
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import gru_net

def train(train_reader,
          word_dict,
          network,
          use_cuda,
          parallel,
          save_dirname,
          lr=0.2,
          batch_size=128,
          pass_num=30):
    """
    Build the network, train it and save an inference model after each pass.

    Args:
        train_reader: callable returning an iterable of mini-batches; every
            mini-batch is handed to the DataFeeder for the "words" and
            "label" inputs.
        word_dict: vocabulary; only its length is used (passed to `network`).
        network: callable invoked as `network(data, label, dict_dim)` that
            returns the tuple `(cost, acc, prediction)`.
        use_cuda: run on `CUDAPlace(0)` when true, otherwise on `CPUPlace`.
        parallel: when true, build the network inside a `ParallelDo` block
            over two places and average the per-place cost and accuracy.
        save_dirname: output directory; pass N is saved to
            `<save_dirname>/epoch<N>`.
        lr: learning rate given to the Adagrad optimizer.
        batch_size: not used by this function; kept for interface
            compatibility (presumably batching is done by `train_reader`).
        pass_num: number of passes over `train_reader`.
    """
    data = fluid.layers.data(
        name="words",
        shape=[1],
        dtype="int64",
        lod_level=1)

    label = fluid.layers.data(
        name="label",
        shape=[1],
        dtype="int64")

    if not parallel:
        cost, acc, prediction = network(
            data, label, len(word_dict))
    else:
        # NOTE(review): the reported "Cannot find variable ...@GRAD in the
        # parent scope" failure only occurs on this branch; presumably it is
        # related to exporting `prediction` from the block -- TODO confirm.
        places = fluid.layers.get_places(device_count=2)
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            cost, acc, prediction = network(
                pd.read_input(data),
                pd.read_input(label),
                len(word_dict))

            pd.write_output(cost)
            pd.write_output(acc)
            pd.write_output(prediction)

        cost, acc, prediction = pd()
        # Reduce the per-place outputs to scalars.
        cost = fluid.layers.mean(cost)
        acc = fluid.layers.mean(acc)

    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
    optimizer.minimize(cost)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    exe.run(fluid.default_startup_program())
    for pass_id in xrange(pass_num):
        data_count, total_acc, total_cost = 0, 0.0, 0.0
        # `batch` (not `data`) so the input layer variable is not rebound.
        for batch in train_reader():
            avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
                                              feed=feeder.feed(batch),
                                              fetch_list=[cost, acc])
            # Weight the batch averages by batch length so that the pass
            # averages are per-sample.
            data_size = len(batch)
            total_acc += data_size * avg_acc_np
            total_cost += data_size * avg_cost_np
            data_count += data_size

        avg_cost = total_cost / data_count
        avg_acc = total_acc / data_count
        print("pass_id: %d, avg_acc: %f, avg_cost: %f" % (pass_id, avg_acc, avg_cost))

        epoch_model = save_dirname + "/" + "epoch" + str(pass_id)
        # target_vars is documented as a list of Variables.
        fluid.io.save_inference_model(
            epoch_model,
            ["words"],
            [prediction], exe)

def train_net():
    """
    Prepare the "tiny_imdb" data and train `bow_net` on CPU with the
    parallel code path enabled, saving models under "bow_model".
    """
    prepared = utils.prepare_data(
        "tiny_imdb",
        self_dict=False,
        batch_size=128,
        buf_size=50000)
    # The third element (test reader) is returned but not used here.
    vocab, reader, _test_reader = prepared

    train(
        reader,
        vocab,
        bow_net,
        use_cuda=False,
        parallel=True,
        save_dirname="bow_model",
        lr=0.002,
        pass_num=30,
        batch_size=128)

# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    train_net()
指派人
分配到
无
里程碑
无
分配里程碑
工时统计
无
截止日期
无
标识: paddlepaddle/Paddle#9878
渝ICP备2023009037号

京公网安备11010502055752号

网络110报警服务 Powered by GitLab CE v13.7
开源知识
Git 入门 Pro Git 电子书 在线学 Git
Markdown 基础入门 IT 技术知识开源图谱
帮助
使用手册 反馈建议 博客
《GitCode 隐私声明》 《GitCode 服务条款》 关于GitCode
Powered by GitLab CE v13.7