PaddlePaddle / Paddle · Issue #9261 (Closed)

Opened March 20, 2018 by saxon_zh (Guest)

Using a CRF layer with multi-threaded GPU training fails when batch_size is not 1

Created by: jshower

The problem: running code that contains a CRF layer with multi-threading (ParallelDo) on a single GPU fails whenever batch_size is not 1 (e.g. 10), with the error pasted at the end. Notably, the error is identical to the one in #9234 (closed). The code used is:

import sys
import math

import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.fluid as fluid
import contextlib
import time
import unittest
from five_corss_val import filereader

sys.stdout.flush()

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 10

IS_SPARSE = True
PASS_NUM = 1
BATCH_SIZE = 1  # setting this to e.g. 10 triggers the error reported below

embedding_name = 'emb'
default_std = 1 / math.sqrt(hidden_dim) / 3.0

def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)

def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res
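
# Example: for data = [[1, 2, 3], [4, 5]], to_lodtensor builds lod = [0, 3, 5]
# and flattened_data of shape [5, 1]; this is the level-0 LoD offset format
# Fluid uses for variable-length sequence batches.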

word = fluid.layers.data(
    name='word_data', shape=[1], dtype='int64', lod_level=1)
predicate = fluid.layers.data(
    name='verb_data', shape=[1], dtype='int64', lod_level=1)
ctx_n2 = fluid.layers.data(
    name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
ctx_n1 = fluid.layers.data(
    name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
ctx_0 = fluid.layers.data(
    name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
ctx_p1 = fluid.layers.data(
    name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
ctx_p2 = fluid.layers.data(
    name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
mark = fluid.layers.data(
    name='mark_data', shape=[1], dtype='int64', lod_level=1)

target = fluid.layers.data(
    name='target', shape=[1], dtype='int64', lod_level=1)

places = fluid.layers.get_places(device_count=0)
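# device_count=0 asks Fluid for every available place (a single GPU here, per the report above).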
pd = fluid.layers.ParallelDo(places)    
with pd.do():
    word_ = pd.read_input(word)
    predicate_ = pd.read_input(predicate)
    ctx_n2_ = pd.read_input(ctx_n2)
    ctx_n1_ = pd.read_input(ctx_n1)
    ctx_0_ = pd.read_input(ctx_0)
    ctx_p1_ = pd.read_input(ctx_p1)
    ctx_p2_ = pd.read_input(ctx_p2)
    mark_ = pd.read_input(mark)
    target_ = pd.read_input(target)

    predicate_embedding = fluid.layers.embedding(
        input=predicate_,
        size=[pred_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb', learning_rate=5))

    mark_embedding = fluid.layers.embedding(
        input=mark_,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='mark_emb', learning_rate=5))

    word_input = [word_, ctx_n2_, ctx_n1_, ctx_0_, ctx_p1_, ctx_p2_]
    emb_layers = [
        fluid.layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr=fluid.ParamAttr(
                name=embedding_name, trainable=False)) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        fluid.layers.fc(input=emb, size=hidden_dim, act="tanh") for emb in emb_layers
    ]

    hidden_0 = fluid.layers.sums(input=hidden_0_layers)

    lstm_0 = fluid.layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = fluid.layers.sums(input=[
            fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act="tanh"),
            fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act="tanh")
        ])

        lstm = fluid.layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = fluid.layers.sums(input=[
        fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act="tanh"),
        fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act="tanh")
    ])

    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,  # note: this is the outer `target`, not the per-device `target_` read above
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost1 = fluid.layers.mean(x=crf_cost)

    pd.write_output(avg_cost1)
    pd.write_output(feature_out)

avg_cost_on_each_devs, feature_out_on_each_devs = pd()
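# pd() merges the per-device outputs in the order they were written: avg_cost1 first, feature_out second.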
feature_out_on_each_devs.stop_gradient = True
avg_cost = fluid.layers.mean(x=avg_cost_on_each_devs)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)

crf_decode = fluid.layers.crf_decoding(
    input=feature_out_on_each_devs, param_attr=fluid.ParamAttr(name='crfw'))

(precision, recall, f1_score, num_infer_chunks, num_label_chunks,
 num_correct_chunks) = fluid.layers.chunk_eval(
    input=crf_decode,
    label=target,
    chunk_scheme="IOB",
    num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
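# With the IOB scheme each chunk type contributes a B- and an I- tag, plus one
# O tag overall, hence (label_dict_len - 1) / 2 chunk types.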

inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
    inference_program = fluid.io.get_inference_program([])

train_data = paddle.batch(
    paddle.reader.shuffle(
        filereader.file_reader("five_corss_val/" + str(sys.argv[1])), buf_size=8192),
    batch_size=BATCH_SIZE)

test_data = paddle.batch(
    paddle.reader.shuffle(
        filereader.file_reader("five_corss_val/" + str(sys.argv[2])), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CUDAPlace(0)

feeder = fluid.DataFeeder(
    feed_list=[
        word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
    ],
    place=place)
exe = fluid.Executor(place)

exe.run(fluid.default_startup_program())

embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
embedding_param.set(
    load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place)
start_time = time.time()
#batch_id = 0
for pass_id in xrange(PASS_NUM):
    #chunk_evaluator.reset(exe)
    time_start = time.time()
    sum_infer = 0
    sum_label = 0
    sum_correct = 0
    for data in train_data():
        start_time = time.time()
        cost, num_infer, num_label, num_correct = exe.run(
            fluid.default_main_program(),
            feed=feeder.feed(data),
            fetch_list=[avg_cost, num_infer_chunks, num_label_chunks, num_correct_chunks])
        sum_infer += num_infer
        sum_label += num_label
        sum_correct += num_correct
        print("cost:" + str(cost[0]), ", num_infer:" + str(num_infer[0]) + ", num_label:" + str(num_label[0]) + ", num_correct:" + str(num_correct))
    precision = 0
    recall = 0
    f1_score = 0
    if sum_infer != 0:
        precision = sum_correct * 1.0 / sum_infer
    if sum_label != 0:
        recall = sum_correct * 1.0 / sum_label
    if precision != 0 or recall != 0:
        f1_score = precision * recall * 2.0 / (precision + recall)
    print("pass_id:" + str(pass_id) + ", precision:" + str(precision) + ", recall:" + str(recall) + ", f1_score:" + str(f1_score))
    time_end = time.time()
    print("pass_id:" + str(pass_id) + ", cost_time:" + str(time_end - time_start))   
    save_dirname = sys.argv[2] + ".save_model_multi_thread." + str(pass_id) 
    fluid.io.save_inference_model(
        save_dirname,
        ['word_data', 'verb_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data',
         'ctx_p1_data', 'ctx_p2_data', 'mark_data', 'target'],
        [num_infer_chunks, num_label_chunks, num_correct_chunks], exe)

This is an attempt at a multi-threaded rewrite of Paddle/python/paddle/fluid/tests/book/test_label_semantic_roles.py. The problem can be reproduced by making test_label_semantic_roles.py multi-threaded in the same way; you can also contact me for the environment.
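For reference, here is a minimal sketch of the wrapping pattern used above (the embedding/fc body is a stand-in network, not the real SRL model; the ParallelDo / read_input / write_output calls are the same APIs as in the full script):

import paddle.fluid as fluid

# Stand-in single-layer network wrapped in ParallelDo, mirroring the
# structure of the full script above.
x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=1)
y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=1)

places = fluid.layers.get_places(device_count=0)
pd = fluid.layers.ParallelDo(places)
with pd.do():
    x_ = pd.read_input(x)  # per-device slice of the batch
    y_ = pd.read_input(y)
    emb = fluid.layers.embedding(input=x_, size=[100, 32])
    feature = fluid.layers.fc(input=emb, size=10)
    cost = fluid.layers.linear_chain_crf(
        input=feature, label=y_, param_attr=fluid.ParamAttr(name='crfw'))
    pd.write_output(fluid.layers.mean(x=cost))

avg_cost = fluid.layers.mean(x=pd())  # merge the per-device costs
fluid.optimizer.SGD(learning_rate=0.001).minimize(avg_cost)

The traceback from the failing run: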

Traceback (most recent call last):
  File "new_srl_base_tanh_MultiThread.py", line 289, in <module>
    fetch_list=[avg_cost, num_infer_chunks, num_label_chunks, num_correct_chunks])
  File "/usr/local/lib/python2.7/dist-packages/paddle/fluid/executor.py", line 349, in run
    self.executor.run(program_cache.desc, scope, 0, True, True)
paddle.fluid.core.EnforceNotMet: var crfw@GRAD is both input and output, does not support transform at [/paddle_gpu/Paddle/paddle/fluid/framework/operator.cc:535]
PaddlePaddle Call Stacks:
0       0x7f71c45bd48cp paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 572
1       0x7f71c51b818fp paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 2783
2       0x7f71c4f96af2p paddle::operators::ParallelDoGradOp::AccumulateGrad(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, std::vector<paddle::framework::Scope*, std::allocator<paddle::framework::Scope*> > const&, std::vector<boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_>, std::allocator<boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> > > const&) const + 2482
3       0x7f71c4f9b3bcp paddle::operators::ParallelDoGradOp::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 2220
4       0x7f71c466e4a5p paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool) + 1781
5       0x7f71c466fa5fp paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool) + 63
6       0x7f71c45d9fc3p void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<void, paddle::framework::Executor, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, pybind11::name, pybind11::is_method, pybind11::sibling>(void (paddle::framework::Executor::*)(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool)#1}, void, paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, pybind11::name, pybind11::is_method, pybind11::sibling>(pybind11::cpp_function::initialize<void, paddle::framework::Executor, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, pybind11::name, pybind11::is_method, pybind11::sibling>(void (paddle::framework::Executor::*)(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool)#1}&&, void (*)(paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN(pybind11::detail::function_call) + 579
7       0x7f71c45d7d04p pybind11::cpp_function::dispatcher(_object*, _object*, _object*) + 1236
8             0x4c37edp PyEval_EvalFrameEx + 31165
9             0x4b9ab6p PyEval_EvalCodeEx + 774
10            0x4c16e7p PyEval_EvalFrameEx + 22711
11            0x4b9ab6p PyEval_EvalCodeEx + 774
12            0x4eb30fp
13            0x4e5422p PyRun_FileExFlags + 130
14            0x4e3cd6p PyRun_SimpleFileExFlags + 390
15            0x493ae2p Py_Main + 1554
16      0x7f721003b830p __libc_start_main + 240
17            0x4933e9p _start + 41