SRL任务中CRF-layer使用gpu训练出core
Created by: April1010
完整错误信息如下:
I0728 02:02:37.545069 20 GradientMachine.cpp:85] Initing parameters..
I0728 02:02:53.995805 20 GradientMachine.cpp:92] Init parameters done.
F0728 02:02:54.171394 20 CRFLayer.cpp:57] Check failed: !useGpu_ GPU is not supported
*** Check failure stack trace: ***
@ 0x7fce5af63b6d google::LogMessage::Fail()
@ 0x7fce5af65eb8 google::LogMessage::SendToLog()
@ 0x7fce5af6367b google::LogMessage::Flush()
@ 0x7fce5af66d8e google::LogMessageFatal::~LogMessageFatal()
@ 0x7fce5ac07402 paddle::CRFLayer::forward()
@ 0x7fce5ac4074f paddle::NeuralNetwork::forward()
@ 0x7fce5af336d0 GradientMachine::forwardBackward()
@ 0x7fce5aaae6a4 _wrap_GradientMachine_forwardBackward
@ 0x4cb45e PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca099 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca099 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4c2509 PyEval_EvalCode
@ 0x4f1def (unknown)
@ 0x4ec652 PyRun_FileExFlags
@ 0x4eae31 PyRun_SimpleFileExFlags
@ 0x49e14a Py_Main
@ 0x7fce9bce5830 __libc_start_main
@ 0x49d9d9 _start
@ (nil) (unknown)
Aborted (core dumped)
网络配置如下(基本和book中LSTM模型的SRL任务一致):
# CRFLayer is CPU-only: CRFLayer.cpp aborts with "Check failed: !useGpu_
# GPU is not supported" (see the failure log above). Pin use_gpu=False so
# the trainer never tries to place the CRF layer on a GPU.
paddle.init(use_gpu=False, trainer_count=args.trainer_count)

# Vocabulary sizes come from the data reader; all remaining
# hyper-parameters are driven by the CLI arguments.
stag_dict_size = data_reader.get_dict_size('stag_dict')
word_dict_size = data_reader.get_dict_size('word_dict')
label_dict_size = data_reader.get_dict_size('label_dict')
mix_hidden_lr = args.mix_hidden_lr
default_std = args.default_std
hidden_dim = args.hidden_dim
word_dim = args.word_dim
mark_dim = args.mark_dim
stag_dim = args.stag_dim
num_lstm_layers = args.num_lstm_layers
# Input slots. The current word and its two-word context windows all draw
# ids from the word vocabulary, as does the predicate slot.
word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2 = (
    paddle.layer.data(name=slot, type=d_type(word_dict_size))
    for slot in ('word_data', 'ctx_n2_data', 'ctx_n1_data',
                 'ctx_0_data', 'ctx_p1_data', 'ctx_p2_data'))
predicate = paddle.layer.data(
    name='predicate_data', type=d_type(word_dict_size))
# NOTE(review): mark_dict_size is not defined anywhere in this snippet —
# presumably bound in the surrounding script; confirm.
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_size))
word_stag = paddle.layer.data(
    name='word_stag_data', type=d_type(stag_dict_size))

# Shared parameter-attribute presets used by the layers below.
emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=False)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
# Embedding lookups: predicate, mark and supertag slots each get their own
# table; the six word-vocabulary slots all share the 'emb' table.
predicate_embedding = paddle.layer.embedding(
    size=word_dim,
    input=predicate,
    param_attr=paddle.attr.Param(name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
    size=mark_dim, input=mark, param_attr=std_0)
word_stag_embedding = paddle.layer.embedding(
    size=stag_dim, input=word_stag, param_attr=std_0)

word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
    paddle.layer.embedding(size=word_dim, input=slot, param_attr=emb_para)
    for slot in word_input
]
emb_layers += [predicate_embedding, word_stag_embedding, mark_embedding]
# First hidden layer: a mixed layer summing full-matrix projections of
# every embedding slot.
hidden_0 = paddle.layer.mixed(
    size=hidden_dim,
    bias_attr=std_default,
    input=[
        paddle.layer.full_matrix_projection(
            input=emb_layer, param_attr=std_default)
        for emb_layer in emb_layers
    ])

lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
    initial_std=default_std, learning_rate=mix_hidden_lr)

# Bottom LSTM runs in the forward direction (reverse defaults to False).
lstm_0 = paddle.layer.lstmemory(
    input=hidden_0,
    act=paddle.activation.Tanh(),
    gate_act=paddle.activation.Sigmoid(),
    state_act=paddle.activation.Tanh(),
    bias_attr=std_0,
    param_attr=lstm_para_attr)

# Stack alternating-direction LSTMs with direct edges: every level mixes
# the previous level's mixed layer and its LSTM output.
input_tmp = [hidden_0, lstm_0]
for depth in range(1, num_lstm_layers):
    mix_k = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr),
        ])
    lstm_k = paddle.layer.lstmemory(
        input=mix_k,
        act=paddle.activation.Tanh(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Tanh(),
        reverse=bool(depth % 2),  # odd levels run right-to-left
        bias_attr=std_0,
        param_attr=lstm_para_attr)
    input_tmp = [mix_k, lstm_k]
# Output projection into the label space, fed by the topmost mixed layer
# and LSTM output (same two-way wiring as the stacked levels above).
feature_out = paddle.layer.mixed(
    size=label_dict_size,
    bias_attr=std_default,
    input=[
        paddle.layer.full_matrix_projection(
            input=input_tmp[0], param_attr=hidden_para_attr),
        paddle.layer.full_matrix_projection(
            input=input_tmp[1], param_attr=lstm_para_attr),
    ])

# Gold label sequence. Reuse the label_dict_size / default_std /
# mix_hidden_lr locals bound earlier instead of re-querying data_reader
# and args (consistency with the rest of the configuration).
target = paddle.layer.data(name='target', type=d_type(label_dict_size))

# Linear-chain CRF training cost. NOTE: Paddle's CRFLayer supports CPU
# only — it is the layer that triggers the "GPU is not supported" abort
# in the log above when run with use_gpu.
crf_cost = paddle.layer.crf(
    size=label_dict_size,
    input=feature_out,
    label=target,
    param_attr=paddle.attr.Param(
        name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))

# Viterbi decoding layer sharing the CRF transition weights ('crfw');
# attached as an extra/evaluation layer, not part of the cost.
crf_dec = paddle.layer.crf_decoding(
    size=label_dict_size,
    input=feature_out,
    label=target,
    param_attr=paddle.attr.Param(name='crfw'))
evaluator.sum(input=crf_dec)
def load_parameter(file_name, h, w):
    """Load a raw float32 binary dump and reshape it to an (h, w) matrix."""
    flat = np.fromfile(file_name, dtype=np.float32)
    return flat.reshape(h, w)
parameters = paddle.parameters.create(crf_cost)

# Momentum SGD with L2 regularization and model averaging (the settings
# used by the book's SRL demo).
optimizer = paddle.optimizer.Momentum(
    momentum=0,
    learning_rate=2e-2,
    regularization=paddle.optimizer.L2Regularization(rate=8e-4),
    model_average=paddle.optimizer.ModelAverage(
        average_window=0.5, max_average_window=10000))

# args.is_local is an integer flag: compare it directly rather than via
# the redundant `True if ... else False` ternary.
is_local_flag = args.is_local > 0

train_batch_reader = paddle.batch(
    paddle.reader.shuffle(
        data_reader.get_train_reader(is_local_flag), buf_size=8192),
    batch_size=args.batch_size)
test_batch_reader = paddle.batch(
    data_reader.get_test_reader(is_local_flag), batch_size=args.batch_size)
feeding = data_reader.data_name_feeding()

# crf_dec is registered as an extra layer so decoding runs during
# evaluation even though only crf_cost is optimized.
trainer = paddle.trainer.SGD(
    cost=crf_cost,
    parameters=parameters,
    update_equation=optimizer,
    extra_layers=crf_dec)

# NOTE(review): event_handler is not defined in this snippet — presumably
# provided by the surrounding script; confirm.
trainer.train(
    reader=train_batch_reader,
    event_handler=event_handler,
    num_passes=args.num_passes,
    feeding=feeding)