seqToseq with reconstruction net raises dynamic_cast<CpuIVector*>(&label) error
Created by: alvations
Tu et al. (2016) proposed a "Reconstruction" network. I made a small change to the seqToseq demo: in dataprovider.py I yield src_ids a second time (https://github.com/PaddlePaddle/Paddle/blob/develop/demo/seqToseq/dataprovider.py#L88):
yield {
    'source_language_word': src_ids,
    'target_language_word': trg_ids,
    'target_language_next_word': trg_ids_next,
    'source_language_word_2': src_ids,
}
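For the provider to accept the fourth field, the hook's input_types needs a matching fourth slot. Roughly like this (a sketch modeled on the demo's hook; the exact hook signature and whether dict-style input_types is supported depend on the Paddle version):

from paddle.trainer.PyDataProvider2 import integer_value_sequence

def hook(settings, src_dict_path, trg_dict_path, is_generating, file_list, **kwargs):
    # ... load settings.src_dict / settings.trg_dict as in the demo ...
    settings.input_types = {
        'source_language_word': integer_value_sequence(len(settings.src_dict)),
        'target_language_word': integer_value_sequence(len(settings.trg_dict)),
        'target_language_next_word': integer_value_sequence(len(settings.trg_dict)),
        # extra slot for the duplicated source side fed to the reconstructor
        'source_language_word_2': integer_value_sequence(len(settings.src_dict)),
    }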
Then I added this to seqToseq_net.py:
def gru_encoder_decoder(data_conf, is_generating, word_vector_dim=512,
                        encoder_size=512, decoder_size=512,
                        beam_size=3, max_length=250):
    """
    A wrapper for an attention version of GRU Encoder-Decoder network.

    is_generating: whether this config is used for generating
    encoder_size: dimension of hidden unit in GRU Encoder network
    decoder_size: dimension of hidden unit in GRU Decoder network
    word_vector_dim: dimension of word vector
    beam_size: expand width in beam search
    max_length: a stop condition of sequence generation
    """
    for k, v in data_conf.iteritems():
        globals()[k] = v
    source_dict_dim = get_file_len(src_dict_path)
    target_dict_dim = get_file_len(trg_dict_path)
    gen_trans_file = gen_result

    # Input layer.
    src_word_id = data_layer(name='source_language_word', size=source_dict_dim)
    # Embedding layer.
    src_embedding = embedding_layer(
        input=src_word_id,
        size=word_vector_dim,
        param_attr=ParamAttr(name='_source_language_embedding'))
    # Forward GRU layer.
    src_forward = simple_gru(input=src_embedding, size=encoder_size)
    # Backward GRU layer.
    src_backward = simple_gru(input=src_embedding, size=encoder_size,
                              reverse=True)
    # Concatenated layer = [forward, backward].
    encoded_vector = concat_layer(input=[src_forward, src_backward])

    with mixed_layer(size=decoder_size) as encoded_proj:
        encoded_proj += full_matrix_projection(encoded_vector)

    backward_first = first_seq(input=src_backward)
    with mixed_layer(size=decoder_size, act=TanhActivation()) as decoder_boot:
        decoder_boot += full_matrix_projection(backward_first)
    def gru_decoder_wth_attention(enc_vec, enc_proj, current_word):
        decoder_mem = memory(name='gru_decoder',
                             size=decoder_size,
                             boot_layer=decoder_boot)
        context = simple_attention(encoded_sequence=enc_vec,
                                   encoded_proj=enc_proj,
                                   decoder_state=decoder_mem)
        with mixed_layer(size=decoder_size * 3) as decoder_inputs:
            decoder_inputs += full_matrix_projection(context)
            decoder_inputs += full_matrix_projection(current_word)
        gru_or_lstm_step = gru_step_layer(name='gru_decoder',
                                          input=decoder_inputs,
                                          output_mem=decoder_mem,
                                          size=decoder_size)
        with mixed_layer(size=target_dict_dim,
                         bias_attr=True,
                         act=SoftmaxActivation()) as out:
            out += full_matrix_projection(input=gru_or_lstm_step)
        return out, gru_or_lstm_step
    def gru_reconstructor_wth_attention(enc_vec, enc_proj, current_word):
        decoder_mem = memory(name='gru_reconstructor',
                             size=decoder_size,
                             boot_layer=decoder_boot)
        context = simple_attention(encoded_sequence=enc_vec,
                                   encoded_proj=enc_proj,
                                   decoder_state=decoder_mem)
        with mixed_layer(size=decoder_size * 3) as decoder_inputs:
            decoder_inputs += full_matrix_projection(context)
            decoder_inputs += full_matrix_projection(current_word)
        gru_or_lstm_step = gru_step_layer(name='gru_reconstructor',
                                          input=decoder_inputs,
                                          output_mem=decoder_mem,
                                          size=decoder_size)
        with mixed_layer(size=source_dict_dim,
                         bias_attr=True,
                         act=SoftmaxActivation()) as out:
            out += full_matrix_projection(input=gru_or_lstm_step)
        return out, gru_or_lstm_step
    decoder_group_name = "decoder_group"
    decode_group_inputs = [StaticInput(input=encoded_vector, is_seq=True),
                           StaticInput(input=encoded_proj, is_seq=True)]
    if not is_generating:
        trg_embedding = embedding_layer(
            input=data_layer(name='target_language_word',
                             size=target_dict_dim),
            size=word_vector_dim,
            param_attr=ParamAttr(name='_target_language_embedding'))
        decode_group_inputs.append(trg_embedding)
        decoder, decoder_vec = recurrent_group(name=decoder_group_name,
                                               step=gru_decoder_wth_attention,
                                               input=decode_group_inputs)
        lbl = data_layer(name='target_language_next_word',
                         size=target_dict_dim)
        decode_cost = classification_cost(input=decoder, label=lbl)

        with mixed_layer(size=decoder_size) as decoder_proj:
            decoder_proj += full_matrix_projection(decoder_vec)
        reconstructor_group_name = "reconstructor_group"
        recon_group_inputs = [StaticInput(input=decoder_vec, is_seq=True),
                              StaticInput(input=decoder_proj, is_seq=True)]
        recon_group_inputs.append(src_embedding)
        reconstructor, _ = recurrent_group(name=reconstructor_group_name,
                                           step=gru_reconstructor_wth_attention,
                                           input=recon_group_inputs)
        src_word_id_2 = data_layer(name='source_language_word_2',
                                   size=source_dict_dim)
        recon_cost = classification_cost(input=reconstructor,
                                         label=src_word_id_2)
        outputs(decode_cost)
    else:
        trg_embedding = GeneratedInput(
            size=target_dict_dim,
            embedding_name='_target_language_embedding',
            embedding_size=word_vector_dim)
        decode_group_inputs.append(trg_embedding)
        beam_gen = beam_search(name=decoder_group_name,
                               step=gru_decoder_wth_attention,
                               input=decode_group_inputs,
                               bos_id=0,
                               eos_id=1,
                               beam_size=beam_size,
                               max_length=max_length)
        seqtext_printer_evaluator(input=beam_gen,
                                  id_input=data_layer(name="sent_id", size=1),
                                  dict_file=trg_dict_path,
                                  result_file=gen_trans_file)
        outputs(beam_gen)
But at runtime Paddle reports this error:
$ bash train.sh
I1228 12:49:41.714779 27295 Util.cpp:155] commandline: /usr/local/bin/../opt/paddle/bin/paddle_trainer --config=train.conf --save_dir=/home/ltan/Ibot-Recon/model-test --use_gpu=false --num_passes=3 --show_parameter_stats_period=1000 --trainer_count=4 --log_period=10 --dot_period=5
I1228 12:49:41.714972 27295 Util.cpp:130] Calling runInitFunctions
I1228 12:49:41.715245 27295 Util.cpp:143] Call runInitFunctions done.
Traceback (most recent call last):
File "<string>", line 13, in <module>
NameError: name 'GLOG_logtostderr' is not defined
[INFO 2016-12-28 12:49:42,055 networks.py:1466] The input order is [source_language_word, target_language_word, target_language_next_word]
[INFO 2016-12-28 12:49:42,055 networks.py:1472] The output order is [__cost_0__]
I1228 12:49:42.072384 27295 Trainer.cpp:170] trainer mode: Normal
I1228 12:49:42.497022 27295 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
[INFO 2016-12-28 12:49:42,497 dataprovider.py:14] src dict len : 10000
[INFO 2016-12-28 12:49:42,498 dataprovider.py:24] trg dict len : 7116
I1228 12:49:42.509634 27295 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
[INFO 2016-12-28 12:49:42,509 dataprovider.py:14] src dict len : 10000
[INFO 2016-12-28 12:49:42,509 dataprovider.py:24] trg dict len : 7116
I1228 12:49:42.510082 27295 GradientMachine.cpp:134] Initing parameters..
I1228 12:49:44.382016 27295 GradientMachine.cpp:141] Init parameters done.
I1228 12:49:44.685884 27360 ThreadLocal.cpp:37] thread use undeterministic rand seed:27361
F1228 12:49:45.947146 27301 Matrix.cpp:3154] Check failed: dynamic_cast<CpuIVector*>(&label)
*** Check failure stack trace: ***
@ 0x7f8fde03edaa (unknown)
@ 0x7f8fde03ece4 (unknown)
@ 0x7f8fde03e6e6 (unknown)
@ 0x7f8fde041687 (unknown)
@ 0x6dbd49 paddle::CpuMatrix::oneHotCrossEntropy()
@ 0x5ff835 paddle::CostLayer::forward()
@ 0x6714ce paddle::NeuralNetwork::forward()
@ 0x677997 paddle::TrainerThread::forward()
@ 0x679cb5 paddle::TrainerThread::computeThread()
@ 0x7f8fddbbba60 (unknown)
@ 0x7f8fdee51184 start_thread
@ 0x7f8fdd32337d (unknown)
@ (nil) (unknown)
/usr/local/bin/paddle: line 109: 27295 Aborted (core dumped) ${DEBUGGER} $MYDIR/../opt/paddle/bin/paddle_trainer ${@:2}
Is this error caused by a mistake in my network configuration?
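One thing I notice in the log above: the input order only lists three fields ([source_language_word, target_language_word, target_language_next_word]) even though the dataprovider yields four. Since recon_cost never reaches outputs(), source_language_word_2 is presumably pruned from the network, so the four yielded fields may be misaligned against the three remaining inputs, handing a non-integer slot to the cost layer's label. If that is the cause, emitting both costs might fix it; a sketch of what I would try (an untested guess, not verified):

        # Untested: emit both costs so the reconstructor branch and its
        # source_language_word_2 label layer stay in the network, keeping the
        # dataprovider's four fields aligned with four network inputs.
        outputs(decode_cost, recon_cost)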