diff --git a/fluid/neural_machine_translation/transformer_nist_base/data_util.py b/fluid/neural_machine_translation/transformer_nist_base/data_util.py index ffa11a7a183173a45e83e06af7d8f0f8c97a17fa..26d974e6075fc9e3affbf4e6d62a64bdade153ad 100644 --- a/fluid/neural_machine_translation/transformer_nist_base/data_util.py +++ b/fluid/neural_machine_translation/transformer_nist_base/data_util.py @@ -10,9 +10,9 @@ END_MARK = "" UNK_MARK = "" ''' -START_MARK = "<_GO>" -END_MARK = "<_EOS>" -UNK_MARK = "<_UNK>" +START_MARK = "_GO" +END_MARK = "_EOS" +UNK_MARK = "_UNK" class DataLoader(object): def __init__(self, diff --git a/fluid/neural_machine_translation/transformer_nist_base/nmt_fluid.py b/fluid/neural_machine_translation/transformer_nist_base/nmt_fluid.py index bedddffb1a405084632cf6190a219a5bf9e88f51..8d467c392298e58a27b9fe915913fad8093d7eff 100644 --- a/fluid/neural_machine_translation/transformer_nist_base/nmt_fluid.py +++ b/fluid/neural_machine_translation/transformer_nist_base/nmt_fluid.py @@ -280,8 +280,9 @@ def main(): ts = time.time() total = 0 pass_start_time = time.time() + #print len(train_reader) for batch_id, data in enumerate(train_reader): - print len(data) + #print len(data) if len(data) != args.batch_size: continue @@ -415,10 +416,11 @@ def main(): position_encoding_init(ModelHyperParams.max_length + 1, ModelHyperParams.d_model), place) + #print "/root/data/nist06n/data-%d/part-*" % (args.task_index), train_reader = data_util.DataLoader( src_vocab_fpath="/root/data/nist06n/cn_30001.dict", trg_vocab_fpath="/root/data/nist06n/en_30001.dict", - fpattern="/root/data/nist06/data-%d/part-*" % (args.task_index), + fpattern="/root/data/nist06n/data-%d/part-*" % (args.task_index), batch_size=args.batch_size, token_batch_size=TrainTaskConfig.token_batch_size, sort_by_length=TrainTaskConfig.sort_by_length,